bulltrackers-module 1.0.719 → 1.0.720
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/maintenance/backfill-pi-alert-history/index.js +201 -0
- package/functions/maintenance/backfill-pi-page-views/index.js +189 -0
- package/functions/maintenance/backfill-pi-ratings/index.js +376 -0
- package/functions/maintenance/backfill-price-data-from-firestore/index.js +220 -0
- package/functions/maintenance/backfill-watchlist-membership/index.js +190 -0
- package/package.json +7 -2
package/functions/maintenance/backfill-pi-alert-history/index.js
ADDED
@@ -0,0 +1,201 @@

/**
 * @fileoverview Backfill PI Alert History from Firestore to BigQuery
 *
 * This function reads existing PI alert history data from Firestore (PIAlertHistoryData/{date})
 * and writes it to a BigQuery table.
 *
 * Usage (Local Node.js script):
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *   node index.js (backfills all dates)
 *
 * Features:
 * - Backfills alert history for a date range (or all dates)
 * - Uses load jobs (free) for efficient batching
 * - Handles compressed Firestore data
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensurePIAlertHistoryTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill PI alert history from Firestore to BigQuery for a date range
 */
async function backfillAlertHistory(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting PI alert history backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensurePIAlertHistoryTable(logger);

    const alertHistoryCollection = db.collection('PIAlertHistoryData');

    // Get all alert history documents
    logger.log('INFO', '[Backfill] Fetching alert history documents from Firestore...');
    const snapshot = await alertHistoryCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No alert history documents found in Firestore');
      return { success: false, message: 'No alert history found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} alert history documents`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each document
    for (const doc of snapshot.docs) {
      const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

      // Filter by date range if provided
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        const data = tryDecompress(doc.data());
        const { date, lastUpdated, ...piAlertHistory } = data;

        if (!piAlertHistory || Object.keys(piAlertHistory).length === 0) {
          logger.log('WARN', `[Backfill] No alert history in document for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Transform to BigQuery rows
        const lastUpdatedTimestamp = lastUpdated
          ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
          : new Date().toISOString();

        const bigqueryRows = [];
        for (const [piId, alertData] of Object.entries(piAlertHistory)) {
          if (alertData && typeof alertData === 'object') {
            // Each PI can have multiple alert types
            for (const [alertType, alertInfo] of Object.entries(alertData)) {
              if (alertInfo && typeof alertInfo === 'object') {
                const lastTriggered = alertInfo.lastTriggered
                  ? (alertInfo.lastTriggered.toDate ? alertInfo.lastTriggered.toDate().toISOString() : alertInfo.lastTriggered)
                  : null;

                bigqueryRows.push({
                  date: dateStr,
                  pi_id: parseInt(piId, 10),
                  alert_type: alertType,
                  triggered: alertInfo.triggered || false,
                  trigger_count: alertInfo.count || 0,
                  triggered_for: alertInfo.triggeredFor || [],
                  metadata: alertInfo.metadata || {},
                  last_triggered: lastTriggered,
                  last_updated: lastUpdatedTimestamp
                });
              }
            }
          }
        }

        if (bigqueryRows.length === 0) {
          logger.log('WARN', `[Backfill] No valid alert history rows for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'pi_alert_history', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process alert history for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ PI alert history backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] PI alert history backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point
 */
async function backfillPIAlertHistory(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting PI Alert History backfill...');

  try {
    const result = await backfillAlertHistory(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI execution
if (require.main === module) {
  const args = process.argv.slice(2);
  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillPIAlertHistory(startDate, endDate)
    .then(result => {
      console.log('Backfill result:', result);
      process.exit(0);
    })
    .catch(error => {
      console.error('Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillPIAlertHistory };

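All five new scripts share the tryDecompress helper above: a Firestore document written with `_compressed: true` carries its real body as gzipped JSON in a `payload` field. The writer side is not part of this diff; a minimal sketch of the inverse, with a hypothetical helper name, shows the shape tryDecompress expects:

const zlib = require('zlib');

// Hypothetical inverse of tryDecompress (the real writer is not in this
// diff): packs an object into the { _compressed, payload } shape that the
// backfill scripts unpack.
function compressForFirestore(obj) {
  const json = JSON.stringify(obj);
  return {
    _compressed: true,
    payload: zlib.gzipSync(Buffer.from(json, 'utf8'))
  };
}

// Round trip: tryDecompress(compressForFirestore(x)) yields x again.
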
package/functions/maintenance/backfill-pi-page-views/index.js
ADDED
@@ -0,0 +1,189 @@

/**
 * @fileoverview Backfill PI Page Views from Firestore to BigQuery
 *
 * This function reads existing PI page views data from Firestore (PIPageViewsData/{date})
 * and writes it to a BigQuery table.
 *
 * Usage (Local Node.js script):
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *   node index.js (backfills all dates)
 *
 * Features:
 * - Backfills page views for a date range (or all dates)
 * - Uses load jobs (free) for efficient batching
 * - Handles compressed Firestore data
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensurePIPageViewsTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill PI page views from Firestore to BigQuery for a date range
 */
async function backfillPageViews(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting PI page views backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensurePIPageViewsTable(logger);

    const pageViewsCollection = db.collection('PIPageViewsData');

    // Get all page views documents
    logger.log('INFO', '[Backfill] Fetching page views documents from Firestore...');
    const snapshot = await pageViewsCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No page views documents found in Firestore');
      return { success: false, message: 'No page views found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} page views documents`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each document
    for (const doc of snapshot.docs) {
      const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

      // Filter by date range if provided
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        const data = tryDecompress(doc.data());
        const { date, lastUpdated, ...piPageViews } = data;

        if (!piPageViews || Object.keys(piPageViews).length === 0) {
          logger.log('WARN', `[Backfill] No page views in document for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Transform to BigQuery rows
        const lastUpdatedTimestamp = lastUpdated
          ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
          : new Date().toISOString();

        const bigqueryRows = [];
        for (const [piId, pageViewData] of Object.entries(piPageViews)) {
          if (pageViewData && typeof pageViewData === 'object') {
            bigqueryRows.push({
              date: dateStr,
              pi_id: parseInt(piId, 10),
              total_views: pageViewData.totalViews || null,
              unique_viewers: pageViewData.uniqueViewers || null,
              views_by_user: pageViewData.viewsByUser || {},
              last_updated: lastUpdatedTimestamp
            });
          }
        }

        if (bigqueryRows.length === 0) {
          logger.log('WARN', `[Backfill] No valid page views rows for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'pi_page_views', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process page views for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ PI page views backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] PI page views backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point
 */
async function backfillPIPageViews(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting PI Page Views backfill...');

  try {
    const result = await backfillPageViews(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI execution
if (require.main === module) {
  const args = process.argv.slice(2);
  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillPIPageViews(startDate, endDate)
    .then(result => {
      console.log('Backfill result:', result);
      process.exit(0);
    })
    .catch(error => {
      console.error('Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillPIPageViews };

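Each script also exports its entry point, so besides the CLI form shown in the file headers, the backfills can be driven from another Node.js script. A sketch, assuming the package layout listed at the top of this diff and application-default GCP credentials:

// Hypothetical orchestration script, run from the package root.
const { backfillPIPageViews } = require('./functions/maintenance/backfill-pi-page-views');

async function run() {
  // Equivalent to: node index.js --startDate=2024-01-01 --endDate=2024-03-31
  const result = await backfillPIPageViews('2024-01-01', '2024-03-31');
  if (result.success) {
    console.log(`Backfilled ${result.totalRows} rows across ${result.processedDates} dates`);
  }
}

run().catch(error => {
  console.error('Backfill failed:', error);
  process.exit(1);
});
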
package/functions/maintenance/backfill-pi-ratings/index.js
ADDED
@@ -0,0 +1,376 @@

/**
 * @fileoverview Backfill PI Ratings from Firestore to BigQuery
 *
 * This function reads existing PI ratings data from Firestore (PiReviews/{date}/shards/),
 * aggregates it by PI, and writes it to a BigQuery table.
 *
 * Note: Raw reviews are stored in PiReviews/{date}/shards/, not in PIRatingsData/{date}.
 * This script aggregates the raw reviews into the expected format.
 *
 * Usage (Local Node.js script):
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *   node index.js (backfills all dates)
 *
 * Features:
 * - Backfills ratings for a date range (or all dates)
 * - Aggregates raw reviews from shards
 * - Uses load jobs (free) for efficient batching
 * - Handles compressed Firestore data
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensurePIRatingsTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill PI ratings from Firestore to BigQuery for a date range
 * Reads from PiReviews/{date}/shards/ and aggregates by PI
 */
async function backfillRatings(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting PI ratings backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensurePIRatingsTable(logger);

    // Use collection group query to find all shards
    // Path structure: PiReviews/{date}/shards/{shardId}
    logger.log('INFO', '[Backfill] Fetching all shards from PiReviews using collection group query...');
    const shardsCollectionGroup = db.collectionGroup('shards');
    const allShardsSnapshot = await shardsCollectionGroup.get();

    if (allShardsSnapshot.empty) {
      logger.log('WARN', '[Backfill] No shards found in collection group query');
      return { success: false, message: 'No shards found' };
    }

    logger.log('INFO', `[Backfill] Found ${allShardsSnapshot.size} shard documents total`);

    // Group shards by date (extract from parent path: PiReviews/{date}/shards/{shardId})
    // Filter to only shards under PiReviews collection
    const shardsByDate = {};
    let samplePath = null;

    for (const shardDoc of allShardsSnapshot.docs) {
      // Path structure: PiReviews/{date}/shards/{shardId}
      // Get full path: projects/{project}/databases/{db}/documents/PiReviews/{date}/shards/{shardId}
      const fullPath = shardDoc.ref.path;

      // Log first path for debugging
      if (!samplePath) {
        samplePath = fullPath;
        logger.log('INFO', `[Backfill] Sample shard path: ${fullPath}`);
      }

      // Check if this shard is under PiReviews collection
      if (!fullPath.includes('PiReviews')) {
        continue; // Skip shards from other collections
      }

      // Extract date from path: PiReviews/{date}/shards/{shardId}
      // Path format: projects/{project}/databases/{db}/documents/PiReviews/{date}/shards/{shardId}
      // Or simpler: PiReviews/{date}/shards/{shardId}
      const pathParts = fullPath.split('/');

      // Find PiReviews in path
      let piReviewsIndex = -1;
      for (let i = 0; i < pathParts.length; i++) {
        if (pathParts[i] === 'PiReviews') {
          piReviewsIndex = i;
          break;
        }
      }

      if (piReviewsIndex >= 0 && pathParts[piReviewsIndex + 1]) {
        const dateStr = pathParts[piReviewsIndex + 1];

        // Validate date format (YYYY-MM-DD)
        if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
          if (!shardsByDate[dateStr]) {
            shardsByDate[dateStr] = [];
          }
          shardsByDate[dateStr].push(shardDoc);
        } else {
          logger.log('DEBUG', `[Backfill] Skipping invalid date format: ${dateStr} from path ${fullPath}`);
        }
      } else {
        logger.log('DEBUG', `[Backfill] Could not extract date from path: ${fullPath}`);
      }
    }

    const uniqueDates = Object.keys(shardsByDate);
    logger.log('INFO', `[Backfill] Found shards for ${uniqueDates.length} unique dates: ${uniqueDates.slice(0, 5).join(', ')}${uniqueDates.length > 5 ? '...' : ''}`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each date
    for (const dateStr of uniqueDates) {

      // Filter by date range if provided
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        // Get all shards for this date (already grouped)
        const shardsForDate = shardsByDate[dateStr];

        if (!shardsForDate || shardsForDate.length === 0) {
          logger.log('WARN', `[Backfill] No shards found for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Aggregate reviews by PI
        const reviewsByPi = {};
        let totalReviewsFound = 0;
        let skippedReviews = 0;

        // Helper to convert Firestore timestamp to ISO string
        function timestampToISO(timestamp) {
          if (!timestamp) return null;
          if (timestamp.toDate) {
            return timestamp.toDate().toISOString();
          } else if (timestamp._seconds) {
            return new Date(timestamp._seconds * 1000).toISOString();
          } else if (timestamp instanceof Date) {
            return timestamp.toISOString();
          }
          return null;
        }

        for (const shardDoc of shardsForDate) {
          const shardData = tryDecompress(shardDoc.data());

          if (!shardData || typeof shardData !== 'object') {
            continue;
          }

          // Shard data structure: Keys are like "reviews.29312236_31075566" with review data as values
          // Process all keys that contain review data
          Object.keys(shardData).forEach(key => {
            const entry = shardData[key];
            totalReviewsFound++;

            // Skip if not an object
            if (!entry || typeof entry !== 'object') {
              skippedReviews++;
              return;
            }

            // Check for required fields (piCid and rating)
            if (entry.piCid === undefined || entry.rating === undefined) {
              skippedReviews++;
              return;
            }

            const piId = String(entry.piCid);

            if (!reviewsByPi[piId]) {
              reviewsByPi[piId] = {
                allReviews: [], // Store all review objects with full metadata
                ratingsByUser: {},
                userTimestamps: {} // Track timestamps to keep most recent rating per user for aggregation
              };
            }

            // Store full review metadata
            const reviewMetadata = {
              action: entry.action || null,
              actualUserCid: entry.actualUserCid || null,
              comment: entry.comment || null,
              createdAt: timestampToISO(entry.createdAt),
              isAnonymous: entry.isAnonymous || false,
              isImpersonating: entry.isImpersonating || false,
              loggedAt: timestampToISO(entry.loggedAt),
              piCid: entry.piCid,
              rating: entry.rating,
              reviewId: entry.reviewId || null,
              reviewerUsername: entry.reviewerUsername || null,
              updatedAt: timestampToISO(entry.updatedAt),
              userCid: entry.userCid || null
            };

            reviewsByPi[piId].allReviews.push(reviewMetadata);

            // Also track for aggregation (use most recent rating per user)
            // Add rating (handle legacy format where rating might be 0-50 instead of 1-5)
            let rating = entry.rating;
            if (rating > 5) {
              rating = Math.round(rating / 10);
            }
            // Ensure rating is between 1-5
            rating = Math.max(1, Math.min(5, rating));

            // Track rating by user (use the most recent rating if user has multiple reviews/edits)
            const userId = String(entry.userCid || entry.actualUserCid || '');

            if (userId && userId !== 'undefined' && userId !== 'null') {
              // Get timestamp for comparison (use updatedAt if available, else createdAt, else loggedAt)
              let entryTimestamp = 0;
              const timestampField = entry.updatedAt || entry.createdAt || entry.loggedAt;

              if (timestampField) {
                if (timestampField.toDate) {
                  entryTimestamp = timestampField.toDate().getTime();
                } else if (timestampField._seconds) {
                  entryTimestamp = timestampField._seconds * 1000;
                } else if (timestampField instanceof Date) {
                  entryTimestamp = timestampField.getTime();
                }
              }

              const existingTimestamp = reviewsByPi[piId].userTimestamps[userId] || 0;

              // Only keep if this is a new review or more recent than existing
              if (entryTimestamp >= existingTimestamp) {
                reviewsByPi[piId].ratingsByUser[userId] = rating;
                reviewsByPi[piId].userTimestamps[userId] = entryTimestamp;
              }
            }
          });
        }

        if (totalReviewsFound > 0) {
          logger.log('INFO', `[Backfill] Processed ${totalReviewsFound} review entries for ${dateStr} (${skippedReviews} skipped, ${totalReviewsFound - skippedReviews} valid)`);
        }

        if (Object.keys(reviewsByPi).length === 0) {
          logger.log('WARN', `[Backfill] No reviews found in shards for ${dateStr} (found ${totalReviewsFound} entries, ${skippedReviews} skipped)`);
          skippedDates++;
          continue;
        }

        logger.log('INFO', `[Backfill] Aggregated reviews for ${Object.keys(reviewsByPi).length} PIs for ${dateStr}`);

        // Transform to BigQuery rows
        const lastUpdatedTimestamp = new Date().toISOString();
        const bigqueryRows = [];

        for (const [piId, piData] of Object.entries(reviewsByPi)) {
          // Use unique user ratings (one rating per user - most recent if edited)
          const uniqueRatings = Object.values(piData.ratingsByUser);
          const totalRatings = uniqueRatings.length; // Count of unique users who rated
          const averageRating = totalRatings > 0
            ? uniqueRatings.reduce((sum, r) => sum + r, 0) / totalRatings
            : null;

          bigqueryRows.push({
            date: dateStr,
            pi_id: parseInt(piId, 10),
            average_rating: averageRating ? Number(averageRating.toFixed(2)) : null,
            total_ratings: totalRatings,
            ratings_by_user: piData.ratingsByUser || {},
            reviews: piData.allReviews || [], // Full review metadata array
            last_updated: lastUpdatedTimestamp
          });
        }

        logger.log('INFO', `[Backfill] Created ${bigqueryRows.length} BigQuery rows for ${dateStr}`);

        if (bigqueryRows.length === 0) {
          logger.log('WARN', `[Backfill] No valid ratings rows for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'pi_ratings', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process ratings for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ PI ratings backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] PI ratings backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point
 */
async function backfillPIRatings(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting PI Ratings backfill...');

  try {
    const result = await backfillRatings(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI execution
if (require.main === module) {
  const args = process.argv.slice(2);
  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillPIRatings(startDate, endDate)
    .then(result => {
      console.log('Backfill result:', result);
      process.exit(0);
    })
    .catch(error => {
      console.error('Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillPIRatings };

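The ratings script does the heaviest transformation of the five: legacy ratings stored on a 0-50 scale are scaled down and clamped to 1-5, and when a user has edited a review, only the most recently timestamped rating survives into ratings_by_user. A self-contained sketch of just that aggregation, on made-up entries:

// Normalization as in the script above: scale legacy values, clamp to 1-5.
function normalizeRating(raw) {
  let rating = raw;
  if (rating > 5) {
    rating = Math.round(rating / 10); // legacy 0-50 scale
  }
  return Math.max(1, Math.min(5, rating));
}

// Hypothetical review entries for one PI: user 7 edited a 2-star review up
// to 4 stars; user 9's rating is in the legacy 0-50 format.
const entries = [
  { userCid: 7, rating: 2, updatedAt: new Date('2024-03-01') },
  { userCid: 7, rating: 4, updatedAt: new Date('2024-03-05') },
  { userCid: 9, rating: 45, updatedAt: new Date('2024-03-02') }
];

const ratingsByUser = {};
const userTimestamps = {};
for (const entry of entries) {
  const userId = String(entry.userCid);
  const ts = entry.updatedAt.getTime();
  if (ts >= (userTimestamps[userId] || 0)) { // keep the newest rating per user
    ratingsByUser[userId] = normalizeRating(entry.rating);
    userTimestamps[userId] = ts;
  }
}

console.log(ratingsByUser); // { '7': 4, '9': 5 }  (45 -> round(4.5) -> 5)
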
package/functions/maintenance/backfill-price-data-from-firestore/index.js
ADDED
@@ -0,0 +1,220 @@

/**
 * @fileoverview Backfill Price Data from Firestore Shards to BigQuery
 *
 * This function reads existing price data from Firestore shards
 * and writes it to a BigQuery table.
 *
 * Usage (Local Node.js script):
 *   node index.js
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *
 * Features:
 * - Reads from Firestore shards: asset_prices/shard_0, shard_1, etc.
 * - Transforms Firestore structure to BigQuery rows
 * - Handles compressed Firestore data
 * - Uses load jobs (free) for efficient batching
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensureAssetPricesTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Convert Firestore Timestamp to ISO string
 */
function convertTimestamp(ts) {
  if (!ts) return new Date().toISOString();
  if (ts.toDate) {
    return ts.toDate().toISOString();
  }
  if (ts instanceof Date) {
    return ts.toISOString();
  }
  if (typeof ts === 'string') {
    return ts;
  }
  return new Date().toISOString();
}

/**
 * Backfill price data from Firestore shards to BigQuery
 */
async function backfillPriceData(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting price data backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensureAssetPricesTable(logger);

    const priceCollection = db.collection('asset_prices');

    // Get all shard documents
    logger.log('INFO', '[Backfill] Fetching price shard documents from Firestore...');
    const snapshot = await priceCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No price shard documents found in Firestore');
      return { success: false, message: 'No shards found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} price shard documents`);

    let totalRows = 0;
    let processedShards = 0;
    let skippedInstruments = 0;

    // Process each shard document
    for (const doc of snapshot.docs) {
      const shardId = doc.id; // e.g., "shard_0", "shard_1"

      try {
        const shardData = tryDecompress(doc.data());

        // Process each instrument in the shard
        for (const [instrumentId, instrumentData] of Object.entries(shardData)) {
          // Skip metadata fields
          if (instrumentId.startsWith('_')) continue;

          if (!instrumentData || !instrumentData.prices) {
            skippedInstruments++;
            continue;
          }

          const ticker = instrumentData.ticker || `unknown_${instrumentId}`;
          const prices = instrumentData.prices || {};
          const lastUpdated = convertTimestamp(instrumentData.lastUpdated);

          // Transform to BigQuery rows (one row per date)
          const bigqueryRows = [];

          for (const [dateStr, price] of Object.entries(prices)) {
            // Filter by date range if provided
            if (startDate && endDate) {
              if (dateStr < startDate || dateStr > endDate) {
                continue;
              }
            }

            // Validate date format (YYYY-MM-DD)
            if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
              logger.log('WARN', `[Backfill] Skipping invalid date format: ${dateStr} for instrument ${instrumentId}`);
              continue;
            }

            bigqueryRows.push({
              date: dateStr,
              instrument_id: parseInt(instrumentId, 10),
              ticker: ticker,
              price: typeof price === 'number' ? price : null,
              open: null, // Firestore shards only store closing price
              high: null,
              low: null,
              close: typeof price === 'number' ? price : null,
              volume: null,
              fetched_at: lastUpdated
            });
          }

          if (bigqueryRows.length === 0) {
            skippedInstruments++;
            continue;
          }

          // Write to BigQuery using load jobs (free, batched)
          const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
          await insertRows(datasetId, 'asset_prices', bigqueryRows, logger);

          totalRows += bigqueryRows.length;
        }

        processedShards++;

        if (processedShards % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedShards} shards, ${totalRows} rows so far...`);
        }
      } catch (shardError) {
        logger.log('ERROR', `[Backfill] Failed to process shard ${shardId}: ${shardError.message}`);
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ Price data backfill complete: ${processedShards} shards processed, ${totalRows} rows, ${skippedInstruments} instruments skipped`);

    return { success: true, processedShards, totalRows, skippedInstruments };
  } catch (error) {
    logger.log('ERROR', `[Backfill] Price data backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point
 */
async function backfillPriceDataMain(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting Price Data backfill...');

  try {
    const result = await backfillPriceData(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI handling
if (require.main === module) {
  const args = process.argv.slice(2);

  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillPriceDataMain(startDate, endDate)
    .then(result => {
      console.log('\n✅ Backfill completed successfully!');
      console.log('Results:', JSON.stringify(result, null, 2));
      process.exit(0);
    })
    .catch(error => {
      console.error('\n❌ Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillPriceData, backfillPriceDataMain };

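For orientation, here is the per-instrument shape the price script consumes from each shard, inferred from the fields it reads (the instrument ID, ticker, and values are hypothetical):

// Hypothetical decompressed contents of asset_prices/shard_0.
const shard0 = {
  _meta: { updatedBy: 'price-fetcher' }, // keys starting with "_" are skipped
  '1001': {
    ticker: 'AAPL',
    lastUpdated: new Date('2024-06-01T00:00:00Z'), // becomes fetched_at
    prices: {
      '2024-05-30': 191.29, // one closing price per YYYY-MM-DD key
      '2024-05-31': 192.25
    }
  }
};
// Each (instrument, date) pair becomes one asset_prices row, with price and
// close set to the stored value and open/high/low/volume left null.
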
package/functions/maintenance/backfill-watchlist-membership/index.js
ADDED
@@ -0,0 +1,190 @@

/**
 * @fileoverview Backfill Watchlist Membership from Firestore to BigQuery
 *
 * This function reads existing watchlist membership data from Firestore (WatchlistMembershipData/{date})
 * and writes it to a BigQuery table.
 *
 * Usage (Local Node.js script):
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *   node index.js (backfills all dates)
 *
 * Features:
 * - Backfills watchlist membership for a date range (or all dates)
 * - Uses load jobs (free) for efficient batching
 * - Handles compressed Firestore data
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensureWatchlistMembershipTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill watchlist membership from Firestore to BigQuery for a date range
 */
async function backfillWatchlistMembership(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting watchlist membership backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensureWatchlistMembershipTable(logger);

    const membershipCollection = db.collection('WatchlistMembershipData');

    // Get all membership documents
    logger.log('INFO', '[Backfill] Fetching watchlist membership documents from Firestore...');
    const snapshot = await membershipCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No watchlist membership documents found in Firestore');
      return { success: false, message: 'No watchlist membership found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} watchlist membership documents`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each document
    for (const doc of snapshot.docs) {
      const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

      // Filter by date range if provided
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        const data = tryDecompress(doc.data());
        const { date, lastUpdated, ...watchlistMembership } = data;

        if (!watchlistMembership || Object.keys(watchlistMembership).length === 0) {
          logger.log('WARN', `[Backfill] No watchlist membership in document for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Transform to BigQuery rows
        const lastUpdatedTimestamp = lastUpdated
          ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
          : new Date().toISOString();

        const bigqueryRows = [];
        for (const [piId, membershipData] of Object.entries(watchlistMembership)) {
          if (membershipData && typeof membershipData === 'object') {
            bigqueryRows.push({
              date: dateStr,
              pi_id: parseInt(piId, 10),
              total_users: membershipData.totalUsers || null,
              public_watchlist_count: membershipData.publicWatchlistCount || null,
              private_watchlist_count: membershipData.privateWatchlistCount || null,
              users: membershipData.users || [],
              last_updated: lastUpdatedTimestamp
            });
          }
        }

        if (bigqueryRows.length === 0) {
          logger.log('WARN', `[Backfill] No valid watchlist membership rows for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'watchlist_membership', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process watchlist membership for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ Watchlist membership backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] Watchlist membership backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point
 */
async function backfillWatchlistMembershipData(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting Watchlist Membership backfill...');

  try {
    const result = await backfillWatchlistMembership(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI execution
if (require.main === module) {
  const args = process.argv.slice(2);
  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillWatchlistMembershipData(startDate, endDate)
    .then(result => {
      console.log('Backfill result:', result);
      process.exit(0);
    })
    .catch(error => {
      console.error('Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillWatchlistMembershipData };

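The watchlist script mirrors the page-views one: a single document per date, keyed by PI ID. A sketch of the decompressed document shape it maps to BigQuery columns (all IDs and values hypothetical):

// Hypothetical WatchlistMembershipData/2024-06-01 document after tryDecompress.
const membershipDoc = {
  date: '2024-06-01',          // stripped out before row building
  lastUpdated: new Date(),     // -> last_updated on every row
  '1234': {
    totalUsers: 42,            // -> total_users
    publicWatchlistCount: 30,  // -> public_watchlist_count
    privateWatchlistCount: 12, // -> private_watchlist_count
    users: ['u_001', 'u_002']  // -> users
  }
};
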
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.719",
+  "version": "1.0.720",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [
@@ -25,7 +25,12 @@
     "functions/maintenance/backfill-instrument-insights",
     "functions/maintenance/backfill-pi-master-list-rankings",
     "functions/maintenance/backfill-task-engine-data",
-    "functions/maintenance/backfill-ticker-mappings"
+    "functions/maintenance/backfill-ticker-mappings",
+    "functions/maintenance/backfill-price-data-from-firestore",
+    "functions/maintenance/backfill-pi-alert-history",
+    "functions/maintenance/backfill-pi-page-views",
+    "functions/maintenance/backfill-pi-ratings",
+    "functions/maintenance/backfill-watchlist-membership"
   ],
   "keywords": [
     "bulltrackers",
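The files array in package.json is npm's include whitelist, so each new maintenance directory has to be listed there for it to ship in the published tarball. One way to check before publishing is a dry-run pack, which prints the tarball contents without uploading anything:

npm pack --dry-run

The new backfill-* directories should appear in the listed contents.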