bulltrackers-module 1.0.718 → 1.0.720

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -637,6 +637,7 @@ const SCHEMAS = {
  { name: 'average_rating', type: 'FLOAT64', mode: 'NULLABLE' },
  { name: 'total_ratings', type: 'INT64', mode: 'NULLABLE' },
  { name: 'ratings_by_user', type: 'JSON', mode: 'NULLABLE' },
+ { name: 'reviews', type: 'JSON', mode: 'NULLABLE' }, // Full review metadata array
  { name: 'last_updated', type: 'TIMESTAMP', mode: 'REQUIRED' }
  ],
  pi_page_views: [
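
For context, the only schema change in this hunk is the new `reviews` JSON column on the `pi_ratings` table; it is populated by the ratings backfill script added below. A sketch of a row matching the extended schema (values are illustrative; the field names come from that script):

// Illustrative pi_ratings row after the schema change (sample values only).
const sampleRatingsRow = {
  date: '2024-06-01',                           // DATE
  pi_id: 12345,                                 // INT64 (hypothetical PI id)
  average_rating: 4.5,                          // FLOAT64, NULLABLE
  total_ratings: 2,                             // INT64, NULLABLE
  ratings_by_user: { '111': 5, '222': 4 },      // JSON, NULLABLE
  reviews: [                                    // new JSON column: full review metadata array
    { piCid: 12345, rating: 5, comment: 'Great PI', userCid: '111', createdAt: '2024-06-01T10:00:00.000Z' }
  ],
  last_updated: '2024-06-01T12:00:00.000Z'      // TIMESTAMP, REQUIRED
};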
package/functions/maintenance/backfill-pi-alert-history/index.js ADDED
@@ -0,0 +1,201 @@
+ /**
+  * @fileoverview Backfill PI Alert History from Firestore to BigQuery
+  *
+  * This function reads existing PI alert history data from Firestore (PIAlertHistoryData/{date})
+  * and writes it to the BigQuery table.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *   node index.js              (backfills all dates)
+  *
+  * Features:
+  *   - Backfills alert history for a date range (or all dates)
+  *   - Uses load jobs (free) for efficient batching
+  *   - Handles compressed Firestore data
+  *   - Does NOT delete any Firestore data
+  */
+
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensurePIAlertHistoryTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+
+ const db = new Firestore();
+
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+
+ /**
+  * Backfill PI alert history from Firestore to BigQuery for a date range
+  */
+ async function backfillAlertHistory(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting PI alert history backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+
+   try {
+     await ensurePIAlertHistoryTable(logger);
+
+     const alertHistoryCollection = db.collection('PIAlertHistoryData');
+
+     // Get all alert history documents
+     logger.log('INFO', '[Backfill] Fetching alert history documents from Firestore...');
+     const snapshot = await alertHistoryCollection.get();
+
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No alert history documents found in Firestore');
+       return { success: false, message: 'No alert history found' };
+     }
+
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} alert history documents`);
+
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+
+     // Process each document
+     for (const doc of snapshot.docs) {
+       const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)
+
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+
+       try {
+         const data = tryDecompress(doc.data());
+         const { date, lastUpdated, ...piAlertHistory } = data;
+
+         if (!piAlertHistory || Object.keys(piAlertHistory).length === 0) {
+           logger.log('WARN', `[Backfill] No alert history in document for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Transform to BigQuery rows
+         const lastUpdatedTimestamp = lastUpdated
+           ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
+           : new Date().toISOString();
+
+         const bigqueryRows = [];
+         for (const [piId, alertData] of Object.entries(piAlertHistory)) {
+           if (alertData && typeof alertData === 'object') {
+             // Each PI can have multiple alert types
+             for (const [alertType, alertInfo] of Object.entries(alertData)) {
+               if (alertInfo && typeof alertInfo === 'object') {
+                 const lastTriggered = alertInfo.lastTriggered
+                   ? (alertInfo.lastTriggered.toDate ? alertInfo.lastTriggered.toDate().toISOString() : alertInfo.lastTriggered)
+                   : null;
+
+                 bigqueryRows.push({
+                   date: dateStr,
+                   pi_id: parseInt(piId, 10),
+                   alert_type: alertType,
+                   triggered: alertInfo.triggered || false,
+                   trigger_count: alertInfo.count || 0,
+                   triggered_for: alertInfo.triggeredFor || [],
+                   metadata: alertInfo.metadata || {},
+                   last_triggered: lastTriggered,
+                   last_updated: lastUpdatedTimestamp
+                 });
+               }
+             }
+           }
+         }
+
+         if (bigqueryRows.length === 0) {
+           logger.log('WARN', `[Backfill] No valid alert history rows for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'pi_alert_history', bigqueryRows, logger);
+
+         totalRows += bigqueryRows.length;
+         processedDates++;
+
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process alert history for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+
+     logger.log('SUCCESS', `[Backfill] ✅ PI alert history backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] PI alert history backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+
+ /**
+  * Main entry point
+  */
+ async function backfillPIAlertHistory(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+
+   logger.log('INFO', '[Backfill] Starting PI Alert History backfill...');
+
+   try {
+     const result = await backfillAlertHistory(startDate, endDate, logger);
+
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+
+ // CLI execution
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+   let startDate = null;
+   let endDate = null;
+
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+
+   backfillPIAlertHistory(startDate, endDate)
+     .then(result => {
+       console.log('Backfill result:', result);
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+
+ module.exports = { backfillPIAlertHistory };
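
Each of the five new scripts ships an identical `tryDecompress` helper, which expects compressed documents in a `{ _compressed: true, payload: <gzipped JSON> }` envelope. The writer side is not part of this diff; a minimal sketch of what it would have to look like for `tryDecompress` to round-trip (assumed, inferred from the reader):

// Hypothetical writer-side counterpart of tryDecompress (not in this package):
// gzip the JSON payload and wrap it in the envelope the backfill scripts expect.
const zlib = require('zlib');

function compressForFirestore(obj) {
  const payload = zlib.gzipSync(Buffer.from(JSON.stringify(obj), 'utf8'));
  return { _compressed: true, payload }; // Node Buffers are stored as Firestore Bytes
}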
package/functions/maintenance/backfill-pi-page-views/index.js ADDED
@@ -0,0 +1,189 @@
+ /**
+  * @fileoverview Backfill PI Page Views from Firestore to BigQuery
+  *
+  * This function reads existing PI page views data from Firestore (PIPageViewsData/{date})
+  * and writes it to the BigQuery table.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *   node index.js              (backfills all dates)
+  *
+  * Features:
+  *   - Backfills page views for a date range (or all dates)
+  *   - Uses load jobs (free) for efficient batching
+  *   - Handles compressed Firestore data
+  *   - Does NOT delete any Firestore data
+  */
+
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensurePIPageViewsTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+
+ const db = new Firestore();
+
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+
+ /**
+  * Backfill PI page views from Firestore to BigQuery for a date range
+  */
+ async function backfillPageViews(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting PI page views backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+
+   try {
+     await ensurePIPageViewsTable(logger);
+
+     const pageViewsCollection = db.collection('PIPageViewsData');
+
+     // Get all page views documents
+     logger.log('INFO', '[Backfill] Fetching page views documents from Firestore...');
+     const snapshot = await pageViewsCollection.get();
+
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No page views documents found in Firestore');
+       return { success: false, message: 'No page views found' };
+     }
+
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} page views documents`);
+
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+
+     // Process each document
+     for (const doc of snapshot.docs) {
+       const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)
+
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+
+       try {
+         const data = tryDecompress(doc.data());
+         const { date, lastUpdated, ...piPageViews } = data;
+
+         if (!piPageViews || Object.keys(piPageViews).length === 0) {
+           logger.log('WARN', `[Backfill] No page views in document for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Transform to BigQuery rows
+         const lastUpdatedTimestamp = lastUpdated
+           ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
+           : new Date().toISOString();
+
+         const bigqueryRows = [];
+         for (const [piId, pageViewData] of Object.entries(piPageViews)) {
+           if (pageViewData && typeof pageViewData === 'object') {
+             bigqueryRows.push({
+               date: dateStr,
+               pi_id: parseInt(piId, 10),
+               total_views: pageViewData.totalViews || null,
+               unique_viewers: pageViewData.uniqueViewers || null,
+               views_by_user: pageViewData.viewsByUser || {},
+               last_updated: lastUpdatedTimestamp
+             });
+           }
+         }
+
+         if (bigqueryRows.length === 0) {
+           logger.log('WARN', `[Backfill] No valid page views rows for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'pi_page_views', bigqueryRows, logger);
+
+         totalRows += bigqueryRows.length;
+         processedDates++;
+
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process page views for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+
+     logger.log('SUCCESS', `[Backfill] ✅ PI page views backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] PI page views backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+
+ /**
+  * Main entry point
+  */
+ async function backfillPIPageViews(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+
+   logger.log('INFO', '[Backfill] Starting PI Page Views backfill...');
+
+   try {
+     const result = await backfillPageViews(startDate, endDate, logger);
+
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+
+ // CLI execution
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+   let startDate = null;
+   let endDate = null;
+
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+
+   backfillPIPageViews(startDate, endDate)
+     .then(result => {
+       console.log('Backfill result:', result);
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+
+ module.exports = { backfillPIPageViews };
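
Besides the CLI, each script exports its entry point, so the backfills can also be driven programmatically. A usage sketch for the page-views script (the require path is an assumption based on the directory layout listed in package.json below):

// Programmatic invocation of the page-views backfill (path assumed).
const { backfillPIPageViews } = require('bulltrackers-module/functions/maintenance/backfill-pi-page-views');

backfillPIPageViews('2024-01-01', '2024-03-31')
  .then(({ processedDates, totalRows, skippedDates }) => {
    console.log(`Done: ${processedDates} dates, ${totalRows} rows, ${skippedDates} skipped`);
  })
  .catch(err => console.error('Backfill failed:', err));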
package/functions/maintenance/backfill-pi-ratings/index.js ADDED
@@ -0,0 +1,376 @@
+ /**
+  * @fileoverview Backfill PI Ratings from Firestore to BigQuery
+  *
+  * This function reads existing PI ratings data from Firestore (PiReviews/{date}/shards/),
+  * aggregates it by PI, and writes it to the BigQuery table.
+  *
+  * Note: Raw reviews are stored in PiReviews/{date}/shards/, not in PIRatingsData/{date}.
+  * This script aggregates the raw reviews into the expected format.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *   node index.js              (backfills all dates)
+  *
+  * Features:
+  *   - Backfills ratings for a date range (or all dates)
+  *   - Aggregates raw reviews from shards
+  *   - Uses load jobs (free) for efficient batching
+  *   - Handles compressed Firestore data
+  *   - Does NOT delete any Firestore data
+  */
+
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensurePIRatingsTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+
+ const db = new Firestore();
+
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+
+ /**
+  * Backfill PI ratings from Firestore to BigQuery for a date range.
+  * Reads from PiReviews/{date}/shards/ and aggregates by PI.
+  */
+ async function backfillRatings(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting PI ratings backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+
+   try {
+     await ensurePIRatingsTable(logger);
+
+     // Use a collection group query to find all shards.
+     // Path structure: PiReviews/{date}/shards/{shardId}
+     logger.log('INFO', '[Backfill] Fetching all shards from PiReviews using collection group query...');
+     const shardsCollectionGroup = db.collectionGroup('shards');
+     const allShardsSnapshot = await shardsCollectionGroup.get();
+
+     if (allShardsSnapshot.empty) {
+       logger.log('WARN', '[Backfill] No shards found in collection group query');
+       return { success: false, message: 'No shards found' };
+     }
+
+     logger.log('INFO', `[Backfill] Found ${allShardsSnapshot.size} shard documents total`);
+
+     // Group shards by date (extracted from the parent path: PiReviews/{date}/shards/{shardId})
+     // and filter to only shards under the PiReviews collection.
+     const shardsByDate = {};
+     let samplePath = null;
+
+     for (const shardDoc of allShardsSnapshot.docs) {
+       // Relative path structure: PiReviews/{date}/shards/{shardId}
+       const fullPath = shardDoc.ref.path;
+
+       // Log the first path for debugging
+       if (!samplePath) {
+         samplePath = fullPath;
+         logger.log('INFO', `[Backfill] Sample shard path: ${fullPath}`);
+       }
+
+       // Check if this shard is under the PiReviews collection
+       if (!fullPath.includes('PiReviews')) {
+         continue; // Skip shards from other collections
+       }
+
+       // Extract the date segment that follows 'PiReviews' in the path
+       const pathParts = fullPath.split('/');
+
+       // Find PiReviews in the path
+       let piReviewsIndex = -1;
+       for (let i = 0; i < pathParts.length; i++) {
+         if (pathParts[i] === 'PiReviews') {
+           piReviewsIndex = i;
+           break;
+         }
+       }
+
+       if (piReviewsIndex >= 0 && pathParts[piReviewsIndex + 1]) {
+         const dateStr = pathParts[piReviewsIndex + 1];
+
+         // Validate date format (YYYY-MM-DD)
+         if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
+           if (!shardsByDate[dateStr]) {
+             shardsByDate[dateStr] = [];
+           }
+           shardsByDate[dateStr].push(shardDoc);
+         } else {
+           logger.log('DEBUG', `[Backfill] Skipping invalid date format: ${dateStr} from path ${fullPath}`);
+         }
+       } else {
+         logger.log('DEBUG', `[Backfill] Could not extract date from path: ${fullPath}`);
+       }
+     }
+
+     const uniqueDates = Object.keys(shardsByDate);
+     logger.log('INFO', `[Backfill] Found shards for ${uniqueDates.length} unique dates: ${uniqueDates.slice(0, 5).join(', ')}${uniqueDates.length > 5 ? '...' : ''}`);
+
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+
+     // Process each date
+     for (const dateStr of uniqueDates) {
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+
+       try {
+         // Get all shards for this date (already grouped)
+         const shardsForDate = shardsByDate[dateStr];
+
+         if (!shardsForDate || shardsForDate.length === 0) {
+           logger.log('WARN', `[Backfill] No shards found for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Aggregate reviews by PI
+         const reviewsByPi = {};
+         let totalReviewsFound = 0;
+         let skippedReviews = 0;
+
+         // Helper to convert a Firestore timestamp to an ISO string
+         function timestampToISO(timestamp) {
+           if (!timestamp) return null;
+           if (timestamp.toDate) {
+             return timestamp.toDate().toISOString();
+           } else if (timestamp._seconds) {
+             return new Date(timestamp._seconds * 1000).toISOString();
+           } else if (timestamp instanceof Date) {
+             return timestamp.toISOString();
+           }
+           return null;
+         }
+
+         for (const shardDoc of shardsForDate) {
+           const shardData = tryDecompress(shardDoc.data());
+
+           if (!shardData || typeof shardData !== 'object') {
+             continue;
+           }
+
+           // Shard data structure: keys are like "reviews.29312236_31075566" with review data as values.
+           // Process all keys that contain review data.
+           Object.keys(shardData).forEach(key => {
+             const entry = shardData[key];
+             totalReviewsFound++;
+
+             // Skip if not an object
+             if (!entry || typeof entry !== 'object') {
+               skippedReviews++;
+               return;
+             }
+
+             // Check for required fields (piCid and rating)
+             if (entry.piCid === undefined || entry.rating === undefined) {
+               skippedReviews++;
+               return;
+             }
+
+             const piId = String(entry.piCid);
+
+             if (!reviewsByPi[piId]) {
+               reviewsByPi[piId] = {
+                 allReviews: [], // Store all review objects with full metadata
+                 ratingsByUser: {},
+                 userTimestamps: {} // Track timestamps to keep the most recent rating per user for aggregation
+               };
+             }
+
+             // Store full review metadata
+             const reviewMetadata = {
+               action: entry.action || null,
+               actualUserCid: entry.actualUserCid || null,
+               comment: entry.comment || null,
+               createdAt: timestampToISO(entry.createdAt),
+               isAnonymous: entry.isAnonymous || false,
+               isImpersonating: entry.isImpersonating || false,
+               loggedAt: timestampToISO(entry.loggedAt),
+               piCid: entry.piCid,
+               rating: entry.rating,
+               reviewId: entry.reviewId || null,
+               reviewerUsername: entry.reviewerUsername || null,
+               updatedAt: timestampToISO(entry.updatedAt),
+               userCid: entry.userCid || null
+             };
+
+             reviewsByPi[piId].allReviews.push(reviewMetadata);
+
+             // Also track for aggregation (use the most recent rating per user).
+             // Normalize the rating (handle the legacy format where rating might be 0-50 instead of 1-5).
+             let rating = entry.rating;
+             if (rating > 5) {
+               rating = Math.round(rating / 10);
+             }
+             // Ensure the rating is between 1 and 5
+             rating = Math.max(1, Math.min(5, rating));
+
+             // Track the rating by user (keep the most recent rating if the user has multiple reviews/edits)
+             const userId = String(entry.userCid || entry.actualUserCid || '');
+
+             if (userId && userId !== 'undefined' && userId !== 'null') {
+               // Get the timestamp for comparison (use updatedAt if available, else createdAt, else loggedAt)
+               let entryTimestamp = 0;
+               const timestampField = entry.updatedAt || entry.createdAt || entry.loggedAt;
+
+               if (timestampField) {
+                 if (timestampField.toDate) {
+                   entryTimestamp = timestampField.toDate().getTime();
+                 } else if (timestampField._seconds) {
+                   entryTimestamp = timestampField._seconds * 1000;
+                 } else if (timestampField instanceof Date) {
+                   entryTimestamp = timestampField.getTime();
+                 }
+               }
+
+               const existingTimestamp = reviewsByPi[piId].userTimestamps[userId] || 0;
+
+               // Only keep this rating if it is a new review or more recent than the existing one
+               if (entryTimestamp >= existingTimestamp) {
+                 reviewsByPi[piId].ratingsByUser[userId] = rating;
+                 reviewsByPi[piId].userTimestamps[userId] = entryTimestamp;
+               }
+             }
+           });
+         }
+
+         if (totalReviewsFound > 0) {
+           logger.log('INFO', `[Backfill] Processed ${totalReviewsFound} review entries for ${dateStr} (${skippedReviews} skipped, ${totalReviewsFound - skippedReviews} valid)`);
+         }
+
+         if (Object.keys(reviewsByPi).length === 0) {
+           logger.log('WARN', `[Backfill] No reviews found in shards for ${dateStr} (found ${totalReviewsFound} entries, ${skippedReviews} skipped)`);
+           skippedDates++;
+           continue;
+         }
+
+         logger.log('INFO', `[Backfill] Aggregated reviews for ${Object.keys(reviewsByPi).length} PIs for ${dateStr}`);
+
+         // Transform to BigQuery rows
+         const lastUpdatedTimestamp = new Date().toISOString();
+         const bigqueryRows = [];
+
+         for (const [piId, piData] of Object.entries(reviewsByPi)) {
+           // Use unique user ratings (one rating per user - most recent if edited)
+           const uniqueRatings = Object.values(piData.ratingsByUser);
+           const totalRatings = uniqueRatings.length; // Count of unique users who rated
+           const averageRating = totalRatings > 0
+             ? uniqueRatings.reduce((sum, r) => sum + r, 0) / totalRatings
+             : null;
+
+           bigqueryRows.push({
+             date: dateStr,
+             pi_id: parseInt(piId, 10),
+             average_rating: averageRating ? Number(averageRating.toFixed(2)) : null,
+             total_ratings: totalRatings,
+             ratings_by_user: piData.ratingsByUser || {},
+             reviews: piData.allReviews || [], // Full review metadata array
+             last_updated: lastUpdatedTimestamp
+           });
+         }
+
+         logger.log('INFO', `[Backfill] Created ${bigqueryRows.length} BigQuery rows for ${dateStr}`);
+
+         if (bigqueryRows.length === 0) {
+           logger.log('WARN', `[Backfill] No valid ratings rows for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'pi_ratings', bigqueryRows, logger);
+
+         totalRows += bigqueryRows.length;
+         processedDates++;
+
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process ratings for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+
+     logger.log('SUCCESS', `[Backfill] ✅ PI ratings backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] PI ratings backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+
+ /**
+  * Main entry point
+  */
+ async function backfillPIRatings(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+
+   logger.log('INFO', '[Backfill] Starting PI Ratings backfill...');
+
+   try {
+     const result = await backfillRatings(startDate, endDate, logger);
+
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+
+ // CLI execution
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+   let startDate = null;
+   let endDate = null;
+
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+
+   backfillPIRatings(startDate, endDate)
+     .then(result => {
+       console.log('Backfill result:', result);
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+
+ module.exports = { backfillPIRatings };
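
One detail worth calling out in the ratings script: legacy reviews may store ratings on a 0-50 scale, which the loop above rescales and clamps to 1-5, keeping only each user's most recent rating. The normalization in isolation, with worked values:

// Isolated sketch of the legacy-rating normalization used in backfillRatings.
function normalizeRating(rating) {
  if (rating > 5) rating = Math.round(rating / 10); // legacy 0-50 scale -> 1-5
  return Math.max(1, Math.min(5, rating));          // clamp into [1, 5]
}

console.log(normalizeRating(45)); // 5 (45 -> 4.5, rounds to 5)
console.log(normalizeRating(3));  // 3 (already on the 1-5 scale)
console.log(normalizeRating(0));  // 1 (clamped up to the minimum)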
package/functions/maintenance/backfill-price-data-from-firestore/index.js ADDED
@@ -0,0 +1,220 @@
+ /**
+  * @fileoverview Backfill Price Data from Firestore Shards to BigQuery
+  *
+  * This function reads existing price data from Firestore shards
+  * and writes it to the BigQuery table.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *
+  * Features:
+  *   - Reads from Firestore shards: asset_prices/shard_0, shard_1, etc.
+  *   - Transforms the Firestore structure into BigQuery rows
+  *   - Handles compressed Firestore data
+  *   - Uses load jobs (free) for efficient batching
+  *   - Does NOT delete any Firestore data
+  */
+
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensureAssetPricesTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+
+ const db = new Firestore();
+
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+
+ /**
+  * Convert a Firestore Timestamp to an ISO string
+  */
+ function convertTimestamp(ts) {
+   if (!ts) return new Date().toISOString();
+   if (ts.toDate) {
+     return ts.toDate().toISOString();
+   }
+   if (ts instanceof Date) {
+     return ts.toISOString();
+   }
+   if (typeof ts === 'string') {
+     return ts;
+   }
+   return new Date().toISOString();
+ }
+
+ /**
+  * Backfill price data from Firestore shards to BigQuery
+  */
+ async function backfillPriceData(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting price data backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+
+   try {
+     await ensureAssetPricesTable(logger);
+
+     const priceCollection = db.collection('asset_prices');
+
+     // Get all shard documents
+     logger.log('INFO', '[Backfill] Fetching price shard documents from Firestore...');
+     const snapshot = await priceCollection.get();
+
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No price shard documents found in Firestore');
+       return { success: false, message: 'No shards found' };
+     }
+
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} price shard documents`);
+
+     let totalRows = 0;
+     let processedShards = 0;
+     let skippedInstruments = 0;
+
+     // Process each shard document
+     for (const doc of snapshot.docs) {
+       const shardId = doc.id; // e.g., "shard_0", "shard_1"
+
+       try {
+         const shardData = tryDecompress(doc.data());
+
+         // Process each instrument in the shard
+         for (const [instrumentId, instrumentData] of Object.entries(shardData)) {
+           // Skip metadata fields
+           if (instrumentId.startsWith('_')) continue;
+
+           if (!instrumentData || !instrumentData.prices) {
+             skippedInstruments++;
+             continue;
+           }
+
+           const ticker = instrumentData.ticker || `unknown_${instrumentId}`;
+           const prices = instrumentData.prices || {};
+           const lastUpdated = convertTimestamp(instrumentData.lastUpdated);
+
+           // Transform to BigQuery rows (one row per date)
+           const bigqueryRows = [];
+
+           for (const [dateStr, price] of Object.entries(prices)) {
+             // Filter by date range if provided
+             if (startDate && endDate) {
+               if (dateStr < startDate || dateStr > endDate) {
+                 continue;
+               }
+             }
+
+             // Validate date format (YYYY-MM-DD)
+             if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
+               logger.log('WARN', `[Backfill] Skipping invalid date format: ${dateStr} for instrument ${instrumentId}`);
+               continue;
+             }
+
+             bigqueryRows.push({
+               date: dateStr,
+               instrument_id: parseInt(instrumentId, 10),
+               ticker: ticker,
+               price: typeof price === 'number' ? price : null,
+               open: null, // Firestore shards only store the closing price
+               high: null,
+               low: null,
+               close: typeof price === 'number' ? price : null,
+               volume: null,
+               fetched_at: lastUpdated
+             });
+           }
+
+           if (bigqueryRows.length === 0) {
+             skippedInstruments++;
+             continue;
+           }
+
+           // Write to BigQuery using load jobs (free, batched)
+           const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+           await insertRows(datasetId, 'asset_prices', bigqueryRows, logger);
+
+           totalRows += bigqueryRows.length;
+         }
+
+         processedShards++;
+
+         if (processedShards % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedShards} shards, ${totalRows} rows so far...`);
+         }
+       } catch (shardError) {
+         logger.log('ERROR', `[Backfill] Failed to process shard ${shardId}: ${shardError.message}`);
+       }
+     }
+
+     logger.log('SUCCESS', `[Backfill] ✅ Price data backfill complete: ${processedShards} shards processed, ${totalRows} rows, ${skippedInstruments} instruments skipped`);
+
+     return { success: true, processedShards, totalRows, skippedInstruments };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Price data backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+
+ /**
+  * Main entry point
+  */
+ async function backfillPriceDataMain(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+
+   logger.log('INFO', '[Backfill] Starting Price Data backfill...');
+
+   try {
+     const result = await backfillPriceData(startDate, endDate, logger);
+
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+
+ // CLI handling
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+
+   let startDate = null;
+   let endDate = null;
+
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+
+   backfillPriceDataMain(startDate, endDate)
+     .then(result => {
+       console.log('\n✅ Backfill completed successfully!');
+       console.log('Results:', JSON.stringify(result, null, 2));
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('\n❌ Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+
+ module.exports = { backfillPriceData, backfillPriceDataMain };
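
The price transform implies a specific shard layout: instrument IDs map to `{ ticker, prices, lastUpdated }` objects, with `prices` keyed by date and holding closing prices only (hence the null open/high/low/volume columns). The shape inferred from the loop above (values illustrative):

// asset_prices shard document shape assumed by backfillPriceData (illustrative).
const exampleShard = {
  _meta: {},                  // keys starting with '_' are skipped as metadata
  '1001': {
    ticker: 'AAPL',
    prices: {
      '2024-01-02': 185.64,   // one closing price per YYYY-MM-DD key
      '2024-01-03': 184.25
    },
    lastUpdated: new Date('2024-01-03T21:00:00Z')
  }
};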
package/functions/maintenance/backfill-watchlist-membership/index.js ADDED
@@ -0,0 +1,190 @@
+ /**
+  * @fileoverview Backfill Watchlist Membership from Firestore to BigQuery
+  *
+  * This function reads existing watchlist membership data from Firestore (WatchlistMembershipData/{date})
+  * and writes it to the BigQuery table.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *   node index.js              (backfills all dates)
+  *
+  * Features:
+  *   - Backfills watchlist membership for a date range (or all dates)
+  *   - Uses load jobs (free) for efficient batching
+  *   - Handles compressed Firestore data
+  *   - Does NOT delete any Firestore data
+  */
+
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensureWatchlistMembershipTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+
+ const db = new Firestore();
+
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+
+ /**
+  * Backfill watchlist membership from Firestore to BigQuery for a date range
+  */
+ async function backfillWatchlistMembership(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting watchlist membership backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+
+   try {
+     await ensureWatchlistMembershipTable(logger);
+
+     const membershipCollection = db.collection('WatchlistMembershipData');
+
+     // Get all membership documents
+     logger.log('INFO', '[Backfill] Fetching watchlist membership documents from Firestore...');
+     const snapshot = await membershipCollection.get();
+
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No watchlist membership documents found in Firestore');
+       return { success: false, message: 'No watchlist membership found' };
+     }
+
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} watchlist membership documents`);
+
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+
+     // Process each document
+     for (const doc of snapshot.docs) {
+       const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)
+
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+
+       try {
+         const data = tryDecompress(doc.data());
+         const { date, lastUpdated, ...watchlistMembership } = data;
+
+         if (!watchlistMembership || Object.keys(watchlistMembership).length === 0) {
+           logger.log('WARN', `[Backfill] No watchlist membership in document for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Transform to BigQuery rows
+         const lastUpdatedTimestamp = lastUpdated
+           ? (lastUpdated.toDate ? lastUpdated.toDate().toISOString() : lastUpdated)
+           : new Date().toISOString();
+
+         const bigqueryRows = [];
+         for (const [piId, membershipData] of Object.entries(watchlistMembership)) {
+           if (membershipData && typeof membershipData === 'object') {
+             bigqueryRows.push({
+               date: dateStr,
+               pi_id: parseInt(piId, 10),
+               total_users: membershipData.totalUsers || null,
+               public_watchlist_count: membershipData.publicWatchlistCount || null,
+               private_watchlist_count: membershipData.privateWatchlistCount || null,
+               users: membershipData.users || [],
+               last_updated: lastUpdatedTimestamp
+             });
+           }
+         }
+
+         if (bigqueryRows.length === 0) {
+           logger.log('WARN', `[Backfill] No valid watchlist membership rows for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'watchlist_membership', bigqueryRows, logger);
+
+         totalRows += bigqueryRows.length;
+         processedDates++;
+
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process watchlist membership for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+
+     logger.log('SUCCESS', `[Backfill] ✅ Watchlist membership backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Watchlist membership backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+
+ /**
+  * Main entry point
+  */
+ async function backfillWatchlistMembershipData(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+
+   logger.log('INFO', '[Backfill] Starting Watchlist Membership backfill...');
+
+   try {
+     const result = await backfillWatchlistMembership(startDate, endDate, logger);
+
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+
+ // CLI execution
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+   let startDate = null;
+   let endDate = null;
+
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+
+   backfillWatchlistMembershipData(startDate, endDate)
+     .then(result => {
+       console.log('Backfill result:', result);
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+
+ module.exports = { backfillWatchlistMembershipData };
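
Like the other per-date collections, the watchlist transform implies a document shape of PI IDs mapped to aggregate objects, with `date` and `lastUpdated` stripped off before the per-PI loop. Inferred from the destructuring above (values illustrative):

// WatchlistMembershipData/{date} document shape assumed above (illustrative).
const exampleMembershipDoc = {
  date: '2024-06-01',
  lastUpdated: new Date('2024-06-01T12:00:00Z'),
  '1001': {
    totalUsers: 42,
    publicWatchlistCount: 30,
    privateWatchlistCount: 12,
    users: ['user-a', 'user-b']
  }
};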
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "bulltrackers-module",
-   "version": "1.0.718",
+   "version": "1.0.720",
    "description": "Helper Functions for Bulltrackers.",
    "main": "index.js",
    "files": [
@@ -25,7 +25,12 @@
    "functions/maintenance/backfill-instrument-insights",
    "functions/maintenance/backfill-pi-master-list-rankings",
    "functions/maintenance/backfill-task-engine-data",
-   "functions/maintenance/backfill-ticker-mappings"
+   "functions/maintenance/backfill-ticker-mappings",
+   "functions/maintenance/backfill-price-data-from-firestore",
+   "functions/maintenance/backfill-pi-alert-history",
+   "functions/maintenance/backfill-pi-page-views",
+   "functions/maintenance/backfill-pi-ratings",
+   "functions/maintenance/backfill-watchlist-membership"
  ],
  "keywords": [
    "bulltrackers",