bulltrackers-module 1.0.712 → 1.0.714

@@ -0,0 +1,180 @@
+ /**
+  * @fileoverview Backfill Instrument Insights from Firestore to BigQuery
+  *
+  * This function reads existing insights data from Firestore
+  * and writes it to a BigQuery table.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
+  *   node index.js                  (backfills all dates)
+  *
+  * Features:
+  * - Backfills insights for a date range (or all dates)
+  * - Uses load jobs (free) for efficient batching
+  * - Handles compressed Firestore data
+  * - Does NOT delete any Firestore data
+  */
+ 
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensureInstrumentInsightsTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');
+ 
+ const db = new Firestore();
+ 
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+ 
+ /**
+  * Backfill insights from Firestore to BigQuery for a date range
+  */
+ async function backfillInsights(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting insights backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+ 
+   try {
+     await ensureInstrumentInsightsTable(logger);
+ 
+     const insightsCollection = db.collection('daily_instrument_insights');
+ 
+     // Get all insights documents
+     logger.log('INFO', '[Backfill] Fetching insights documents from Firestore...');
+     const snapshot = await insightsCollection.get();
+ 
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No insights documents found in Firestore');
+       return { success: false, message: 'No insights found' };
+     }
+ 
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} insights documents`);
+ 
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+ 
+     // Process each document
+     for (const doc of snapshot.docs) {
+       const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)
+ 
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+ 
+       try {
+         const data = tryDecompress(doc.data());
+         const insights = data.insights || [];
+ 
+         if (insights.length === 0) {
+           logger.log('WARN', `[Backfill] No insights in document for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+ 
+         // Transform to BigQuery rows
+         const fetchedAt = data.fetchedAt
+           ? (data.fetchedAt.toDate ? data.fetchedAt.toDate().toISOString() : data.fetchedAt)
+           : new Date().toISOString();
+ 
+         const bigqueryRows = insights.map(insight => {
+           return {
+             date: dateStr,
+             instrument_id: parseInt(insight.instrumentId, 10),
+             insights_data: insight, // Store full insight object as JSON
+             fetched_at: fetchedAt
+           };
+         });
+ 
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);
+ 
+         totalRows += bigqueryRows.length;
+         processedDates++;
+ 
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process insights for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+ 
+     logger.log('SUCCESS', `[Backfill] ✅ Insights backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+ 
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Insights backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+ 
+ /**
+  * Main entry point
+  */
+ async function backfillInstrumentInsights(startDate = null, endDate = null) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+ 
+   logger.log('INFO', '[Backfill] Starting Instrument Insights backfill...');
+ 
+   try {
+     const result = await backfillInsights(startDate, endDate, logger);
+ 
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return result;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+ 
+ // CLI handling
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+ 
+   let startDate = null;
+   let endDate = null;
+ 
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     }
+   });
+ 
+   backfillInstrumentInsights(startDate, endDate)
+     .then(result => {
+       console.log('\n✅ Backfill completed successfully!');
+       console.log('Results:', JSON.stringify(result, null, 2));
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('\n❌ Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+ 
+ module.exports = { backfillInstrumentInsights, backfillInsights };
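
The `ensureInstrumentInsightsTable` and `insertRows` helpers are imported from `core/utils/bigquery_utils`, which is not part of this diff. Since the file header advertises load jobs (free, unlike per-row streaming inserts), here is a minimal sketch of what a load-job-based `insertRows` could look like; this is an illustration under those assumptions, not the module's actual implementation:

```js
// Hypothetical sketch: the real insertRows lives in core/utils/bigquery_utils,
// which this diff does not include.
const { BigQuery } = require('@google-cloud/bigquery');
const fs = require('fs');
const os = require('os');
const path = require('path');

async function insertRows(datasetId, tableId, rows, logger = console) {
  const bigquery = new BigQuery();
  // Write the batch as newline-delimited JSON; load jobs ingest NDJSON and,
  // unlike streaming inserts, are not billed per row.
  const tmpFile = path.join(os.tmpdir(), `${tableId}-${Date.now()}.ndjson`);
  fs.writeFileSync(tmpFile, rows.map((r) => JSON.stringify(r)).join('\n'));
  try {
    const [job] = await bigquery
      .dataset(datasetId)
      .table(tableId)
      .load(tmpFile, {
        sourceFormat: 'NEWLINE_DELIMITED_JSON',
        writeDisposition: 'WRITE_APPEND', // append to the existing table
      });
    logger.log('INFO', `[BigQuery] Load job ${job.id} appended ${rows.length} rows to ${tableId}`);
  } finally {
    fs.unlinkSync(tmpFile); // clean up the temp file whether or not the load succeeded
  }
}
```

In the backfill above this helper runs once per date document, so a full backfill issues one load job per date.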
@@ -0,0 +1,293 @@
+ /**
+  * @fileoverview Backfill Popular Investor Master List and Rankings from Firestore to BigQuery
+  *
+  * This function reads existing master list and rankings data from Firestore
+  * and writes it to BigQuery tables.
+  *
+  * Usage (Local Node.js script):
+  *   node index.js --masterList=true --rankings=true
+  *   node index.js --masterList=true
+  *   node index.js --rankings=true --startDate=2024-01-01 --endDate=2024-12-31
+  *
+  * Features:
+  * - Backfills the master list (all PIs from Firestore)
+  * - Backfills rankings for a date range (or all dates)
+  * - Uses load jobs (free) for efficient batching
+  * - Does NOT delete any Firestore data
+  */
+ 
+ const { Firestore } = require('@google-cloud/firestore');
+ const zlib = require('zlib');
+ const {
+   ensurePIMasterListTable,
+   ensurePIRankingsTable,
+   insertRows,
+   insertRowsWithMerge
+ } = require('../../core/utils/bigquery_utils');
+ 
+ const db = new Firestore();
+ 
+ // Helper to decompress Firestore data
+ function tryDecompress(data) {
+   if (!data) return null;
+   if (data._compressed && data.payload) {
+     try {
+       const buffer = Buffer.from(data.payload);
+       return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+     } catch (e) {
+       console.error(`[Backfill] Decompression failed: ${e.message}`);
+       return data;
+     }
+   }
+   return data;
+ }
+ 
+ /**
+  * Backfill master list from Firestore to BigQuery
+  */
+ async function backfillMasterList(logger = console) {
+   logger.log('INFO', '[Backfill] Starting master list backfill...');
+ 
+   try {
+     await ensurePIMasterListTable(logger);
+ 
+     // Get master list from Firestore
+     const masterListPath = 'system_state/popular_investor_master_list';
+     const masterListRef = db.doc(masterListPath);
+     const masterListDoc = await masterListRef.get();
+ 
+     if (!masterListDoc.exists) {
+       logger.log('WARN', '[Backfill] Master list document not found in Firestore');
+       return { success: false, message: 'Master list not found' };
+     }
+ 
+     const data = tryDecompress(masterListDoc.data());
+     const investors = data.investors || {};
+ 
+     if (Object.keys(investors).length === 0) {
+       logger.log('WARN', '[Backfill] Master list is empty');
+       return { success: false, message: 'Master list is empty' };
+     }
+ 
+     logger.log('INFO', `[Backfill] Found ${Object.keys(investors).length} investors in master list`);
+ 
+     // Transform to BigQuery rows
+     const now = new Date().toISOString();
+ 
+     // Helper to convert Firestore Timestamp to ISO string
+     const convertTimestamp = (ts) => {
+       if (!ts) return now;
+       if (ts instanceof Date) return ts.toISOString();
+       if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
+       if (typeof ts === 'string') return ts;
+       // Handle Firestore Timestamp object
+       if (ts._seconds !== undefined) {
+         return new Date(ts._seconds * 1000 + (ts._nanoseconds || 0) / 1000000).toISOString();
+       }
+       return now;
+     };
+ 
+     const bigqueryRows = Object.entries(investors).map(([cid, investorData]) => {
+       return {
+         cid: parseInt(cid, 10),
+         username: investorData.username || null,
+         first_seen_at: convertTimestamp(investorData.firstSeenAt),
+         last_seen_at: convertTimestamp(investorData.lastSeenAt),
+         last_updated: now
+       };
+     });
+ 
+     // Write to BigQuery using MERGE (updates existing, inserts new)
+     const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+     await insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger);
+ 
+     logger.log('SUCCESS', `[Backfill] ✅ Successfully backfilled ${bigqueryRows.length} master list records to BigQuery`);
+ 
+     return { success: true, count: bigqueryRows.length };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Master list backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+ 
+ /**
+  * Backfill rankings from Firestore to BigQuery for a date range
+  */
+ async function backfillRankings(startDate = null, endDate = null, logger = console) {
+   logger.log('INFO', `[Backfill] Starting rankings backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);
+ 
+   try {
+     await ensurePIRankingsTable(logger);
+ 
+     const rankingsCollection = db.collection('popular_investor_rankings');
+ 
+     // Get all ranking documents
+     let query = rankingsCollection;
+ 
+     if (startDate && endDate) {
+       // Document IDs can be range-filtered via FieldPath.documentId(), but for
+       // simplicity we fetch all documents and filter by ID in memory.
+       logger.log('INFO', `[Backfill] Fetching rankings documents (will filter by date range)...`);
+     } else {
+       logger.log('INFO', '[Backfill] Fetching all rankings documents...');
+     }
+ 
+     const snapshot = await query.get();
+ 
+     if (snapshot.empty) {
+       logger.log('WARN', '[Backfill] No rankings documents found in Firestore');
+       return { success: false, message: 'No rankings found' };
+     }
+ 
+     logger.log('INFO', `[Backfill] Found ${snapshot.size} rankings documents`);
+ 
+     let totalRows = 0;
+     let processedDates = 0;
+     let skippedDates = 0;
+ 
+     // Process each document
+     for (const doc of snapshot.docs) {
+       const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)
+ 
+       // Filter by date range if provided
+       if (startDate && endDate) {
+         if (dateStr < startDate || dateStr > endDate) {
+           skippedDates++;
+           continue;
+         }
+       }
+ 
+       try {
+         const data = tryDecompress(doc.data());
+         const items = data.Items || [];
+ 
+         if (items.length === 0) {
+           logger.log('WARN', `[Backfill] No items in rankings for ${dateStr}`);
+           skippedDates++;
+           continue;
+         }
+ 
+         // Transform to BigQuery rows
+         // Helper to convert Firestore Timestamp to ISO string
+         const convertTimestamp = (ts) => {
+           if (!ts) return new Date().toISOString();
+           if (ts instanceof Date) return ts.toISOString();
+           if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
+           if (typeof ts === 'string') return ts;
+           return new Date().toISOString();
+         };
+ 
+         const fetchedAt = convertTimestamp(data.fetchedAt);
+ 
+         const bigqueryRows = items.map((item, index) => {
+           return {
+             date: dateStr,
+             pi_id: parseInt(item.CustomerId, 10),
+             username: item.UserName || null,
+             rank: item.Rank || (index + 1), // Use Rank from item, or position in array
+             category: item.Category || null,
+             rankings_data: item, // Store full item data as JSON
+             fetched_at: fetchedAt
+           };
+         });
+ 
+         // Write to BigQuery using load jobs (free, batched)
+         const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+         await insertRows(datasetId, 'pi_rankings', bigqueryRows, logger);
+ 
+         totalRows += bigqueryRows.length;
+         processedDates++;
+ 
+         if (processedDates % 10 === 0) {
+           logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
+         }
+       } catch (dateError) {
+         logger.log('ERROR', `[Backfill] Failed to process rankings for ${dateStr}: ${dateError.message}`);
+         skippedDates++;
+       }
+     }
+ 
+     logger.log('SUCCESS', `[Backfill] ✅ Rankings backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);
+ 
+     return { success: true, processedDates, totalRows, skippedDates };
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Rankings backfill failed: ${error.message}`);
+     throw error;
+   }
+ }
+ 
+ /**
+  * Main entry point
+  */
+ async function backfillPIMasterListRankings(startDate = null, endDate = null, masterList = true, rankings = true) {
+   const logger = {
+     log: (level, message, ...args) => {
+       const timestamp = new Date().toISOString();
+       console.log(`[${timestamp}] [${level}] ${message}`, ...args);
+     }
+   };
+ 
+   logger.log('INFO', '[Backfill] Starting PI Master List and Rankings backfill...');
+ 
+   const results = {
+     masterList: null,
+     rankings: null
+   };
+ 
+   try {
+     // Backfill master list
+     if (masterList) {
+       results.masterList = await backfillMasterList(logger);
+     } else {
+       logger.log('INFO', '[Backfill] Skipping master list backfill');
+     }
+ 
+     // Backfill rankings
+     if (rankings) {
+       results.rankings = await backfillRankings(startDate, endDate, logger);
+     } else {
+       logger.log('INFO', '[Backfill] Skipping rankings backfill');
+     }
+ 
+     logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
+     return results;
+   } catch (error) {
+     logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
+     throw error;
+   }
+ }
+ 
+ // CLI handling
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+ 
+   let startDate = null;
+   let endDate = null;
+   let masterList = true;
+   let rankings = true;
+ 
+   args.forEach(arg => {
+     if (arg.startsWith('--startDate=')) {
+       startDate = arg.split('=')[1];
+     } else if (arg.startsWith('--endDate=')) {
+       endDate = arg.split('=')[1];
+     } else if (arg.startsWith('--masterList=')) {
+       masterList = arg.split('=')[1] === 'true';
+     } else if (arg.startsWith('--rankings=')) {
+       rankings = arg.split('=')[1] === 'true';
+     }
+   });
+ 
+   backfillPIMasterListRankings(startDate, endDate, masterList, rankings)
+     .then(results => {
+       console.log('\n✅ Backfill completed successfully!');
+       console.log('Results:', JSON.stringify(results, null, 2));
+       process.exit(0);
+     })
+     .catch(error => {
+       console.error('\n❌ Backfill failed:', error);
+       process.exit(1);
+     });
+ }
+ 
+ module.exports = { backfillPIMasterListRankings, backfillMasterList, backfillRankings };
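
Likewise, `insertRowsWithMerge` is not shown in this diff. The call site (`insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger)`) and the "updates existing, inserts new" comment suggest an upsert keyed on `cid`. A minimal sketch of the MERGE step, assuming the batch has first been loaded into a staging table (all names here are illustrative):

```js
// Hypothetical sketch: the real insertRowsWithMerge lives in
// core/utils/bigquery_utils. This shows just the MERGE step, assuming the
// batch was already loaded into `stagingTableId` (e.g. via a load job).
const { BigQuery } = require('@google-cloud/bigquery');

async function mergeFromStaging(datasetId, tableId, stagingTableId, keyColumns, valueColumns, logger = console) {
  const bigquery = new BigQuery();
  // Match target (T) and staging (S) rows on the key columns, e.g. ['cid'].
  const onClause = keyColumns.map((c) => `T.${c} = S.${c}`).join(' AND ');
  const setClause = valueColumns
    .filter((c) => !keyColumns.includes(c))
    .map((c) => `T.${c} = S.${c}`)
    .join(', ');
  const query = `
    MERGE \`${datasetId}.${tableId}\` T
    USING \`${datasetId}.${stagingTableId}\` S
    ON ${onClause}
    WHEN MATCHED THEN UPDATE SET ${setClause}
    WHEN NOT MATCHED THEN INSERT ROW`;
  const [job] = await bigquery.createQueryJob({ query });
  await job.getQueryResults(); // block until the MERGE completes
  logger.log('INFO', `[BigQuery] MERGE into ${tableId} finished (job ${job.id})`);
}
```

For `pi_master_list` the key columns would be `['cid']`, with `username`, `first_seen_at`, `last_seen_at`, and `last_updated` updated in place on matching rows.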
@@ -0,0 +1,72 @@
+ # Backfill Task Engine Data
+ 
+ Backfills portfolio, trade history, and social post data from Firestore to BigQuery.
+ 
+ ## Local Usage
+ 
+ Run from the project root directory:
+ 
+ ```bash
+ # Backfill all data types
+ node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
+   --startDate=2024-01-01 \
+   --endDate=2024-12-31 \
+   --dataType=all
+ 
+ # Backfill only portfolio data
+ node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
+   --startDate=2024-01-01 \
+   --endDate=2024-12-31 \
+   --dataType=portfolio
+ 
+ # Backfill only trade history
+ node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
+   --startDate=2024-01-01 \
+   --endDate=2024-12-31 \
+   --dataType=history
+ 
+ # Backfill only social posts
+ node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
+   --startDate=2024-01-01 \
+   --endDate=2024-12-31 \
+   --dataType=social
+ ```
+ 
+ ## Prerequisites
+ 
+ 1. **Authentication**: Make sure you're authenticated with Google Cloud:
+    ```bash
+    gcloud auth application-default login
+    ```
+ 
+ 2. **Environment Variables** (optional):
+    ```bash
+    export BIGQUERY_DATASET_ID=bulltrackers_data
+    export GCP_PROJECT_ID=stocks-12345
+    ```
+ 
+ 3. **Dependencies**: Make sure the module's dependencies are installed; run `npm install` in the module directory if `node_modules` is missing.
+ 
+ ## Cloud Function Usage
+ 
+ The script also works as a Cloud Function with an HTTP trigger:
+ 
+ ```
+ GET /backfill-task-engine-data?startDate=2024-01-01&endDate=2024-12-31&dataType=all
+ ```
+ 
+ ## Features
+ 
+ - ✅ **Resume capability**: Tracks progress in Firestore, can resume if interrupted
+ - ✅ **Batch processing**: Processes dates in batches to avoid timeouts
+ - ✅ **Safe**: Does NOT delete any Firestore data
+ - ✅ **Progress tracking**: Logs progress and a summary at the end
+ 
+ ## Progress Tracking
+ 
+ Progress is stored in Firestore at:
+ - `backfill_progress/portfolio/dates/{date}`
+ - `backfill_progress/history/dates/{date}`
+ - `backfill_progress/social/dates/{date}`
+ 
+ If a date is already backfilled, it will be skipped automatically.
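
The backfill script this README documents is not included in the diff, so the resume mechanism can only be inferred from the paths above. A minimal sketch of how the per-date progress documents could be checked and written, assuming a hypothetical document shape with `rowCount` and `completedAt` fields:

```js
// Illustrative sketch of the resume check described above. The actual
// backfill script (not shown in this diff) defines its own document shape.
const { Firestore } = require('@google-cloud/firestore');
const db = new Firestore();

// A date is considered done if its progress document exists.
async function isDateBackfilled(dataType, date) {
  const snap = await db.doc(`backfill_progress/${dataType}/dates/${date}`).get();
  return snap.exists;
}

// Mark a date as done after its rows have been loaded into BigQuery.
async function markDateBackfilled(dataType, date, rowCount) {
  await db.doc(`backfill_progress/${dataType}/dates/${date}`).set({
    rowCount,
    completedAt: new Date().toISOString(),
  });
}
```

A main loop built on helpers like these can skip any date whose progress document already exists, which matches the skip behavior described above.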