bulltrackers-module 1.0.712 → 1.0.714
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/maintenance/backfill-instrument-insights/index.js +180 -0
- package/functions/maintenance/backfill-pi-master-list-rankings/index.js +293 -0
- package/functions/maintenance/backfill-task-engine-data/README.md +72 -0
- package/functions/maintenance/backfill-task-engine-data/index.js +844 -0
- package/functions/task-engine/helpers/data_storage_helpers.js +11 -10
- package/package.json +5 -2
package/functions/maintenance/backfill-instrument-insights/index.js

@@ -0,0 +1,180 @@
```js
/**
 * @fileoverview Backfill Instrument Insights from Firestore to BigQuery
 *
 * This function reads existing insights data from Firestore
 * and writes it to a BigQuery table.
 *
 * Usage (Local Node.js script):
 *   node index.js --startDate=2024-01-01 --endDate=2024-12-31
 *   node index.js                       (backfills all dates)
 *
 * Features:
 * - Backfills insights for a date range (or all dates)
 * - Uses load jobs (free) for efficient batching
 * - Handles compressed Firestore data
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensureInstrumentInsightsTable,
  insertRows
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data: compressed documents carry a
// _compressed flag and a gzipped JSON payload; anything else is returned as-is.
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill insights from Firestore to BigQuery for a date range.
 */
async function backfillInsights(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting insights backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensureInstrumentInsightsTable(logger);

    const insightsCollection = db.collection('daily_instrument_insights');

    // Get all insights documents
    logger.log('INFO', '[Backfill] Fetching insights documents from Firestore...');
    const snapshot = await insightsCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No insights documents found in Firestore');
      return { success: false, message: 'No insights found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} insights documents`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each document
    for (const doc of snapshot.docs) {
      const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

      // Filter by date range if provided (lexicographic comparison works for YYYY-MM-DD)
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        const data = tryDecompress(doc.data());
        const insights = data.insights || [];

        if (insights.length === 0) {
          logger.log('WARN', `[Backfill] No insights in document for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Transform to BigQuery rows
        const fetchedAt = data.fetchedAt
          ? (data.fetchedAt.toDate ? data.fetchedAt.toDate().toISOString() : data.fetchedAt)
          : new Date().toISOString();

        const bigqueryRows = insights.map(insight => {
          return {
            date: dateStr,
            instrument_id: parseInt(insight.instrumentId, 10),
            insights_data: insight, // Store full insight object as JSON
            fetched_at: fetchedAt
          };
        });

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process insights for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ Insights backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] Insights backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point.
 */
async function backfillInstrumentInsights(startDate = null, endDate = null) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting Instrument Insights backfill...');

  try {
    const result = await backfillInsights(startDate, endDate, logger);

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return result;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI handling
if (require.main === module) {
  const args = process.argv.slice(2);

  let startDate = null;
  let endDate = null;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    }
  });

  backfillInstrumentInsights(startDate, endDate)
    .then(result => {
      console.log('\n✅ Backfill completed successfully!');
      console.log('Results:', JSON.stringify(result, null, 2));
      process.exit(0);
    })
    .catch(error => {
      console.error('\n❌ Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillInstrumentInsights, backfillInsights };
```
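For module consumers, a minimal sketch of driving the exported `backfillInsights` from another script; the require path and date range are illustrative, and the custom logger just mirrors the `log(level, message, ...args)` shape the function expects (plain `console` also works via the default parameter):

```js
// Sketch only, not part of the package. The path assumes the published
// package layout (package/functions/...); adjust it to your install.
const { backfillInsights } = require('bulltrackers-module/functions/maintenance/backfill-instrument-insights');

const logger = {
  log: (level, message, ...args) =>
    console.log(`[${new Date().toISOString()}] [${level}] ${message}`, ...args)
};

backfillInsights('2024-06-01', '2024-06-30', logger)
  .then(result => {
    // On success: { success, processedDates, totalRows, skippedDates };
    // when no documents exist: { success: false, message }.
    console.log('Result:', JSON.stringify(result));
  })
  .catch(err => {
    console.error('Backfill failed:', err);
    process.exitCode = 1;
  });
```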
package/functions/maintenance/backfill-pi-master-list-rankings/index.js

@@ -0,0 +1,293 @@
```js
/**
 * @fileoverview Backfill Popular Investor Master List and Rankings from Firestore to BigQuery
 *
 * This function reads existing master list and rankings data from Firestore
 * and writes it to BigQuery tables.
 *
 * Usage (Local Node.js script):
 *   node index.js --masterList=true --rankings=true
 *   node index.js --masterList=true
 *   node index.js --rankings=true --startDate=2024-01-01 --endDate=2024-12-31
 *
 * Features:
 * - Backfills master list (all PIs from Firestore)
 * - Backfills rankings for a date range (or all dates)
 * - Uses load jobs (free) for efficient batching
 * - Does NOT delete any Firestore data
 */

const { Firestore } = require('@google-cloud/firestore');
const zlib = require('zlib');
const {
  ensurePIMasterListTable,
  ensurePIRankingsTable,
  insertRows,
  insertRowsWithMerge
} = require('../../core/utils/bigquery_utils');

const db = new Firestore();

// Helper to decompress Firestore data (same scheme as the insights backfill)
function tryDecompress(data) {
  if (!data) return null;
  if (data._compressed && data.payload) {
    try {
      const buffer = Buffer.from(data.payload);
      return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
    } catch (e) {
      console.error(`[Backfill] Decompression failed: ${e.message}`);
      return data;
    }
  }
  return data;
}

/**
 * Backfill master list from Firestore to BigQuery.
 */
async function backfillMasterList(logger = console) {
  logger.log('INFO', '[Backfill] Starting master list backfill...');

  try {
    await ensurePIMasterListTable(logger);

    // Get master list from Firestore
    const masterListPath = 'system_state/popular_investor_master_list';
    const masterListRef = db.doc(masterListPath);
    const masterListDoc = await masterListRef.get();

    if (!masterListDoc.exists) {
      logger.log('WARN', '[Backfill] Master list document not found in Firestore');
      return { success: false, message: 'Master list not found' };
    }

    const data = tryDecompress(masterListDoc.data());
    const investors = data.investors || {};

    if (Object.keys(investors).length === 0) {
      logger.log('WARN', '[Backfill] Master list is empty');
      return { success: false, message: 'Master list is empty' };
    }

    logger.log('INFO', `[Backfill] Found ${Object.keys(investors).length} investors in master list`);

    // Transform to BigQuery rows
    const now = new Date().toISOString();

    // Helper to convert a Firestore Timestamp (in any of its shapes) to an ISO string
    const convertTimestamp = (ts) => {
      if (!ts) return now;
      if (ts instanceof Date) return ts.toISOString();
      if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
      if (typeof ts === 'string') return ts;
      // Handle a serialized Firestore Timestamp object
      if (ts._seconds !== undefined) {
        return new Date(ts._seconds * 1000 + (ts._nanoseconds || 0) / 1000000).toISOString();
      }
      return now;
    };

    const bigqueryRows = Object.entries(investors).map(([cid, investorData]) => {
      return {
        cid: parseInt(cid, 10),
        username: investorData.username || null,
        first_seen_at: convertTimestamp(investorData.firstSeenAt),
        last_seen_at: convertTimestamp(investorData.lastSeenAt),
        last_updated: now
      };
    });

    // Write to BigQuery using MERGE (updates existing rows, inserts new ones)
    const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
    await insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger);

    logger.log('SUCCESS', `[Backfill] ✅ Successfully backfilled ${bigqueryRows.length} master list records to BigQuery`);

    return { success: true, count: bigqueryRows.length };
  } catch (error) {
    logger.log('ERROR', `[Backfill] Master list backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Backfill rankings from Firestore to BigQuery for a date range.
 */
async function backfillRankings(startDate = null, endDate = null, logger = console) {
  logger.log('INFO', `[Backfill] Starting rankings backfill${startDate && endDate ? ` from ${startDate} to ${endDate}` : ' (all dates)'}...`);

  try {
    await ensurePIRankingsTable(logger);

    const rankingsCollection = db.collection('popular_investor_rankings');

    // Document IDs are dates, so rather than range-querying on IDs this
    // implementation fetches every document and filters by date in memory below.
    if (startDate && endDate) {
      logger.log('INFO', '[Backfill] Fetching rankings documents (will filter by date range)...');
    } else {
      logger.log('INFO', '[Backfill] Fetching all rankings documents...');
    }

    const snapshot = await rankingsCollection.get();

    if (snapshot.empty) {
      logger.log('WARN', '[Backfill] No rankings documents found in Firestore');
      return { success: false, message: 'No rankings found' };
    }

    logger.log('INFO', `[Backfill] Found ${snapshot.size} rankings documents`);

    let totalRows = 0;
    let processedDates = 0;
    let skippedDates = 0;

    // Process each document
    for (const doc of snapshot.docs) {
      const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

      // Filter by date range if provided
      if (startDate && endDate) {
        if (dateStr < startDate || dateStr > endDate) {
          skippedDates++;
          continue;
        }
      }

      try {
        const data = tryDecompress(doc.data());
        const items = data.Items || [];

        if (items.length === 0) {
          logger.log('WARN', `[Backfill] No items in rankings for ${dateStr}`);
          skippedDates++;
          continue;
        }

        // Transform to BigQuery rows.
        // Helper to convert a Firestore Timestamp to an ISO string.
        const convertTimestamp = (ts) => {
          if (!ts) return new Date().toISOString();
          if (ts instanceof Date) return ts.toISOString();
          if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
          if (typeof ts === 'string') return ts;
          return new Date().toISOString();
        };

        const fetchedAt = convertTimestamp(data.fetchedAt);

        const bigqueryRows = items.map((item, index) => {
          return {
            date: dateStr,
            pi_id: parseInt(item.CustomerId, 10),
            username: item.UserName || null,
            rank: item.Rank || (index + 1), // Use Rank from the item, or its position in the array
            category: item.Category || null,
            rankings_data: item, // Store full item data as JSON
            fetched_at: fetchedAt
          };
        });

        // Write to BigQuery using load jobs (free, batched)
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
        await insertRows(datasetId, 'pi_rankings', bigqueryRows, logger);

        totalRows += bigqueryRows.length;
        processedDates++;

        if (processedDates % 10 === 0) {
          logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
        }
      } catch (dateError) {
        logger.log('ERROR', `[Backfill] Failed to process rankings for ${dateStr}: ${dateError.message}`);
        skippedDates++;
      }
    }

    logger.log('SUCCESS', `[Backfill] ✅ Rankings backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

    return { success: true, processedDates, totalRows, skippedDates };
  } catch (error) {
    logger.log('ERROR', `[Backfill] Rankings backfill failed: ${error.message}`);
    throw error;
  }
}

/**
 * Main entry point.
 */
async function backfillPIMasterListRankings(startDate = null, endDate = null, masterList = true, rankings = true) {
  const logger = {
    log: (level, message, ...args) => {
      const timestamp = new Date().toISOString();
      console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    }
  };

  logger.log('INFO', '[Backfill] Starting PI Master List and Rankings backfill...');

  const results = {
    masterList: null,
    rankings: null
  };

  try {
    // Backfill master list
    if (masterList) {
      results.masterList = await backfillMasterList(logger);
    } else {
      logger.log('INFO', '[Backfill] Skipping master list backfill');
    }

    // Backfill rankings
    if (rankings) {
      results.rankings = await backfillRankings(startDate, endDate, logger);
    } else {
      logger.log('INFO', '[Backfill] Skipping rankings backfill');
    }

    logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    return results;
  } catch (error) {
    logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
    throw error;
  }
}

// CLI handling
if (require.main === module) {
  const args = process.argv.slice(2);

  let startDate = null;
  let endDate = null;
  let masterList = true;
  let rankings = true;

  args.forEach(arg => {
    if (arg.startsWith('--startDate=')) {
      startDate = arg.split('=')[1];
    } else if (arg.startsWith('--endDate=')) {
      endDate = arg.split('=')[1];
    } else if (arg.startsWith('--masterList=')) {
      masterList = arg.split('=')[1] === 'true';
    } else if (arg.startsWith('--rankings=')) {
      rankings = arg.split('=')[1] === 'true';
    }
  });

  backfillPIMasterListRankings(startDate, endDate, masterList, rankings)
    .then(results => {
      console.log('\n✅ Backfill completed successfully!');
      console.log('Results:', JSON.stringify(results, null, 2));
      process.exit(0);
    })
    .catch(error => {
      console.error('\n❌ Backfill failed:', error);
      process.exit(1);
    });
}

module.exports = { backfillPIMasterListRankings, backfillMasterList, backfillRankings };
```
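Both functions above normalize timestamps before writing the `fetched_at` and `*_seen_at` columns. A standalone sketch of that normalization, exercised on the input shapes it handles (example values are invented; only the master-list variant handles the serialized `_seconds` form):

```js
// Copy of the normalization logic above, shown with sample inputs.
const convertTimestamp = (ts) => {
  if (!ts) return new Date().toISOString();
  if (ts instanceof Date) return ts.toISOString();
  if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
  if (typeof ts === 'string') return ts;
  if (ts._seconds !== undefined) { // serialized Timestamp (master-list variant only)
    return new Date(ts._seconds * 1000 + (ts._nanoseconds || 0) / 1000000).toISOString();
  }
  return new Date().toISOString();
};

console.log(convertTimestamp(new Date(0)));                               // '1970-01-01T00:00:00.000Z'
console.log(convertTimestamp({ toDate: () => new Date(0) }));             // live Firestore Timestamp shape
console.log(convertTimestamp('2024-01-15T00:00:00.000Z'));                // strings pass through unchanged
console.log(convertTimestamp({ _seconds: 1705276800, _nanoseconds: 0 })); // '2024-01-15T00:00:00.000Z'
console.log(convertTimestamp(undefined));                                 // falls back to the current time
```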
package/functions/maintenance/backfill-task-engine-data/README.md

@@ -0,0 +1,72 @@
# Backfill Task Engine Data

Backfills portfolio, trade history, and social post data from Firestore to BigQuery.

## Local Usage

Run from the project root directory:

```bash
# Backfill all data types
node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
  --startDate=2024-01-01 \
  --endDate=2024-12-31 \
  --dataType=all

# Backfill only portfolio data
node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
  --startDate=2024-01-01 \
  --endDate=2024-12-31 \
  --dataType=portfolio

# Backfill only trade history
node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
  --startDate=2024-01-01 \
  --endDate=2024-12-31 \
  --dataType=history

# Backfill only social posts
node Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/maintenance/backfill-task-engine-data/index.js \
  --startDate=2024-01-01 \
  --endDate=2024-12-31 \
  --dataType=social
```

## Prerequisites

1. **Authentication**: Make sure you're authenticated with Google Cloud:
   ```bash
   gcloud auth application-default login
   ```

2. **Environment Variables** (optional):
   ```bash
   export BIGQUERY_DATASET_ID=bulltrackers_data
   export GCP_PROJECT_ID=stocks-12345
   ```

3. **Dependencies**: Make sure you're in the correct directory with node_modules installed.

## Cloud Function Usage

The script also works as a Cloud Function with an HTTP trigger:

```
GET /backfill-task-engine-data?startDate=2024-01-01&endDate=2024-12-31&dataType=all
```
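A sketch of calling that trigger from Node 18+ (where `fetch` is global); the host is a placeholder, since the real URL depends on your deployment:

```js
// Placeholder host: substitute your deployed function's URL.
const url = new URL('https://REGION-PROJECT.cloudfunctions.net/backfill-task-engine-data');
url.searchParams.set('startDate', '2024-01-01');
url.searchParams.set('endDate', '2024-12-31');
url.searchParams.set('dataType', 'all');

fetch(url)
  .then(res => res.text())
  .then(body => console.log('Backfill response:', body))
  .catch(err => console.error('Request failed:', err));
```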
## Features

- ✅ **Resume capability**: Tracks progress in Firestore, can resume if interrupted
- ✅ **Batch processing**: Processes dates in batches to avoid timeouts
- ✅ **Safe**: Does NOT delete any Firestore data
- ✅ **Progress tracking**: Logs progress and summary at the end

## Progress Tracking

Progress is stored in Firestore at:
- `backfill_progress/portfolio/dates/{date}`
- `backfill_progress/history/dates/{date}`
- `backfill_progress/social/dates/{date}`

If a date is already backfilled, it will be skipped automatically.
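As a reference for operators, a minimal sketch (not part of the package) of reading one of these progress markers, assuming the `backfill_progress/{dataType}/dates/{date}` layout above; the fields written to each marker are defined in index.js, so this only tests existence:

```js
const { Firestore } = require('@google-cloud/firestore');
const db = new Firestore();

// True once the backfill has recorded a marker for this dataType/date pair.
async function isDateBackfilled(dataType, date) {
  const marker = await db.doc(`backfill_progress/${dataType}/dates/${date}`).get();
  return marker.exists;
}

isDateBackfilled('portfolio', '2024-01-01')
  .then(done => console.log(done ? 'already backfilled; will be skipped' : 'not yet backfilled'));
```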