bulltrackers-module 1.0.61 → 1.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,217 +1,291 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Main pipe: pipe.maintenance.runUserActivitySampler
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* REFACTORED: Now split into an orchestrator and a task handler.
|
|
4
|
+
* - runUserActivitySamplerOrchestrator: Publishes one task per block.
|
|
5
|
+
* - handleSampleBlockTask: Processes a single block with parallel fetching.
|
|
5
6
|
*/
|
|
6
7
|
const { FieldValue } = require('@google-cloud/firestore');
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
|
-
* Main pipe: pipe.maintenance.
|
|
10
|
+
* Main pipe (Orchestrator): pipe.maintenance.runUserActivitySamplerOrchestrator
|
|
11
|
+
* This function is triggered by a schedule. It fans out the work by
|
|
12
|
+
* publishing one Pub/Sub message for each block to be sampled.
|
|
13
|
+
*
|
|
10
14
|
* @param {object} config - Configuration object.
|
|
11
|
-
* @param {object} dependencies - Contains db, logger,
|
|
12
|
-
* @returns {Promise<object>} Summary of the
|
|
15
|
+
* @param {object} dependencies - Contains db, logger, firestoreUtils, pubsubUtils.
|
|
16
|
+
* @returns {Promise<object>} Summary of the orchestration.
|
|
13
17
|
*/
|
|
14
|
-
exports.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
logger.log('ERROR', '[UserActivitySampler] Missing required configuration.');
|
|
23
|
-
throw new Error('Missing required configuration for User Activity Sampler.');
|
|
18
|
+
exports.runUserActivitySamplerOrchestrator = async (config, dependencies) => {
|
|
19
|
+
const { logger, firestoreUtils, pubsubUtils } = dependencies;
|
|
20
|
+
logger.log('INFO', '[SamplerOrchestrator] Starting user activity sampling orchestration...');
|
|
21
|
+
|
|
22
|
+
// Validate configuration
|
|
23
|
+
if (!config.allHighValueBlocks || !Array.isArray(config.allHighValueBlocks) || !config.samplerTaskTopicName) {
|
|
24
|
+
logger.log('ERROR', '[SamplerOrchestrator] Missing required configuration: allHighValueBlocks (array) or samplerTaskTopicName.');
|
|
25
|
+
throw new Error('Missing required configuration for Sampler Orchestrator.');
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
try {
|
|
27
|
-
|
|
29
|
+
// Reset locks once for all tasks
|
|
30
|
+
await firestoreUtils.resetProxyLocks(dependencies, config);
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
const tasks = [];
|
|
33
|
+
for (const block of config.allHighValueBlocks) {
|
|
34
|
+
// Ensure block and block.startId exist
|
|
35
|
+
if (!block || typeof block.startId === 'undefined') {
|
|
36
|
+
logger.log('WARN', '[SamplerOrchestrator] Skipping invalid block configuration:', block);
|
|
37
|
+
continue;
|
|
38
|
+
}
|
|
39
|
+
tasks.push({
|
|
40
|
+
type: 'sample-block',
|
|
41
|
+
blockId: block.startId
|
|
42
|
+
});
|
|
43
|
+
}
|
|
33
44
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
45
|
+
if (tasks.length === 0) {
|
|
46
|
+
logger.log('WARN', '[SamplerOrchestrator] No valid blocks found to sample.');
|
|
47
|
+
return { success: true, message: "No valid blocks configured." };
|
|
48
|
+
}
|
|
37
49
|
|
|
38
|
-
//
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
50
|
+
// Use pubsubUtils to batch publish all block tasks
|
|
51
|
+
await pubsubUtils.batchPublishTasks(dependencies, {
|
|
52
|
+
topicName: config.samplerTaskTopicName,
|
|
53
|
+
tasks: tasks,
|
|
54
|
+
taskType: 'sampler-block-task'
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
logger.log('SUCCESS', `[SamplerOrchestrator] Successfully published ${tasks.length} block sampling tasks.`);
|
|
58
|
+
return { success: true, blocksQueued: tasks.length };
|
|
59
|
+
|
|
60
|
+
} catch (error) {
|
|
61
|
+
logger.log('ERROR', '[SamplerOrchestrator] Fatal error during orchestration.', { errorMessage: error.message, errorStack: error.stack });
|
|
62
|
+
throw error;
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
/**
 * Internal Helper: Fetches a single batch of CIDs for the sampler.
 * This is designed to be called in parallel.
 *
 * Draws up to `config.apiBatchSize` random CIDs from the range
 * [blockId, blockId + 1,000,000) that are not yet in `processedInThisRun`,
 * POSTs them as a JSON array to `config.rankingsApiUrl`, and maps the returned
 * array to `{ CID, LastActivity }` records. Every failure is reported through
 * the return value; this function does not throw for fetch/API errors.
 *
 * @param {number|string} blockId - First CID of the block. Numeric strings are
 *   converted so the offset is added arithmetically rather than concatenated.
 * @param {object} config - Reads `apiBatchSize` and `rankingsApiUrl`.
 * @param {object} dependencies - Reads `logger`, `headerManager`, `proxyManager`.
 * @param {Set<string>} processedInThisRun - String CIDs already drawn in this run;
 *   mutated: every CID drawn here is added.
 * @returns {Promise<{success: boolean, cidsSent: number, publicUsers: Array<{CID: *, LastActivity: *}>}>}
 *   `cidsSent` is the number of CIDs generated for the request (0 when no request was made).
 */
async function fetchSampleBatch(blockId, config, dependencies, processedInThisRun) {
  const { logger, headerManager, proxyManager } = dependencies;
  const BLOCK_SIZE = 1000000;
  // Upper bound on random draws per requested CID. Without a bound the
  // generation loop never ends once the unsampled ID space is exhausted.
  const MAX_DRAWS_PER_CID = 20;
  const failed = (cidsSent) => ({ success: false, cidsSent, publicUsers: [] });

  const blockStart = typeof blockId === 'string' && blockId.trim() !== '' ? Number(blockId) : blockId;
  if (!Number.isInteger(blockStart)) {
    // An undefined/NaN block id would make every draw "NaN" and, previously, spin forever.
    logger.log('ERROR', `[SamplerTask] Invalid block id '${blockId}'; cannot generate CIDs.`);
    return failed(0);
  }

  // 1. Generate CIDs for this batch
  const cidsToSample = [];
  const maxDraws = config.apiBatchSize * MAX_DRAWS_PER_CID;
  for (let draws = 0; cidsToSample.length < config.apiBatchSize && draws < maxDraws; draws++) {
    const cid = Math.floor(Math.random() * BLOCK_SIZE) + blockStart;
    const key = String(cid);
    // Use a Set to prevent processing the same ID twice *within this run*
    if (processedInThisRun.has(key)) {
      continue;
    }
    processedInThisRun.add(key);
    cidsToSample.push(cid);
  }

  if (cidsToSample.length === 0) {
    // Nothing new to ask about; do not spend a header/proxy call on an empty body.
    logger.log('WARN', `[SamplerTask] Could not generate any unsampled CIDs for block ${blockId}.`);
    return failed(0);
  }

  // 2. Send the batch and interpret the response
  let selectedHeader = null;
  let wasSuccess = false;
  try {
    selectedHeader = await headerManager.selectHeader();
    if (!selectedHeader) throw new Error("Could not select header.");

    const urlWithParam = `${config.rankingsApiUrl}?Period=LastTwoYears`;
    const response = await proxyManager.fetch(urlWithParam, {
      method: 'POST',
      headers: { ...selectedHeader.header, 'Content-Type': 'application/json' },
      body: JSON.stringify(cidsToSample),
    });

    if (!response || typeof response.json !== 'function') {
      logger.log('WARN', `[SamplerTask] Invalid response structure from proxy for block ${blockId}.`);
      logger.log('DEBUG', `[SamplerTask] Response details: ${JSON.stringify(response)}`);
      return failed(cidsToSample.length);
    }

    if (!response.ok) {
      const errorText = await response.text();
      logger.log('WARN', `[SamplerTask] API error ${response.status} for block ${blockId}. Error: ${errorText}`);
      return failed(cidsToSample.length);
    }

    // The HTTP exchange worked; the header is credited even if the payload shape is unexpected.
    wasSuccess = true;
    const publicUsersBatch = await response.json();

    if (!Array.isArray(publicUsersBatch)) {
      logger.log('WARN', `[SamplerTask] API response was not an array for block ${blockId}.`);
      return failed(cidsToSample.length);
    }

    // Return a successful result
    return {
      success: true,
      cidsSent: cidsToSample.length,
      publicUsers: publicUsersBatch.map((u) => ({
        CID: u.CID,
        LastActivity: u.Value?.LastActivity,
      })),
    };
  } catch (fetchError) {
    logger.log('ERROR', `[SamplerTask] Fetch failed for block ${blockId}.`, { errorMessage: fetchError.message });
    wasSuccess = false;
    return failed(cidsToSample.length);
  } finally {
    // Runs on every exit path above, including the early returns.
    if (selectedHeader) {
      headerManager.updatePerformance(selectedHeader.id, wasSuccess);
    }
  }
}
|
|
132
140
|
|
|
141
|
+
/**
|
|
142
|
+
* Main pipe (Task Handler): pipe.maintenance.handleSampleBlockTask
|
|
143
|
+
* This function is triggered by Pub/Sub for a single block.
|
|
144
|
+
* It runs the sampling in parallel to finish within timeout.
|
|
145
|
+
*
|
|
146
|
+
* @param {object} message - The Pub/Sub message.
|
|
147
|
+
* @param {object} context - The message context.
|
|
148
|
+
* @param {object} config - Configuration object.
|
|
149
|
+
* @param {object} dependencies - Contains db, logger, headerManager, proxyManager.
|
|
150
|
+
* @returns {Promise<void>}
|
|
151
|
+
*/
|
|
152
|
+
exports.handleSampleBlockTask = async (message, context, config, dependencies) => {
|
|
153
|
+
const { db, logger, headerManager } = dependencies;
|
|
154
|
+
|
|
155
|
+
let task;
|
|
156
|
+
try {
|
|
157
|
+
task = JSON.parse(Buffer.from(message.data, 'base64').toString('utf-8'));
|
|
158
|
+
} catch (e) {
|
|
159
|
+
logger.log('ERROR', '[SamplerTask] Failed to parse Pub/Sub message data.', { error: e.message, data: message.data });
|
|
160
|
+
return; // Acknowledge the message to prevent retries
|
|
161
|
+
}
|
|
133
162
|
|
|
134
|
-
|
|
163
|
+
const { blockId } = task;
|
|
164
|
+
const taskId = `block-${blockId}-${context.eventId || Date.now()}`;
|
|
165
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
166
|
+
logger.log('INFO', `[SamplerTask/${taskId}] Processing block ${blockId}...`);
|
|
135
167
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
const threeMonthsAgo = new Date(now.getTime() - (90 * 24 * 60 * 60 * 1000)); // Approx
|
|
168
|
+
// --- Config validation for this task ---
|
|
169
|
+
if (!config.rankingsApiUrl || !config.targetPublicUsersPerBlock || !config.apiBatchSize || !config.outputCollectionName || !config.parallelRequests) {
|
|
170
|
+
logger.log('ERROR', `[SamplerTask/${taskId}] Missing required configuration for task execution.`);
|
|
171
|
+
throw new Error('Missing required configuration for Sampler Task.');
|
|
172
|
+
}
|
|
142
173
|
|
|
174
|
+
const processedInThisRun = new Set();
|
|
175
|
+
let N_public_sampled_block = 0;
|
|
176
|
+
let N_sampled_total_block = 0;
|
|
177
|
+
const public_users_data = []; // Store { CID, LastActivity }
|
|
143
178
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
return;
|
|
148
|
-
}
|
|
149
|
-
try {
|
|
150
|
-
const lastActivityDate = new Date(user.LastActivity);
|
|
151
|
-
if (isNaN(lastActivityDate)) {
|
|
152
|
-
counts.A4++; // Treat invalid dates as inactive
|
|
153
|
-
return;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
if (lastActivityDate >= oneDayAgo) counts.A1++;
|
|
157
|
-
else if (lastActivityDate >= oneWeekAgo) counts.A2++;
|
|
158
|
-
else if (lastActivityDate >= threeMonthsAgo) counts.A3++;
|
|
159
|
-
else counts.A4++;
|
|
160
|
-
} catch(e) {
|
|
161
|
-
logger.log('WARN', `[UserActivitySampler] Error parsing LastActivity date '${user.LastActivity}' for user ${user.CID}. Counting as A4.`);
|
|
162
|
-
counts.A4++;
|
|
163
|
-
}
|
|
179
|
+
const MAX_ATTEMPTS = config.maxSamplingAttemptsPerBlock || 1000;
|
|
180
|
+
let totalBatchesAttempted = 0;
|
|
181
|
+
const CONCURRENT_REQUESTS = config.parallelRequests;
|
|
164
182
|
|
|
165
|
-
|
|
183
|
+
try {
|
|
184
|
+
// --- Start Parallel Loop ---
|
|
185
|
+
while (N_public_sampled_block < config.targetPublicUsersPerBlock && totalBatchesAttempted < MAX_ATTEMPTS) {
|
|
186
|
+
const promises = [];
|
|
187
|
+
const numRequests = Math.min(CONCURRENT_REQUESTS, MAX_ATTEMPTS - totalBatchesAttempted);
|
|
188
|
+
|
|
189
|
+
logger.log('TRACE', `[SamplerTask/${taskId}] Starting parallel batch of ${numRequests} requests...`);
|
|
190
|
+
for (let i = 0; i < numRequests; i++) {
|
|
191
|
+
promises.push(fetchSampleBatch(blockId, config, dependencies, processedInThisRun));
|
|
192
|
+
}
|
|
166
193
|
|
|
167
|
-
const
|
|
168
|
-
|
|
169
|
-
|
|
194
|
+
const results = await Promise.allSettled(promises);
|
|
195
|
+
totalBatchesAttempted += numRequests;
|
|
196
|
+
|
|
197
|
+
// Process results from the parallel batch
|
|
198
|
+
for (const result of results) {
|
|
199
|
+
if (result.status === 'fulfilled' && result.value.success) {
|
|
200
|
+
const batchResult = result.value;
|
|
201
|
+
N_sampled_total_block += batchResult.cidsSent;
|
|
202
|
+
N_public_sampled_block += batchResult.publicUsers.length;
|
|
203
|
+
public_users_data.push(...batchResult.publicUsers);
|
|
204
|
+
} else if (result.status === 'fulfilled' && !result.value.success) {
|
|
205
|
+
// Failed API call, but we still count the CIDs we tried to sample
|
|
206
|
+
N_sampled_total_block += result.value.cidsSent;
|
|
207
|
+
} else {
|
|
208
|
+
// Promise rejected (unexpected error)
|
|
209
|
+
logger.log('WARN', `[SamplerTask/${taskId}] A sample fetch promise was rejected.`, { reason: result.reason });
|
|
210
|
+
}
|
|
170
211
|
}
|
|
212
|
+
|
|
213
|
+
logger.log('INFO', `[SamplerTask/${taskId}] Batch complete. Total public sampled: ${N_public_sampled_block}/${config.targetPublicUsersPerBlock}`);
|
|
171
214
|
|
|
172
|
-
|
|
215
|
+
// --- REMOVED artificial delay ---
|
|
216
|
+
}
|
|
217
|
+
// --- End Parallel Loop ---
|
|
173
218
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
}
|
|
219
|
+
if (totalBatchesAttempted >= MAX_ATTEMPTS) {
|
|
220
|
+
logger.log('WARN', `[SamplerTask/${taskId}] Reached max sampling attempts (${MAX_ATTEMPTS}). Proceeding with ${N_public_sampled_block} users.`);
|
|
221
|
+
}
|
|
178
222
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
sampledDate: today,
|
|
182
|
-
f_private: f_private_block,
|
|
183
|
-
publicSampleSize: N_public_sampled_block,
|
|
184
|
-
totalUsersInBlock: N_block,
|
|
185
|
-
activityCounts_Sample: counts,
|
|
186
|
-
activityFractions_Sample: fractions,
|
|
187
|
-
estimatedCounts_TotalBlock: estimatedCounts,
|
|
188
|
-
lastUpdated: FieldValue.serverTimestamp()
|
|
189
|
-
};
|
|
223
|
+
// --- Calculate and Store Results (same as before) ---
|
|
224
|
+
const f_private_block = N_sampled_total_block > 0 ? 1 - (N_public_sampled_block / N_sampled_total_block) : 0;
|
|
190
225
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
226
|
+
const counts = { A1: 0, A2: 0, A3: 0, A4: 0 };
|
|
227
|
+
const now = new Date();
|
|
228
|
+
const oneDayAgo = new Date(now.getTime() - (24 * 60 * 60 * 1000));
|
|
229
|
+
const oneWeekAgo = new Date(now.getTime() - (7 * 24 * 60 * 60 * 1000));
|
|
230
|
+
const threeMonthsAgo = new Date(now.getTime() - (90 * 24 * 60 * 60 * 1000));
|
|
194
231
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
232
|
+
public_users_data.forEach(user => {
|
|
233
|
+
if (!user.LastActivity) {
|
|
234
|
+
counts.A4++; return;
|
|
235
|
+
}
|
|
236
|
+
try {
|
|
237
|
+
const lastActivityDate = new Date(user.LastActivity);
|
|
238
|
+
if (isNaN(lastActivityDate)) { counts.A4++; return; }
|
|
239
|
+
if (lastActivityDate >= oneDayAgo) counts.A1++;
|
|
240
|
+
else if (lastActivityDate >= oneWeekAgo) counts.A2++;
|
|
241
|
+
else if (lastActivityDate >= threeMonthsAgo) counts.A3++;
|
|
242
|
+
else counts.A4++;
|
|
243
|
+
} catch (e) {
|
|
244
|
+
logger.log('WARN', `[SamplerTask/${taskId}] Error parsing LastActivity date '${user.LastActivity}' for user ${user.CID}. Counting as A4.`);
|
|
245
|
+
counts.A4++;
|
|
246
|
+
}
|
|
247
|
+
});
|
|
199
248
|
|
|
200
|
-
|
|
249
|
+
const fractions = {};
|
|
250
|
+
for (const category in counts) {
|
|
251
|
+
fractions[category] = N_public_sampled_block > 0 ? (counts[category] / N_public_sampled_block) : 0;
|
|
252
|
+
}
|
|
201
253
|
|
|
202
|
-
|
|
203
|
-
|
|
254
|
+
const N_block = 1000000;
|
|
255
|
+
const estimatedCounts = {};
|
|
256
|
+
for (const category in fractions) {
|
|
257
|
+
estimatedCounts[category] = Math.round(fractions[category] * N_block);
|
|
258
|
+
}
|
|
204
259
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
260
|
+
const result = {
|
|
261
|
+
blockId: blockId,
|
|
262
|
+
sampledDate: today,
|
|
263
|
+
f_private: f_private_block,
|
|
264
|
+
publicSampleSize: N_public_sampled_block,
|
|
265
|
+
totalSampleSize: N_sampled_total_block,
|
|
266
|
+
totalUsersInBlock: N_block,
|
|
267
|
+
activityCounts_Sample: counts,
|
|
268
|
+
activityFractions_Sample: fractions,
|
|
269
|
+
estimatedCounts_TotalBlock: estimatedCounts,
|
|
270
|
+
lastUpdated: FieldValue.serverTimestamp()
|
|
210
271
|
};
|
|
211
272
|
|
|
273
|
+
const docRef = db.collection(config.outputCollectionName).doc(`${blockId}_${today}`);
|
|
274
|
+
await docRef.set(result);
|
|
275
|
+
|
|
276
|
+
logger.log('SUCCESS', `[SamplerTask/${taskId}] Stored results for block ${blockId}. Sampled: ${N_public_sampled_block}. Private fraction: ${f_private_block.toFixed(3)}.`);
|
|
277
|
+
|
|
212
278
|
} catch (error) {
|
|
213
|
-
logger.log('ERROR',
|
|
214
|
-
|
|
279
|
+
logger.log('ERROR', `[SamplerTask/${taskId}] Fatal error during task execution.`, { errorMessage: error.message, errorStack: error.stack });
|
|
280
|
+
// Re-throw the error to signal failure to Cloud Functions, which will trigger a retry.
|
|
215
281
|
throw error;
|
|
282
|
+
} finally {
|
|
283
|
+
// Always flush header performance at the end of the task, even on failure.
|
|
284
|
+
try {
|
|
285
|
+
await headerManager.flushPerformanceUpdates();
|
|
286
|
+
logger.log('INFO', `[SamplerTask/${taskId}] Header performance flushed.`);
|
|
287
|
+
} catch (flushError) {
|
|
288
|
+
logger.log('ERROR', `[SamplerTask/${taskId}] Failed to flush header performance.`, { errorMessage: flushError.message });
|
|
289
|
+
}
|
|
216
290
|
}
|
|
217
291
|
};
|
package/index.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
// --- Core Utilities (Classes and Stateless Helpers) ---
|
|
8
|
-
//
|
|
8
|
+
// ... (no changes here) ...
|
|
9
9
|
const core = {
|
|
10
10
|
IntelligentHeaderManager: require('./functions/core/utils/intelligent_header_manager').IntelligentHeaderManager,
|
|
11
11
|
IntelligentProxyManager: require('./functions/core/utils/intelligent_proxy_manager').IntelligentProxyManager,
|
|
@@ -15,7 +15,7 @@ const core = {
|
|
|
15
15
|
};
|
|
16
16
|
|
|
17
17
|
// --- Pipe 1: Orchestrator ---
|
|
18
|
-
//
|
|
18
|
+
// ... (no changes here) ...
|
|
19
19
|
const orchestrator = {
|
|
20
20
|
// Main Pipes (Entry points for Cloud Functions)
|
|
21
21
|
runDiscoveryOrchestrator: require('./functions/orchestrator/index').runDiscoveryOrchestrator,
|
|
@@ -32,7 +32,7 @@ const orchestrator = {
|
|
|
32
32
|
};
|
|
33
33
|
|
|
34
34
|
// --- Pipe 2: Dispatcher ---
|
|
35
|
-
//
|
|
35
|
+
// ... (no changes here) ...
|
|
36
36
|
const dispatcher = {
|
|
37
37
|
// Main Pipe
|
|
38
38
|
handleRequest: require('./functions/dispatcher/index').handleRequest,
|
|
@@ -42,7 +42,7 @@ const dispatcher = {
|
|
|
42
42
|
};
|
|
43
43
|
|
|
44
44
|
// --- Pipe 3: Task Engine ---
|
|
45
|
-
//
|
|
45
|
+
// ... (no changes here) ...
|
|
46
46
|
const taskEngine = {
|
|
47
47
|
// Main Pipe
|
|
48
48
|
handleRequest: require('./functions/task-engine/handler_creator').handleRequest,
|
|
@@ -54,7 +54,7 @@ const taskEngine = {
|
|
|
54
54
|
};
|
|
55
55
|
|
|
56
56
|
// --- Pipe 4: Computation System ---
|
|
57
|
-
//
|
|
57
|
+
// ... (no changes here) ...
|
|
58
58
|
const computationSystem = {
|
|
59
59
|
// Main Pipe
|
|
60
60
|
runOrchestration: require('./functions/computation-system/helpers/orchestration_helpers').runComputationOrchestrator,
|
|
@@ -65,7 +65,7 @@ const computationSystem = {
|
|
|
65
65
|
};
|
|
66
66
|
|
|
67
67
|
// --- Pipe 5: API ---
|
|
68
|
-
//
|
|
68
|
+
// ... (no changes here) ...
|
|
69
69
|
const api = {
|
|
70
70
|
// Main Pipe
|
|
71
71
|
createApiApp: require('./functions/generic-api/index').createApiApp,
|
|
@@ -81,11 +81,15 @@ const maintenance = {
|
|
|
81
81
|
handleInvalidSpeculator: require('./functions/invalid-speculator-handler/helpers/handler_helpers').handleInvalidSpeculator,
|
|
82
82
|
runFetchInsights: require('./functions/fetch-insights/helpers/handler_helpers').fetchAndStoreInsights,
|
|
83
83
|
runFetchPrices: require('./functions/etoro-price-fetcher/helpers/handler_helpers').fetchAndStorePrices,
|
|
84
|
-
|
|
84
|
+
|
|
85
|
+
// --- UPDATED ---
|
|
86
|
+
runUserActivitySamplerOrchestrator: require('./functions/user-activity-sampler/helpers/sampler_helpers').runUserActivitySamplerOrchestrator,
|
|
87
|
+
handleSampleBlockTask: require('./functions/user-activity-sampler/helpers/sampler_helpers').handleSampleBlockTask,
|
|
88
|
+
// --- END UPDATE ---
|
|
85
89
|
};
|
|
86
90
|
|
|
87
91
|
// --- Pipe 7: Proxy ---
|
|
88
|
-
//
|
|
92
|
+
// ... (no changes here) ...
|
|
89
93
|
const proxy = {
|
|
90
94
|
handlePost: require('./functions/appscript-api/index').handlePost,
|
|
91
95
|
};
|
|
@@ -102,4 +106,4 @@ module.exports = {
|
|
|
102
106
|
maintenance,
|
|
103
107
|
proxy,
|
|
104
108
|
}
|
|
105
|
-
};
|
|
109
|
+
};
|