bulltrackers-module 1.0.105 → 1.0.106
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +222 -222
- package/functions/appscript-api/helpers/errors.js +19 -19
- package/functions/appscript-api/index.js +58 -58
- package/functions/computation-system/helpers/orchestration_helpers.js +647 -113
- package/functions/computation-system/utils/data_loader.js +191 -191
- package/functions/computation-system/utils/utils.js +149 -254
- package/functions/core/utils/firestore_utils.js +433 -433
- package/functions/core/utils/pubsub_utils.js +53 -53
- package/functions/dispatcher/helpers/dispatch_helpers.js +47 -47
- package/functions/dispatcher/index.js +52 -52
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +124 -124
- package/functions/fetch-insights/helpers/handler_helpers.js +91 -91
- package/functions/generic-api/helpers/api_helpers.js +379 -379
- package/functions/generic-api/index.js +150 -150
- package/functions/invalid-speculator-handler/helpers/handler_helpers.js +75 -75
- package/functions/orchestrator/helpers/discovery_helpers.js +226 -226
- package/functions/orchestrator/helpers/update_helpers.js +92 -92
- package/functions/orchestrator/index.js +147 -147
- package/functions/price-backfill/helpers/handler_helpers.js +116 -123
- package/functions/social-orchestrator/helpers/orchestrator_helpers.js +61 -61
- package/functions/social-task-handler/helpers/handler_helpers.js +288 -288
- package/functions/task-engine/handler_creator.js +78 -78
- package/functions/task-engine/helpers/discover_helpers.js +125 -125
- package/functions/task-engine/helpers/update_helpers.js +118 -118
- package/functions/task-engine/helpers/verify_helpers.js +162 -162
- package/functions/task-engine/utils/firestore_batch_manager.js +258 -258
- package/index.js +105 -113
- package/package.json +45 -45
- package/functions/computation-system/computation_dependencies.json +0 -120
- package/functions/computation-system/helpers/worker_helpers.js +0 -340
- package/functions/computation-system/utils/computation_state_manager.js +0 -178
- package/functions/computation-system/utils/dependency_graph.js +0 -191
- package/functions/speculator-cleanup-orchestrator/helpers/cleanup_helpers.js +0 -160
|
@@ -1,289 +1,289 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview Main pipe: pipe.maintenance.handleSocialTask
|
|
3
|
-
* This function is triggered by Pub/Sub for a single ticker.
|
|
4
|
-
* It fetches posts, deduplicates, analyzes, and stores them.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
const { FieldValue, FieldPath } = require('@google-cloud/firestore');
|
|
8
|
-
|
|
9
|
-
/**
|
|
10
|
-
* --- UPDATED HELPER FUNCTION ---
|
|
11
|
-
* Calls Gemini to classify sentiment and extract topics.
|
|
12
|
-
* @param {object} dependencies - Contains logger and geminiModel.
|
|
13
|
-
* @param {string} snippet - The text snippet to analyze.
|
|
14
|
-
* @returns {Promise<object>} An object: { overallSentiment: '...', topics: [...] }.
|
|
15
|
-
*/
|
|
16
|
-
async function getSentimentFromGemini(dependencies, snippet) {
|
|
17
|
-
const { logger, geminiModel } = dependencies;
|
|
18
|
-
|
|
19
|
-
if (!geminiModel) {
|
|
20
|
-
logger.log('WARN', '[getSentimentFromGemini] Gemini model not found in dependencies.');
|
|
21
|
-
return { overallSentiment: "Neutral", topics: [] }; // Return default object
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// --- NEW PROMPT ---
|
|
25
|
-
const prompt = `You are a financial analyst. Analyze the following social media post.
|
|
26
|
-
Your task is to provide:
|
|
27
|
-
1. The overall sentiment (Bullish, Bearish, or Neutral) toward the main asset. Return only one word.
|
|
28
|
-
2. A list of key topics or events mentioned (e.g., "FOMC", "CPI", "Earnings", "Inflation", "War", "Acquisition"). If no specific event is mentioned, return an empty array.
|
|
29
|
-
|
|
30
|
-
Return ONLY a valid JSON object in this exact format:
|
|
31
|
-
{
|
|
32
|
-
"overallSentiment": "...",
|
|
33
|
-
"topics": ["...", "..."]
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
Post: "${snippet}"`
|
|
37
|
-
// --- END NEW PROMPT ---
|
|
38
|
-
|
|
39
|
-
try {
|
|
40
|
-
const request = {
|
|
41
|
-
contents: [
|
|
42
|
-
{
|
|
43
|
-
role: "user",
|
|
44
|
-
parts: [ { text: prompt } ]
|
|
45
|
-
}
|
|
46
|
-
],
|
|
47
|
-
generationConfig: {
|
|
48
|
-
temperature: 0.1,
|
|
49
|
-
topP: 0.1,
|
|
50
|
-
maxOutputTokens: 256 // Increased tokens for JSON
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
|
|
54
|
-
const result = await geminiModel.generateContent(request);
|
|
55
|
-
const response = result.response;
|
|
56
|
-
|
|
57
|
-
const text = response?.candidates?.[0]?.content?.parts?.[0]?.text?.trim() || '';
|
|
58
|
-
|
|
59
|
-
// --- NEW: Parse JSON response ---
|
|
60
|
-
try {
|
|
61
|
-
// Find the JSON block
|
|
62
|
-
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
|
63
|
-
if (jsonMatch && jsonMatch[0]) {
|
|
64
|
-
const parsedJson = JSON.parse(jsonMatch[0]);
|
|
65
|
-
// Validate structure
|
|
66
|
-
if (parsedJson && parsedJson.overallSentiment && Array.isArray(parsedJson.topics)) {
|
|
67
|
-
logger.log('INFO', `[getSentimentFromGemini] Classified sentiment: ${parsedJson.overallSentiment}, Topics: ${parsedJson.topics.join(', ')}`);
|
|
68
|
-
return parsedJson; // Return the full object
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
logger.log('WARN', `[getSentimentFromGemini] Unexpected JSON structure from AI: "${text}". Defaulting.`);
|
|
72
|
-
return { overallSentiment: "Neutral", topics: [] };
|
|
73
|
-
} catch (parseError) {
|
|
74
|
-
logger.log('WARN', `[getSentimentFromGemini] Failed to parse JSON response from AI: "${text}". Defaulting.`, { error: parseError.message });
|
|
75
|
-
return { overallSentiment: "Neutral", topics: [] };
|
|
76
|
-
}
|
|
77
|
-
// --- END NEW JSON PARSE ---
|
|
78
|
-
|
|
79
|
-
} catch (error) {
|
|
80
|
-
logger.log('ERROR', '[getSentimentFromGemini] Error calling Gemini API.', {
|
|
81
|
-
errorMessage: error.message,
|
|
82
|
-
errorStack: error.stack
|
|
83
|
-
});
|
|
84
|
-
// Default to 'Neutral' on API error to avoid halting the pipeline
|
|
85
|
-
return { overallSentiment: "Neutral", topics: [] }; // Return default object
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Main pipe (Task Handler): pipe.maintenance.handleSocialTask
|
|
92
|
-
* @param {object} message - The Pub/Sub message.
|
|
93
|
-
* @param {object} context - The message context.
|
|
94
|
-
* @param {object} config - Configuration object.
|
|
95
|
-
* @param {object} dependencies - Contains db, logger, headerManager, proxyManager, geminiModel.
|
|
96
|
-
* @returns {Promise<void>}
|
|
97
|
-
*/
|
|
98
|
-
exports.handleSocialTask = async (message, context, config, dependencies) => {
|
|
99
|
-
const { db, logger, headerManager, proxyManager } = dependencies;
|
|
100
|
-
|
|
101
|
-
let task;
|
|
102
|
-
try {
|
|
103
|
-
task = JSON.parse(Buffer.from(message.data, 'base64').toString('utf-8'));
|
|
104
|
-
} catch (e) {
|
|
105
|
-
logger.log('ERROR', '[SocialTask] Failed to parse Pub/Sub message data.', { error: e.message, data: message.data });
|
|
106
|
-
return; // Acknowledge the message
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
const { tickerId, since } = task;
|
|
110
|
-
const sinceDate = new Date(since);
|
|
111
|
-
const taskId = `social-${tickerId}-${context.eventId || Date.now()}`;
|
|
112
|
-
logger.log('INFO', `[SocialTask/${taskId}] Processing ticker ${tickerId} for posts since ${since}.`);
|
|
113
|
-
|
|
114
|
-
// --- Config validation ---
|
|
115
|
-
if (!config.socialApiBaseUrl || !config.socialInsightsCollectionName || !config.processedPostsCollectionName) {
|
|
116
|
-
logger.log('ERROR', `[SocialTask/${taskId}] Missing required configuration.`);
|
|
117
|
-
throw new Error('Missing required configuration for Social Task.');
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
const processedPostsRef = db.collection(config.processedPostsCollectionName);
|
|
121
|
-
const today = new Date().toISOString().slice(0, 10);
|
|
122
|
-
const insightsCollectionRef = db.collection(config.socialInsightsCollectionName).doc(today).collection('posts');
|
|
123
|
-
|
|
124
|
-
let offset = 0;
|
|
125
|
-
const take = 10; // hardcode to 10
|
|
126
|
-
let keepFetching = true;
|
|
127
|
-
let postsProcessed = 0;
|
|
128
|
-
let postsSaved = 0;
|
|
129
|
-
const processedInThisRun = new Set(); // Local dedupe
|
|
130
|
-
|
|
131
|
-
try {
|
|
132
|
-
while (keepFetching) {
|
|
133
|
-
const url = `${config.socialApiBaseUrl}${tickerId}?take=${take}&offset=${offset}&reactionsPageSize=20`;
|
|
134
|
-
logger.log('TRACE', `[SocialTask/${taskId}] Fetching: ${url}`);
|
|
135
|
-
|
|
136
|
-
const selectedHeader = await headerManager.selectHeader();
|
|
137
|
-
let wasSuccess = false;
|
|
138
|
-
let response;
|
|
139
|
-
|
|
140
|
-
try {
|
|
141
|
-
response = await proxyManager.fetch(url, { headers: selectedHeader.header });
|
|
142
|
-
if (!response.ok) {
|
|
143
|
-
throw new Error(`API error ${response.status}`);
|
|
144
|
-
}
|
|
145
|
-
wasSuccess = true;
|
|
146
|
-
} catch (fetchError) {
|
|
147
|
-
logger.log('WARN', `[SocialTask/${taskId}] Fetch failed for offset ${offset}.`, { err: fetchError.message });
|
|
148
|
-
keepFetching = false; // Stop on fetch error
|
|
149
|
-
if (selectedHeader) headerManager.updatePerformance(selectedHeader.id, false);
|
|
150
|
-
continue;
|
|
151
|
-
} finally {
|
|
152
|
-
if (selectedHeader) headerManager.updatePerformance(selectedHeader.id, wasSuccess);
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
const page = await response.json();
|
|
156
|
-
const discussions = page?.discussions;
|
|
157
|
-
|
|
158
|
-
if (!Array.isArray(discussions) || discussions.length === 0) {
|
|
159
|
-
logger.log('INFO', `[SocialTask/${taskId}] No more posts found at offset ${offset}. Stopping.`);
|
|
160
|
-
keepFetching = false;
|
|
161
|
-
continue;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
const postIds = discussions.map(d => d.post.id).filter(Boolean);
|
|
165
|
-
if (postIds.length === 0) {
|
|
166
|
-
offset += take;
|
|
167
|
-
continue;
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
// --- Deduplication Check ---
|
|
171
|
-
const existingDocs = await processedPostsRef.where(FieldPath.documentId(), 'in', postIds).get();
|
|
172
|
-
const existingIds = new Set(existingDocs.docs.map(d => d.id));
|
|
173
|
-
|
|
174
|
-
const batch = db.batch();
|
|
175
|
-
let newPostsInBatch = 0;
|
|
176
|
-
|
|
177
|
-
for (const discussion of discussions) {
|
|
178
|
-
const post = discussion?.post;
|
|
179
|
-
if (!post || !post.id || !post.message?.text) continue;
|
|
180
|
-
|
|
181
|
-
// Stop pagination if we've reached posts older than our window
|
|
182
|
-
const postCreatedDate = new Date(post.created);
|
|
183
|
-
if (postCreatedDate < sinceDate) {
|
|
184
|
-
keepFetching = false;
|
|
185
|
-
continue;
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
// Skip if already processed
|
|
189
|
-
if (existingIds.has(post.id) || processedInThisRun.has(post.id)) {
|
|
190
|
-
continue;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Filter language (accept 'en' and 'en-gb')
|
|
194
|
-
const lang = post.message.languageCode || 'unknown';
|
|
195
|
-
if (lang !== 'en' && lang !== 'en-gb') {
|
|
196
|
-
continue; // Skip non-English posts
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
// --- Process the new post ---
|
|
200
|
-
postsProcessed++;
|
|
201
|
-
processedInThisRun.add(post.id);
|
|
202
|
-
const text = post.message.text;
|
|
203
|
-
|
|
204
|
-
// --- START: Enhanced Data Extraction ---
|
|
205
|
-
const likeCount = discussion.emotionsData?.like?.paging?.totalCount || 0;
|
|
206
|
-
const commentCount = discussion.summary?.totalCommentsAndReplies || 0;
|
|
207
|
-
let pollData = null;
|
|
208
|
-
|
|
209
|
-
// Check for poll data and extract it
|
|
210
|
-
if (post.type === 'Poll' && post.metadata?.poll?.options) {
|
|
211
|
-
pollData = {
|
|
212
|
-
id: post.metadata.poll.id,
|
|
213
|
-
options: post.metadata.poll.options.map(opt => ({
|
|
214
|
-
id: opt.id,
|
|
215
|
-
text: opt.text,
|
|
216
|
-
votesCount: opt.votesCount || 0
|
|
217
|
-
}))
|
|
218
|
-
};
|
|
219
|
-
}
|
|
220
|
-
// --- END: Enhanced Data Extraction ---
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
// 1. Truncate for AI
|
|
224
|
-
const MAX_CHARS = 500; // ~125 tokens, very cheap
|
|
225
|
-
let snippet = text;
|
|
226
|
-
if (text.length > (MAX_CHARS * 2)) {
|
|
227
|
-
// Create a "summary" snippet
|
|
228
|
-
snippet = text.substring(0, MAX_CHARS) + " ... " + text.substring(text.length - MAX_CHARS);
|
|
229
|
-
} else if (text.length > MAX_CHARS) {
|
|
230
|
-
// Just truncate
|
|
231
|
-
snippet = text.substring(0, MAX_CHARS);
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// 2. AI Sentiment Analysis
|
|
235
|
-
// Pass full dependencies object
|
|
236
|
-
// --- MODIFIED: Store the entire result object ---
|
|
237
|
-
const sentimentResult = await getSentimentFromGemini(dependencies, snippet);
|
|
238
|
-
|
|
239
|
-
// 3. Prepare data for storage
|
|
240
|
-
const postData = {
|
|
241
|
-
sentiment: sentimentResult, // <-- Store the full {overallSentiment, topics} object
|
|
242
|
-
textSnippet: snippet,
|
|
243
|
-
fullText: text, // Store the full original text
|
|
244
|
-
language: lang,
|
|
245
|
-
tickers: post.tags.map(t => t.market?.symbolName).filter(Boolean),
|
|
246
|
-
postOwnerId: post.owner?.id,
|
|
247
|
-
likeCount: likeCount, // Store like count
|
|
248
|
-
commentCount: commentCount, // Store comment count
|
|
249
|
-
pollData: pollData, // Store poll data (will be null if not a poll)
|
|
250
|
-
createdAt: post.created,
|
|
251
|
-
fetchedAt: FieldValue.serverTimestamp()
|
|
252
|
-
};
|
|
253
|
-
// --- END MODIFICATION ---
|
|
254
|
-
|
|
255
|
-
// 4. Add to batch for `daily_social_insights`
|
|
256
|
-
const insightDocRef = insightsCollectionRef.doc(post.id);
|
|
257
|
-
batch.set(insightDocRef, postData);
|
|
258
|
-
|
|
259
|
-
// 5. Add to batch for `processed_social_posts` (dedupe collection)
|
|
260
|
-
const dedupeDocRef = processedPostsRef.doc(post.id);
|
|
261
|
-
batch.set(dedupeDocRef, { processedAt: FieldValue.serverTimestamp() });
|
|
262
|
-
|
|
263
|
-
newPostsInBatch++;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
if (newPostsInBatch > 0) {
|
|
267
|
-
await batch.commit();
|
|
268
|
-
postsSaved += newPostsInBatch;
|
|
269
|
-
logger.log('INFO', `[SocialTask/${taskId}] Saved ${newPostsInBatch} new posts from offset ${offset}.`);
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// Continue to next page
|
|
273
|
-
offset += take;
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
logger.log('SUCCESS', `[SocialTask/${taskId}] Run complete. Processed ${postsProcessed} new posts, saved ${postsSaved}.`);
|
|
277
|
-
|
|
278
|
-
} catch (error) {
|
|
279
|
-
logger.log('ERROR', `[SocialTask/${taskId}] Fatal error during task execution.`, { errorMessage: error.message, errorStack: error.stack });
|
|
280
|
-
throw error;
|
|
281
|
-
} finally {
|
|
282
|
-
// Always flush header performance
|
|
283
|
-
try {
|
|
284
|
-
await headerManager.flushPerformanceUpdates();
|
|
285
|
-
} catch (flushError) {
|
|
286
|
-
logger.log('ERROR', `[SocialTask/${taskId}] Failed to flush header performance.`, { errorMessage: flushError.message });
|
|
287
|
-
}
|
|
288
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Main pipe: pipe.maintenance.handleSocialTask
|
|
3
|
+
* This function is triggered by Pub/Sub for a single ticker.
|
|
4
|
+
* It fetches posts, deduplicates, analyzes, and stores them.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { FieldValue, FieldPath } = require('@google-cloud/firestore');
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* --- UPDATED HELPER FUNCTION ---
|
|
11
|
+
* Calls Gemini to classify sentiment and extract topics.
|
|
12
|
+
* @param {object} dependencies - Contains logger and geminiModel.
|
|
13
|
+
* @param {string} snippet - The text snippet to analyze.
|
|
14
|
+
* @returns {Promise<object>} An object: { overallSentiment: '...', topics: [...] }.
|
|
15
|
+
*/
|
|
16
|
+
async function getSentimentFromGemini(dependencies, snippet) {
|
|
17
|
+
const { logger, geminiModel } = dependencies;
|
|
18
|
+
|
|
19
|
+
if (!geminiModel) {
|
|
20
|
+
logger.log('WARN', '[getSentimentFromGemini] Gemini model not found in dependencies.');
|
|
21
|
+
return { overallSentiment: "Neutral", topics: [] }; // Return default object
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// --- NEW PROMPT ---
|
|
25
|
+
const prompt = `You are a financial analyst. Analyze the following social media post.
|
|
26
|
+
Your task is to provide:
|
|
27
|
+
1. The overall sentiment (Bullish, Bearish, or Neutral) toward the main asset. Return only one word.
|
|
28
|
+
2. A list of key topics or events mentioned (e.g., "FOMC", "CPI", "Earnings", "Inflation", "War", "Acquisition"). If no specific event is mentioned, return an empty array.
|
|
29
|
+
|
|
30
|
+
Return ONLY a valid JSON object in this exact format:
|
|
31
|
+
{
|
|
32
|
+
"overallSentiment": "...",
|
|
33
|
+
"topics": ["...", "..."]
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
Post: "${snippet}"`
|
|
37
|
+
// --- END NEW PROMPT ---
|
|
38
|
+
|
|
39
|
+
try {
|
|
40
|
+
const request = {
|
|
41
|
+
contents: [
|
|
42
|
+
{
|
|
43
|
+
role: "user",
|
|
44
|
+
parts: [ { text: prompt } ]
|
|
45
|
+
}
|
|
46
|
+
],
|
|
47
|
+
generationConfig: {
|
|
48
|
+
temperature: 0.1,
|
|
49
|
+
topP: 0.1,
|
|
50
|
+
maxOutputTokens: 256 // Increased tokens for JSON
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
const result = await geminiModel.generateContent(request);
|
|
55
|
+
const response = result.response;
|
|
56
|
+
|
|
57
|
+
const text = response?.candidates?.[0]?.content?.parts?.[0]?.text?.trim() || '';
|
|
58
|
+
|
|
59
|
+
// --- NEW: Parse JSON response ---
|
|
60
|
+
try {
|
|
61
|
+
// Find the JSON block
|
|
62
|
+
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
|
63
|
+
if (jsonMatch && jsonMatch[0]) {
|
|
64
|
+
const parsedJson = JSON.parse(jsonMatch[0]);
|
|
65
|
+
// Validate structure
|
|
66
|
+
if (parsedJson && parsedJson.overallSentiment && Array.isArray(parsedJson.topics)) {
|
|
67
|
+
logger.log('INFO', `[getSentimentFromGemini] Classified sentiment: ${parsedJson.overallSentiment}, Topics: ${parsedJson.topics.join(', ')}`);
|
|
68
|
+
return parsedJson; // Return the full object
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
logger.log('WARN', `[getSentimentFromGemini] Unexpected JSON structure from AI: "${text}". Defaulting.`);
|
|
72
|
+
return { overallSentiment: "Neutral", topics: [] };
|
|
73
|
+
} catch (parseError) {
|
|
74
|
+
logger.log('WARN', `[getSentimentFromGemini] Failed to parse JSON response from AI: "${text}". Defaulting.`, { error: parseError.message });
|
|
75
|
+
return { overallSentiment: "Neutral", topics: [] };
|
|
76
|
+
}
|
|
77
|
+
// --- END NEW JSON PARSE ---
|
|
78
|
+
|
|
79
|
+
} catch (error) {
|
|
80
|
+
logger.log('ERROR', '[getSentimentFromGemini] Error calling Gemini API.', {
|
|
81
|
+
errorMessage: error.message,
|
|
82
|
+
errorStack: error.stack
|
|
83
|
+
});
|
|
84
|
+
// Default to 'Neutral' on API error to avoid halting the pipeline
|
|
85
|
+
return { overallSentiment: "Neutral", topics: [] }; // Return default object
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Main pipe (Task Handler): pipe.maintenance.handleSocialTask
|
|
92
|
+
* @param {object} message - The Pub/Sub message.
|
|
93
|
+
* @param {object} context - The message context.
|
|
94
|
+
* @param {object} config - Configuration object.
|
|
95
|
+
* @param {object} dependencies - Contains db, logger, headerManager, proxyManager, geminiModel.
|
|
96
|
+
* @returns {Promise<void>}
|
|
97
|
+
*/
|
|
98
|
+
exports.handleSocialTask = async (message, context, config, dependencies) => {
|
|
99
|
+
const { db, logger, headerManager, proxyManager } = dependencies;
|
|
100
|
+
|
|
101
|
+
let task;
|
|
102
|
+
try {
|
|
103
|
+
task = JSON.parse(Buffer.from(message.data, 'base64').toString('utf-8'));
|
|
104
|
+
} catch (e) {
|
|
105
|
+
logger.log('ERROR', '[SocialTask] Failed to parse Pub/Sub message data.', { error: e.message, data: message.data });
|
|
106
|
+
return; // Acknowledge the message
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const { tickerId, since } = task;
|
|
110
|
+
const sinceDate = new Date(since);
|
|
111
|
+
const taskId = `social-${tickerId}-${context.eventId || Date.now()}`;
|
|
112
|
+
logger.log('INFO', `[SocialTask/${taskId}] Processing ticker ${tickerId} for posts since ${since}.`);
|
|
113
|
+
|
|
114
|
+
// --- Config validation ---
|
|
115
|
+
if (!config.socialApiBaseUrl || !config.socialInsightsCollectionName || !config.processedPostsCollectionName) {
|
|
116
|
+
logger.log('ERROR', `[SocialTask/${taskId}] Missing required configuration.`);
|
|
117
|
+
throw new Error('Missing required configuration for Social Task.');
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
const processedPostsRef = db.collection(config.processedPostsCollectionName);
|
|
121
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
122
|
+
const insightsCollectionRef = db.collection(config.socialInsightsCollectionName).doc(today).collection('posts');
|
|
123
|
+
|
|
124
|
+
let offset = 0;
|
|
125
|
+
const take = 10; // hardcode to 10
|
|
126
|
+
let keepFetching = true;
|
|
127
|
+
let postsProcessed = 0;
|
|
128
|
+
let postsSaved = 0;
|
|
129
|
+
const processedInThisRun = new Set(); // Local dedupe
|
|
130
|
+
|
|
131
|
+
try {
|
|
132
|
+
while (keepFetching) {
|
|
133
|
+
const url = `${config.socialApiBaseUrl}${tickerId}?take=${take}&offset=${offset}&reactionsPageSize=20`;
|
|
134
|
+
logger.log('TRACE', `[SocialTask/${taskId}] Fetching: ${url}`);
|
|
135
|
+
|
|
136
|
+
const selectedHeader = await headerManager.selectHeader();
|
|
137
|
+
let wasSuccess = false;
|
|
138
|
+
let response;
|
|
139
|
+
|
|
140
|
+
try {
|
|
141
|
+
response = await proxyManager.fetch(url, { headers: selectedHeader.header });
|
|
142
|
+
if (!response.ok) {
|
|
143
|
+
throw new Error(`API error ${response.status}`);
|
|
144
|
+
}
|
|
145
|
+
wasSuccess = true;
|
|
146
|
+
} catch (fetchError) {
|
|
147
|
+
logger.log('WARN', `[SocialTask/${taskId}] Fetch failed for offset ${offset}.`, { err: fetchError.message });
|
|
148
|
+
keepFetching = false; // Stop on fetch error
|
|
149
|
+
if (selectedHeader) headerManager.updatePerformance(selectedHeader.id, false);
|
|
150
|
+
continue;
|
|
151
|
+
} finally {
|
|
152
|
+
if (selectedHeader) headerManager.updatePerformance(selectedHeader.id, wasSuccess);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const page = await response.json();
|
|
156
|
+
const discussions = page?.discussions;
|
|
157
|
+
|
|
158
|
+
if (!Array.isArray(discussions) || discussions.length === 0) {
|
|
159
|
+
logger.log('INFO', `[SocialTask/${taskId}] No more posts found at offset ${offset}. Stopping.`);
|
|
160
|
+
keepFetching = false;
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
const postIds = discussions.map(d => d.post.id).filter(Boolean);
|
|
165
|
+
if (postIds.length === 0) {
|
|
166
|
+
offset += take;
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// --- Deduplication Check ---
|
|
171
|
+
const existingDocs = await processedPostsRef.where(FieldPath.documentId(), 'in', postIds).get();
|
|
172
|
+
const existingIds = new Set(existingDocs.docs.map(d => d.id));
|
|
173
|
+
|
|
174
|
+
const batch = db.batch();
|
|
175
|
+
let newPostsInBatch = 0;
|
|
176
|
+
|
|
177
|
+
for (const discussion of discussions) {
|
|
178
|
+
const post = discussion?.post;
|
|
179
|
+
if (!post || !post.id || !post.message?.text) continue;
|
|
180
|
+
|
|
181
|
+
// Stop pagination if we've reached posts older than our window
|
|
182
|
+
const postCreatedDate = new Date(post.created);
|
|
183
|
+
if (postCreatedDate < sinceDate) {
|
|
184
|
+
keepFetching = false;
|
|
185
|
+
continue;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// Skip if already processed
|
|
189
|
+
if (existingIds.has(post.id) || processedInThisRun.has(post.id)) {
|
|
190
|
+
continue;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Filter language (accept 'en' and 'en-gb')
|
|
194
|
+
const lang = post.message.languageCode || 'unknown';
|
|
195
|
+
if (lang !== 'en' && lang !== 'en-gb') {
|
|
196
|
+
continue; // Skip non-English posts
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// --- Process the new post ---
|
|
200
|
+
postsProcessed++;
|
|
201
|
+
processedInThisRun.add(post.id);
|
|
202
|
+
const text = post.message.text;
|
|
203
|
+
|
|
204
|
+
// --- START: Enhanced Data Extraction ---
|
|
205
|
+
const likeCount = discussion.emotionsData?.like?.paging?.totalCount || 0;
|
|
206
|
+
const commentCount = discussion.summary?.totalCommentsAndReplies || 0;
|
|
207
|
+
let pollData = null;
|
|
208
|
+
|
|
209
|
+
// Check for poll data and extract it
|
|
210
|
+
if (post.type === 'Poll' && post.metadata?.poll?.options) {
|
|
211
|
+
pollData = {
|
|
212
|
+
id: post.metadata.poll.id,
|
|
213
|
+
options: post.metadata.poll.options.map(opt => ({
|
|
214
|
+
id: opt.id,
|
|
215
|
+
text: opt.text,
|
|
216
|
+
votesCount: opt.votesCount || 0
|
|
217
|
+
}))
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
// --- END: Enhanced Data Extraction ---
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
// 1. Truncate for AI
|
|
224
|
+
const MAX_CHARS = 500; // ~125 tokens, very cheap
|
|
225
|
+
let snippet = text;
|
|
226
|
+
if (text.length > (MAX_CHARS * 2)) {
|
|
227
|
+
// Create a "summary" snippet
|
|
228
|
+
snippet = text.substring(0, MAX_CHARS) + " ... " + text.substring(text.length - MAX_CHARS);
|
|
229
|
+
} else if (text.length > MAX_CHARS) {
|
|
230
|
+
// Just truncate
|
|
231
|
+
snippet = text.substring(0, MAX_CHARS);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// 2. AI Sentiment Analysis
|
|
235
|
+
// Pass full dependencies object
|
|
236
|
+
// --- MODIFIED: Store the entire result object ---
|
|
237
|
+
const sentimentResult = await getSentimentFromGemini(dependencies, snippet);
|
|
238
|
+
|
|
239
|
+
// 3. Prepare data for storage
|
|
240
|
+
const postData = {
|
|
241
|
+
sentiment: sentimentResult, // <-- Store the full {overallSentiment, topics} object
|
|
242
|
+
textSnippet: snippet,
|
|
243
|
+
fullText: text, // Store the full original text
|
|
244
|
+
language: lang,
|
|
245
|
+
tickers: post.tags.map(t => t.market?.symbolName).filter(Boolean),
|
|
246
|
+
postOwnerId: post.owner?.id,
|
|
247
|
+
likeCount: likeCount, // Store like count
|
|
248
|
+
commentCount: commentCount, // Store comment count
|
|
249
|
+
pollData: pollData, // Store poll data (will be null if not a poll)
|
|
250
|
+
createdAt: post.created,
|
|
251
|
+
fetchedAt: FieldValue.serverTimestamp()
|
|
252
|
+
};
|
|
253
|
+
// --- END MODIFICATION ---
|
|
254
|
+
|
|
255
|
+
// 4. Add to batch for `daily_social_insights`
|
|
256
|
+
const insightDocRef = insightsCollectionRef.doc(post.id);
|
|
257
|
+
batch.set(insightDocRef, postData);
|
|
258
|
+
|
|
259
|
+
// 5. Add to batch for `processed_social_posts` (dedupe collection)
|
|
260
|
+
const dedupeDocRef = processedPostsRef.doc(post.id);
|
|
261
|
+
batch.set(dedupeDocRef, { processedAt: FieldValue.serverTimestamp() });
|
|
262
|
+
|
|
263
|
+
newPostsInBatch++;
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if (newPostsInBatch > 0) {
|
|
267
|
+
await batch.commit();
|
|
268
|
+
postsSaved += newPostsInBatch;
|
|
269
|
+
logger.log('INFO', `[SocialTask/${taskId}] Saved ${newPostsInBatch} new posts from offset ${offset}.`);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Continue to next page
|
|
273
|
+
offset += take;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
logger.log('SUCCESS', `[SocialTask/${taskId}] Run complete. Processed ${postsProcessed} new posts, saved ${postsSaved}.`);
|
|
277
|
+
|
|
278
|
+
} catch (error) {
|
|
279
|
+
logger.log('ERROR', `[SocialTask/${taskId}] Fatal error during task execution.`, { errorMessage: error.message, errorStack: error.stack });
|
|
280
|
+
throw error;
|
|
281
|
+
} finally {
|
|
282
|
+
// Always flush header performance
|
|
283
|
+
try {
|
|
284
|
+
await headerManager.flushPerformanceUpdates();
|
|
285
|
+
} catch (flushError) {
|
|
286
|
+
logger.log('ERROR', `[SocialTask/${taskId}] Failed to flush header performance.`, { errorMessage: flushError.message });
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
289
|
};
|