bulltrackers-module 1.0.771 → 1.0.773

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ const RESULT_TABLE_NAME = 'computation_results_v3';
 function loadComputations() {
   const computationsDir = path.join(__dirname, '../computations');
   const files = fs.readdirSync(computationsDir);
-
+
   return files
     .filter(file => file.endsWith('.js') && !file.startsWith('_')) // Skip hidden/test files
     .map(file => {
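The hunk above shows the computation auto-discovery convention: every `.js` file in `../computations` is loaded unless its name starts with `_`. The `.map()` body falls outside the hunk, so the following is only a minimal sketch of how such a loader is typically completed; the `require` call and the returned `{ name, ...definition }` shape are assumptions, not the package's actual code.

// Sketch only: assumed completion of the discovery loop shown above.
const fs = require('fs');
const path = require('path');

function loadComputationsSketch(computationsDir) {
  return fs.readdirSync(computationsDir)
    .filter(file => file.endsWith('.js') && !file.startsWith('_')) // same filter as the hunk
    .map(file => {
      const definition = require(path.join(computationsDir, file)); // assumed: each file exports a definition object
      return { name: path.basename(file, '.js'), ...definition };   // assumed return shape
    });
}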
@@ -38,20 +38,20 @@ module.exports = {
   // =========================================================================
   // PROJECT CONFIGURATION
   // =========================================================================
-
+
   project: {
     id: 'bulltrackers',
     name: 'BullTrackers Analytics',
     version: '2.0.0'
   },
-
+
   // System epoch - bump this to force all computations to re-run
-  epoch: 'v2.0.0',
-
+  epoch: 'v2.0.1',
+
   // =========================================================================
   // BIGQUERY CONFIGURATION
   // =========================================================================
-
+
   bigquery: {
     projectId: process.env.GCP_PROJECT_ID || 'stocks-12345',
     dataset: process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data',
@@ -62,12 +62,12 @@ module.exports = {
   // =========================================================================
   // GCS CONFIGURATION (NEW: For Batch Loading)
   // =========================================================================
-
+
   gcs: {
     bucket: process.env.GCS_BUCKET_ID || 'bulltrackers-computation-staging',
     prefix: 'staging'
   },
-
+
   // =========================================================================
   // TABLE DEFINITIONS
   // =========================================================================
@@ -80,7 +80,7 @@ module.exports = {
   // The framework will automatically discover the full schema from BigQuery.
   // These hints just tell it how to interpret the data.
   // =========================================================================
-
+
   tables: {
     // User Portfolio Snapshots
     'portfolio_snapshots': {
@@ -89,7 +89,7 @@ module.exports = {
       clusterFields: ['user_type', 'user_id'], // Clustered by user_type, user_id
       description: 'Daily portfolio snapshots for all users'
     },
-
+
     // User Trade History
     'trade_history_snapshots': {
       dateField: 'date',
@@ -97,7 +97,7 @@ module.exports = {
       clusterFields: ['user_type', 'user_id'], // Clustered by user_type, user_id
       description: 'Daily trade history snapshots'
     },
-
+
     // Social Posts
     'social_post_snapshots': {
       dateField: 'date',
@@ -105,7 +105,7 @@ module.exports = {
       clusterFields: ['user_type', 'user_id'], // Clustered by user_type, user_id
       description: 'Daily social post snapshots'
     },
-
+
     // Asset Prices
     'asset_prices': {
       dateField: 'date',
@@ -113,7 +113,7 @@ module.exports = {
       clusterFields: ['ticker', 'instrument_id'], // Clustered by ticker, instrument_id
       description: 'Daily asset prices'
     },
-
+
     // PI Rankings
     'pi_rankings': {
       dateField: 'date',
@@ -121,7 +121,7 @@ module.exports = {
       clusterFields: ['pi_id', 'category'], // Clustered by pi_id, category
       description: 'Daily PI rankings snapshot'
     },
-
+
     // PI Master List (not date-partitioned)
     'pi_master_list': {
       dateField: null, // Not date-partitioned
@@ -129,7 +129,7 @@ module.exports = {
       clusterFields: ['cid'], // Clustered by cid
       description: 'Master list of all Popular Investors'
     },
-
+
     // PI Ratings
     'pi_ratings': {
       dateField: 'date',
@@ -137,7 +137,7 @@ module.exports = {
       clusterFields: ['pi_id'], // Clustered by pi_id
       description: 'Daily PI ratings'
     },
-
+
     // PI Page Views
     'pi_page_views': {
       dateField: 'date',
@@ -145,7 +145,7 @@ module.exports = {
       clusterFields: ['pi_id'], // Clustered by pi_id
       description: 'Daily PI page view metrics'
     },
-
+
     // Watchlist Membership
     'watchlist_membership': {
       dateField: 'date',
@@ -153,7 +153,7 @@ module.exports = {
       clusterFields: ['pi_id'], // Clustered by pi_id
       description: 'Daily watchlist membership counts'
     },
-
+
     // PI Alert History
     'pi_alert_history': {
       dateField: 'date',
@@ -161,7 +161,7 @@ module.exports = {
       clusterFields: ['pi_id', 'alert_type'], // Clustered by pi_id, alert_type
       description: 'Daily alert trigger history'
     },
-
+
     // Instrument Insights
     'instrument_insights': {
       dateField: 'date',
@@ -169,7 +169,7 @@ module.exports = {
       // Note: Clustering not specified in metadata provided, assuming similar to others or unclustered
       description: 'Daily instrument insights'
     },
-
+
     // Ticker Mappings (not date-partitioned)
     'ticker_mappings': {
       dateField: null,
@@ -177,20 +177,20 @@ module.exports = {
       clusterFields: ['instrument_id'], // Clustered by instrument_id
       description: 'Instrument ID to ticker symbol mappings'
     },
-
+
     // Computation Results
     'computation_results': {
-      tableName: RESULT_TABLE_NAME,
+      tableName: RESULT_TABLE_NAME,
       dateField: 'date',
       entityField: null, // Keyed by computation_name
       clusterFields: ['computation_name', 'category'], // Optimization for result lookups
       description: 'Stored computation results'
     },
-
+
     // Sector Mappings Table
     'sector_mappings': {
       dateField: null, // Static data
-      entityField: 'symbol',
+      entityField: 'symbol',
       // Assuming fast lookup on symbol is desired
       description: 'Ticker to Sector mappings migrated from Firestore'
     },
@@ -199,7 +199,7 @@ module.exports = {
     'behavioral_features': {
       tableName: 'daily_behavioral_features',
       dateField: 'date',
-      entityField: 'user_id',
+      entityField: 'user_id',
       schema: [
         { name: 'user_id', type: 'STRING' },
         { name: 'hhi_score', type: 'FLOAT' },
@@ -207,36 +207,36 @@ module.exports = {
       ]
     }
   },
-
+
   // NEW: Data to load globally for every computation
   // FIX: Define fields to satisfy QueryBuilder safety checks
   referenceData: [
-    {
-      table: 'sector_mappings',
+    {
+      table: 'sector_mappings',
       fields: ['symbol', 'sector'] // Adjust these column names if your DB differs
     }
   ],
-
+
   // =========================================================================
   // RESULT STORAGE CONFIGURATION
   // =========================================================================
-
+
   resultStore: {
     table: RESULT_TABLE_NAME,
     partitionField: 'date',
     clusterFields: ['computation_name', 'category']
   },
-
+
   // =========================================================================
   // COMPUTATIONS
   // =========================================================================
-
+
   computations: loadComputations(),
-
+
   // =========================================================================
   // PREDEFINED FILTER SETS
   // =========================================================================
-
+
   filterSets: {
     'popular_investors': {
       user_type: 'POPULAR_INVESTOR'
@@ -248,28 +248,28 @@ module.exports = {
       user_type: ['POPULAR_INVESTOR', 'SIGNED_IN_USER']
     }
   },
-
+
   // =========================================================================
   // BUSINESS RULES
   // =========================================================================
-
+
   rules,
-
+
   // =========================================================================
   // EXECUTION CONFIGURATION
   // =========================================================================
-
+
   execution: {
     entityConcurrency: 50,
     insertBatchSize: 500,
     fetchBatchSize: 30000,
     maxDependencyEntities: 10000
   },
-
+
   // =========================================================================
   // SCHEDULING CONFIGURATION
   // =========================================================================
-
+
   scheduling: {
     default: {
       frequency: 'daily',
@@ -278,39 +278,39 @@ module.exports = {
     },
     dependencyGapMinutes: 5
   },
-
+
   // =========================================================================
   // CLOUD TASKS CONFIGURATION
   // =========================================================================
-
+
   cloudTasks: {
     projectId: process.env.GCP_PROJECT_ID || 'stocks-12345',
     location: 'europe-west1',
     queueName: 'computation-triggers',
-    dispatcherUrl: process.env.DISPATCHER_URL ||
+    dispatcherUrl: process.env.DISPATCHER_URL ||
       'https://europe-west1-stocks-12345.cloudfunctions.net/compute-dispatcher',
-    serviceAccountEmail: process.env.CLOUD_TASKS_SA_EMAIL ||
+    serviceAccountEmail: process.env.CLOUD_TASKS_SA_EMAIL ||
       '879684846540-compute@developer.gserviceaccount.com'
   },
-
+
   // =========================================================================
   // ON-DEMAND API CONFIGURATION
   // =========================================================================
-
+
   onDemand: {
     maxRequestsPerMinute: 5,
     timeout: 60000,
     allowedComputations: null
   },
-
+
   // =========================================================================
   // WORKER POOL CONFIGURATION (SERVERLESS WORKERS)
   // =========================================================================
-
+
   workerPool: {
     enabled: process.env.WORKER_POOL_ENABLED === 'true',
     localMode: process.env.WORKER_LOCAL_MODE === 'true',
-    workerUrl: process.env.WORKER_URL ||
+    workerUrl: process.env.WORKER_URL ||
      'https://europe-west1-stocks-12345.cloudfunctions.net/computation-worker',
     tempBucket: process.env.WORKER_TEMP_BUCKET || 'bulltrackers-worker-staging',
     concurrency: 100,
@@ -318,8 +318,8 @@ module.exports = {
     retries: 2,
     minEntitiesForOffload: 100, // Fixed duplicate key issue (removed the lower value)
     excludeComputations: [],
-    forceOffloadComputations: process.env.WORKER_FORCE_COMPUTATIONS
-      ? process.env.WORKER_FORCE_COMPUTATIONS.split(',')
+    forceOffloadComputations: process.env.WORKER_FORCE_COMPUTATIONS
+      ? process.env.WORKER_FORCE_COMPUTATIONS.split(',')
       : [],
   }
 };
@@ -41,16 +41,14 @@ async function initialize() {
 
   console.log(`[Scheduler] Loaded ${manifest.length} computations.`);
 }
-
 /**
- * ENTRY POINT 1: The Reconciler & Garbage Collector
- * Trigger: Cloud Scheduler -> "0 * * * *" (Every Hour)
+ * REPLACEMENT for planComputations
  */
 async function planComputations(req, res) {
   try {
     await initialize();
 
-    // --- PHASE 1: RECONCILIATION (Ensure valid tasks exist) ---
+    // --- PHASE 1: RECONCILIATION ---
     const now = new Date();
     const windowStart = new Date(now);
     windowStart.setDate(now.getDate() - PLANNING_LOOKBACK_DAYS);
@@ -61,10 +59,21 @@ async function planComputations(req, res) {
 
     console.log(`[Planner] Reconciling window: ${windowStart.toISOString()} to ${windowEnd.toISOString()}`);
 
-    const tasksToSchedule = [];
+    // Helper to find Roots for any given computation (Pass 1..N)
+    const manifestMap = new Map(manifest.map(m => [m.name, m]));
+    const getRoots = (entry, visited = new Set()) => {
+      if (visited.has(entry.name)) return [];
+      visited.add(entry.name);
+      if (entry.pass === 1) return [entry];
+      return (entry.dependencies || [])
+        .map(d => manifestMap.get(d))
+        .filter(Boolean)
+        .flatMap(p => getRoots(p, visited));
+    };
+
+    const tasksToSchedule = new Map(); // Use Map to deduplicate by Task Name
     const stats = { checked: 0, scheduled: 0, mismatched: 0, missing: 0 };
 
-    // Iterate dates in window
     const targetDates = [];
     let cursor = new Date(windowStart);
     while (cursor <= windowEnd) {
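In the hunk above, getRoots walks each stale computation back through its dependencies to its Pass 1 ancestors, and the Map keyed by task name prevents the same root from being scheduled twice when several dependents are stale on the same date. The following is a small standalone sketch of that behaviour; the toy manifest, names, hashes and the simplified task key are invented for illustration (the real key uses toKebab() as shown above).

// Sketch: toy manifest showing how stale dependents collapse onto one Pass 1 root task.
const manifest = [
  { name: 'prices',   originalName: 'Prices',   pass: 1, hash: 'aaaa1111', dependencies: [] },
  { name: 'returns',  originalName: 'Returns',  pass: 2, hash: 'bbbb2222', dependencies: ['prices'] },
  { name: 'rankings', originalName: 'Rankings', pass: 3, hash: 'cccc3333', dependencies: ['returns'] },
];

const manifestMap = new Map(manifest.map(m => [m.name, m]));
const getRoots = (entry, visited = new Set()) => {
  if (visited.has(entry.name)) return [];   // guards against dependency cycles
  visited.add(entry.name);
  if (entry.pass === 1) return [entry];     // Pass 1 nodes are the schedulable roots
  return (entry.dependencies || [])
    .map(d => manifestMap.get(d))
    .filter(Boolean)
    .flatMap(p => getRoots(p, visited));
};

const tasksToSchedule = new Map();
for (const staleName of ['returns', 'rankings']) {           // two stale nodes, same ancestry
  for (const root of getRoots(manifestMap.get(staleName))) {
    const taskKey = `root-${root.originalName.toLowerCase()}-2024-01-01-${root.hash}`; // simplified key
    if (!tasksToSchedule.has(taskKey)) {
      tasksToSchedule.set(taskKey, { computation: root.originalName, configHash: root.hash });
    }
  }
}
console.log(tasksToSchedule.size); // 1 -- only the shared root 'Prices' is scheduled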
@@ -77,8 +86,9 @@ async function planComputations(req, res) {
       const dateStr = dateObj.toISOString().split('T')[0];
       const dailyStatus = await stateRepository.getDailyStatus(dateStr);
 
+      // Iterate ALL computations (not just Pass 1) to find stale nodes
       for (const entry of manifest) {
-        if (entry.pass !== 1) continue; // Only schedule Roots
+        // If this specific entry is not scheduled for today, skip it
         if (!shouldRunOnDate(entry.schedule, dateObj)) continue;
 
         stats.checked++;
@@ -94,26 +104,37 @@ async function planComputations(req, res) {
         }
 
         if (reason) {
-          tasksToSchedule.push({
-            computation: entry.originalName,
-            targetDate: dateStr,
-            runAtSeconds: getRunTimeSeconds(entry.schedule, dateObj),
-            configHash: entry.hash,
-            queuePath: getQueuePath(),
-            reason
+          // If entry is stale, we must schedule its ROOT(s) to trigger the chain
+          const roots = getRoots(entry);
+
+          roots.forEach(root => {
+            const taskKey = `root-${toKebab(root.originalName)}-${dateStr}-${root.hash}`;
+
+            if (!tasksToSchedule.has(taskKey)) {
+              tasksToSchedule.set(taskKey, {
+                computation: root.originalName,
+                targetDate: dateStr,
+                runAtSeconds: getRunTimeSeconds(root.schedule, dateObj),
+                configHash: root.hash,
+                queuePath: getQueuePath(),
+                reason: `TRIGGERED_BY_${entry.name}_${reason}` // Track what triggered this root
+              });
+            }
           });
         }
       }
     })));
 
-    // --- PHASE 2: GARBAGE COLLECTION (Remove invalid tasks) ---
+    // --- PHASE 2: GARBAGE COLLECTION ---
+    // (Keep your existing GC logic here)
     console.log('[Planner] Starting Garbage Collection...');
     const deletedCount = await cleanupOrphanedTasks();
 
     // --- PHASE 3: DISPATCH ---
+    const taskList = Array.from(tasksToSchedule.values());
     let scheduledCount = 0;
-    if (tasksToSchedule.length > 0) {
-      const results = await dispatchTasks(tasksToSchedule);
+    if (taskList.length > 0) {
+      const results = await dispatchTasks(taskList);
       scheduledCount = results.filter(r => r.status === 'scheduled').length;
     }
 
@@ -121,9 +142,8 @@ async function planComputations(req, res) {
 
     return res.status(200).json({
       status: 'success',
-      window: `${PLANNING_LOOKBACK_DAYS}d back, ${PLANNING_LOOKAHEAD_HOURS}h fwd`,
+      window: `${PLANNING_LOOKBACK_DAYS}d back`,
       scheduled: scheduledCount,
-      deletedOrphans: deletedCount,
       stats
     });
 
@@ -168,46 +188,67 @@ async function runWatchdog(req, res) {
     return res.status(500).json({ error: error.message });
   }
 }
-
 // =============================================================================
 // ACTIVE GARBAGE COLLECTION LOGIC
 // =============================================================================
 
 async function cleanupOrphanedTasks() {
   const parent = getQueuePath();
-  const validKebabNames = new Set(manifest.map(m => toKebab(m.originalName)));
+
+  // Create a map of { kebabName: activeHash } for O(1) lookups
+  const activeComputations = new Map(
+    manifest.map(m => [toKebab(m.originalName), m.hash])
+  );
+
   const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
   let deletedCount = 0;
 
   try {
-    // Iterate over ALL tasks in the queue
-    // Note: listTasksAsync handles pagination automatically
     const tasksToDelete = [];
 
-    for await (const task of tasksClient.listTasksAsync({ parent, responseView: 'BASIC' })) {
+    // Note: listTasksAsync handles pagination, but if you have thousands of tasks,
+    // you might eventually need to handle page tokens explicitly if the library version is old.
+    for await (const task of tasksClient.listTasksAsync({
+      parent,
+      responseView: 'BASIC',
+      pageSize: 1000 // Increase page size to capture more per request
+    })) {
      const taskNameFull = task.name;
-      const taskNameShort = taskNameFull.split('/').pop(); // e.g., root-my-comp-2023-01-01-abcdef
+      const taskNameShort = taskNameFull.split('/').pop();
 
-      // 1. Regex Match: Capture the computation name part
-      // Pattern: (root|recovery)-{kebabName}-{date}-{hash}
-      // Date is YYYY-MM-DD (10 chars)
-      // Hash is 8 chars (or more)
-      const match = taskNameShort.match(/^(?:root|recovery)-(.+)-\d{4}-\d{2}-\d{2}-/);
+      // 1. Handle ROOT Tasks: root-{kebabName}-{date}-{hash}
+      // We capture the name AND the hash at the end
+      const rootMatch = taskNameShort.match(/^root-(.+)-\d{4}-\d{2}-\d{2}-(.+)$/);
 
-      if (!match) continue; // Skip tasks that don't match our naming convention
+      if (rootMatch) {
+        const [_, kebabName, taskHash] = rootMatch;
+        const activeHash = activeComputations.get(kebabName);
 
-      const extractedKebabName = match[1];
+        // DELETE IF:
+        // A) Computation removed from manifest (!activeHash)
+        // B) Hash mismatch (Old deployment/Stale) (activeHash !== taskHash)
+        if (!activeHash || activeHash !== taskHash) {
+          tasksToDelete.push(taskNameFull);
+        }
+        continue;
+      }
+
+      // 2. Handle RECOVERY Tasks: recovery-{kebabName}-{date}-{timestamp}
+      // We only delete these if the computation is completely gone.
+      // (Timestamps won't match a config hash, so we just check existence)
+      const recoveryMatch = taskNameShort.match(/^recovery-(.+)-\d{4}-\d{2}-\d{2}-/);
 
-      // 2. Check Validity
-      if (!validKebabNames.has(extractedKebabName)) {
-        // ORPHAN DETECTED!
-        tasksToDelete.push(taskNameFull);
+      if (recoveryMatch) {
+        const [_, kebabName] = recoveryMatch;
+        if (!activeComputations.has(kebabName)) {
+          tasksToDelete.push(taskNameFull);
+        }
       }
     }
 
     if (tasksToDelete.length === 0) return 0;
 
-    console.log(`[Planner] 🗑️ Found ${tasksToDelete.length} orphaned tasks. Deleting...`);
+    console.log(`[Planner] 🗑️ Found ${tasksToDelete.length} stale/orphaned tasks. Deleting...`);
 
     // 3. Delete in parallel
     await Promise.all(tasksToDelete.map(name => limit(async () => {
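The rewritten cleanup treats the two task-name families differently: root-{kebabName}-{date}-{hash} tasks are deleted when the trailing hash no longer matches the manifest, while recovery-{kebabName}-{date}-{timestamp} tasks are deleted only when the computation itself is gone. Below is a standalone sketch of how those two regexes classify task names; the sample names, hashes and the activeComputations contents are invented for illustration.

// Sketch: classifying invented task names with the same two patterns used above.
const rootRe = /^root-(.+)-\d{4}-\d{2}-\d{2}-(.+)$/;
const recoveryRe = /^recovery-(.+)-\d{4}-\d{2}-\d{2}-/;
const activeComputations = new Map([['daily-returns', 'abcd1234']]); // kebab name -> current config hash

const sampleTasks = [
  'root-daily-returns-2024-01-01-abcd1234',          // hash matches        -> keep
  'root-daily-returns-2024-01-01-deadbeef',          // stale hash          -> delete
  'root-old-metric-2024-01-01-abcd1234',             // removed computation -> delete
  'recovery-daily-returns-2024-01-01-1700000000000', // computation exists  -> keep
];

for (const name of sampleTasks) {
  const rootMatch = name.match(rootRe);
  if (rootMatch) {
    const [, kebabName, taskHash] = rootMatch;
    const keep = activeComputations.get(kebabName) === taskHash;
    console.log(name, keep ? 'keep' : 'delete');
    continue;
  }
  const recoveryMatch = name.match(recoveryRe);
  if (recoveryMatch) {
    console.log(name, activeComputations.has(recoveryMatch[1]) ? 'keep' : 'delete');
  }
}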
@@ -215,7 +256,10 @@ async function cleanupOrphanedTasks() {
       await tasksClient.deleteTask({ name });
       deletedCount++;
     } catch (e) {
-      console.warn(`[Planner] Failed to delete orphan ${name}: ${e.message}`);
+      // Ignore "NOT_FOUND" errors in case of race conditions
+      if (e.code !== 5) {
+        console.warn(`[Planner] Failed to delete ${name}: ${e.message}`);
+      }
     }
   })));
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.771",
+  "version": "1.0.773",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [