bulltrackers-module 1.0.762 → 1.0.764

This diff represents the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@
  * - Fault isolation - one entity failure doesn't affect others
  * - Cost efficient - workers scale to zero between runs
  * - RESILIENCE: Implements Circuit Breaker to prevent Retry Cost Spirals [Fix #2]
+ * - UPDATE: Implemented Hybrid Batching (chunks of 50) to reduce invocations [Optimization]
  */
 
  const { Storage } = require('@google-cloud/storage');
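The invocation saving the UPDATE line refers to is simple arithmetic: one worker call per chunk of 50 instead of one per entity. A minimal illustration (the entity count is made up):

```js
// One invocation per chunk instead of one per entity.
const batchSize = 50;     // default chunk size introduced in this version
const entityCount = 5000; // illustrative workload size

const perEntityCalls = entityCount;                       // 5000 invocations before
const perChunkCalls = Math.ceil(entityCount / batchSize); // 100 invocations after

console.log(`Invocations: ${perEntityCalls} -> ${perChunkCalls}`); // 50x fewer
```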
@@ -30,13 +31,17 @@ class RemoteTaskRunner {
  const poolConfig = config.workerPool || {};
  this.bucketName = poolConfig.tempBucket || 'bulltrackers-worker-staging';
  this.workerUrl = poolConfig.workerUrl;
- this.concurrency = poolConfig.concurrency || 100;
- this.timeout = poolConfig.timeout || 60000; // 60s default
+
+ // Optimization: Batch Size for Hybrid Execution
+ this.batchSize = poolConfig.workerBatchSize || 50;
+
+ this.concurrency = poolConfig.concurrency || 20; // Lower concurrency since each task does more work
+ this.timeout = poolConfig.timeout || 120000; // Increased timeout for batches (2m)
  this.retries = poolConfig.retries || 2;
 
  // Circuit Breaker Config [Fix #2]
  this.cbConfig = {
- minInvocations: 20, // Minimum calls before checking rate
+ minInvocations: 10, // Minimum calls before checking rate
  failureThreshold: 0.30, // Trip if failure rate > 30%
  ...poolConfig.circuitBreaker
  };
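A sketch of how a caller might tune the new knobs. The `workerPool` option names are taken from this hunk; every concrete value is illustrative, and the worker URL is a placeholder:

```js
// Hypothetical pool configuration consumed by the constructor above.
const config = {
  workerPool: {
    tempBucket: 'bulltrackers-worker-staging',
    workerUrl: 'https://example-worker.invalid', // placeholder
    workerBatchSize: 50,  // entities per worker invocation (new in this version)
    concurrency: 20,      // concurrent batch invocations (down from 100 per-entity calls)
    timeout: 120000,      // 2m per batch (up from 60s per entity)
    retries: 2,
    circuitBreaker: {
      minInvocations: 10,     // warmup before the failure rate is evaluated
      failureThreshold: 0.30  // trip above a 30% failure rate
    }
  }
};
```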
@@ -58,17 +63,10 @@ class RemoteTaskRunner {
 
  /**
  * Execute a batch of entities remotely (or locally for testing)
- * @param {Object} entry - Manifest entry for the computation
- * @param {string} dateStr - Target date (YYYY-MM-DD)
- * @param {Object} baseContext - Shared context (references, config)
- * @param {string[]} entityIds - Entity IDs to process
- * @param {Map<string, Object>} entityDataMap - Pre-filtered data per entity
- * @param {Object} depResults - Pre-loaded dependency results
- * @returns {Promise<{results: Object, errors: Array}>}
  */
  async runBatch(entry, dateStr, baseContext, entityIds, entityDataMap, depResults) {
  const startTime = Date.now();
- this._log('INFO', `Starting batch: ${entityIds.length} entities for ${entry.name}`);
+ this._log('INFO', `Starting run: ${entityIds.length} entities for ${entry.name}`);
 
  if (this.localMode) {
  return this._runBatchLocal(entry, dateStr, baseContext, entityIds, entityDataMap, depResults);
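A hedged usage sketch of `runBatch`; the argument shapes follow the JSDoc removed in this hunk, and `runner`, the manifest entry, and all data values are hypothetical:

```js
// `runner` is an already-constructed RemoteTaskRunner (assumption).
async function example(runner) {
  const entry = { name: 'dailyScore', originalName: 'dailyScore' }; // hypothetical manifest entry
  const entityDataMap = new Map([
    ['entity-1', { price: 10 }],
    ['entity-2', { price: 12 }]
  ]);

  const { results, errors } = await runner.runBatch(
    entry,
    '2024-01-01',             // dateStr (YYYY-MM-DD)
    { references: {} },       // baseContext
    ['entity-1', 'entity-2'], // entityIds
    entityDataMap,            // pre-filtered data per entity (Map)
    {}                        // depResults (pre-loaded dependency results)
  );

  console.log(Object.keys(results).length, 'results;', errors.length, 'errors');
}
```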
@@ -79,32 +77,38 @@ class RemoteTaskRunner {
 
  /**
  * Local execution mode - runs workers in-process
- * Perfect for testing without GCS or network overhead
  */
  async _runBatchLocal(entry, dateStr, baseContext, entityIds, entityDataMap, depResults) {
+ // For local execution, we still process one by one to keep debugging simple,
+ // or we could simulate batching. Sticking to simple pLimit for now.
  const limit = pLimit(this.concurrency);
  const results = {};
  const errors = [];
 
  const tasks = entityIds.map(entityId => limit(async () => {
  try {
- const contextPackage = this._buildContextPackage(
+ // Simulate the "Batch" structure but for single local execution
+ const contextPackage = this._buildBatchContextPackage(
  entry,
- entityId,
- entityDataMap.get(entityId),
+ [entityId],
+ entityDataMap,
  baseContext,
  depResults
  );
 
- const { result } = await executeLocal({
+ // We use the same context structure, but pass it to the local handler
+ const response = await executeLocal({
  computationName: entry.originalName || entry.name,
- entityId,
+ entityIds: [entityId],
  date: dateStr,
  contextPackage
  });
 
- if (result !== null && result !== undefined) {
- results[entityId] = result;
+ if (response.batchResults && response.batchResults[entityId]) {
+ results[entityId] = response.batchResults[entityId];
+ }
+ if (response.batchErrors && response.batchErrors.length > 0) {
+ errors.push(...response.batchErrors);
  }
 
  } catch (e) {
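The rewritten local branch reads `batchResults` and `batchErrors` off the `executeLocal` response, which implies the batch response contract sketched below (field values are illustrative):

```js
// Assumed response shape, inferred from how _runBatchLocal consumes it.
const exampleResponse = {
  status: 'success', // the remote path also accepts 'partial'
  batchResults: {
    'entity-1': { score: 42 } // one entry per entity that produced a result
  },
  batchErrors: [
    { entityId: 'entity-2', error: 'validation failed' } // per-entity failures
  ]
};
```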
@@ -115,87 +119,97 @@ class RemoteTaskRunner {
 
  await Promise.all(tasks);
 
- this._log('INFO', `Local batch complete: ${Object.keys(results).length} results, ${errors.length} errors`);
+ this._log('INFO', `Local run complete: ${Object.keys(results).length} results, ${errors.length} errors`);
  return { results, errors };
  }
 
  /**
  * Remote execution mode - invokes Cloud Functions via HTTP
+ * NOW USES CHUNKING (Hybrid Batching)
  */
  async _runBatchRemote(entry, dateStr, baseContext, entityIds, entityDataMap, depResults) {
- const uploadLimit = pLimit(50); // Concurrent uploads to GCS
+ // 1. Chunk the entities
+ const chunks = this._chunk(entityIds, this.batchSize);
+ this._log('INFO', `Split ${entityIds.length} entities into ${chunks.length} worker batches (size: ${this.batchSize})`);
+
+ const uploadLimit = pLimit(20); // Concurrent uploads
  const invokeLimit = pLimit(this.concurrency); // Concurrent worker invocations
 
- const results = {};
- const errors = [];
- const uploadedPaths = [];
+ const finalResults = {};
+ const finalErrors = [];
+ const uploadedBatches = [];
 
- // Circuit Breaker Stats (scoped to this batch)
- const batchStats = {
- invocations: 0,
- failures: 0,
- tripped: false
- };
+ // Circuit Breaker Stats
+ const batchStats = { invocations: 0, failures: 0, tripped: false };
 
- // Phase 1: Upload context packages to GCS
- this._log('INFO', 'Uploading context packages to GCS...');
+ // Phase 1: Upload context packages (one per chunk)
+ this._log('INFO', 'Uploading batch contexts to GCS...');
  const uploadStart = Date.now();
 
- const uploadTasks = entityIds.map(entityId => uploadLimit(async () => {
- // Check tripped status early to save uploads if massive failure occurring
+ const uploadTasks = chunks.map((chunkIds, index) => uploadLimit(async () => {
  if (batchStats.tripped) return;
 
- const contextPackage = this._buildContextPackage(
+ const contextPackage = this._buildBatchContextPackage(
  entry,
- entityId,
- entityDataMap.get(entityId),
+ chunkIds,
+ entityDataMap,
  baseContext,
  depResults
  );
 
- const path = `${dateStr}/${entry.name}/${entityId}.json`;
+ // Use the first entity ID + index to make a unique but identifiable path
+ const batchId = `${chunkIds[0]}_batch_${index}`;
+ const path = `${dateStr}/${entry.name}/batches/${batchId}.json`;
 
  try {
  await this._uploadToGCS(path, contextPackage);
- uploadedPaths.push({ entityId, path });
+ uploadedBatches.push({ chunkIds, path, batchId });
  } catch (e) {
- errors.push({ entityId, error: `Upload failed: ${e.message}` });
+ // If the upload fails, all entities in the chunk fail
+ chunkIds.forEach(id => finalErrors.push({ entityId: id, error: `Upload failed: ${e.message}` }));
  }
  }));
 
  await Promise.all(uploadTasks);
- this._log('INFO', `Uploaded ${uploadedPaths.length} packages in ${Date.now() - uploadStart}ms`);
+ this._log('INFO', `Uploaded ${uploadedBatches.length} batches in ${Date.now() - uploadStart}ms`);
 
- // Phase 2: Invoke workers in parallel
+ // Phase 2: Invoke workers
  this._log('INFO', 'Invoking workers...');
  const invokeStart = Date.now();
 
- const invokeTasks = uploadedPaths.map(({ entityId, path }) =>
+ const invokeTasks = uploadedBatches.map(({ chunkIds, path, batchId }) =>
  invokeLimit(async () => {
- // FAIL FAST: If circuit tripped, do not invoke worker
  if (batchStats.tripped) {
- errors.push({ entityId, error: 'Skipped: Circuit Breaker Tripped due to high failure rate' });
+ chunkIds.forEach(id => finalErrors.push({ entityId: id, error: 'Circuit Breaker Tripped' }));
  return;
  }
 
  try {
  const response = await this._invokeWorkerWithRetry({
  computationName: entry.originalName || entry.name,
- entityId,
+ entityIds: chunkIds, // Pass the LIST of IDs
  date: dateStr,
  dataUri: { bucket: this.bucketName, path }
- }, 1, batchStats); // Pass stats object to retry logic
-
- if (response.status === 'success' && response.result !== null) {
- results[entityId] = response.result;
- } else if (response.status === 'error') {
- errors.push({ entityId, error: response.error });
+ }, 1, batchStats);
+
+ // Process Bulk Response
+ if (response.status === 'success' || response.status === 'partial') {
+ // Merge Results
+ if (response.batchResults) {
+ Object.assign(finalResults, response.batchResults);
+ }
+ // Merge Errors
+ if (response.batchErrors && Array.isArray(response.batchErrors)) {
+ finalErrors.push(...response.batchErrors);
+ }
+ } else {
+ throw new Error(`Worker returned status: ${response.status}`);
  }
- // status === 'success' with result === null means skipped (filtered out)
 
  } catch (e) {
- // Circuit Breaker errors are thrown here
- errors.push({ entityId, error: e.message });
+ // Entire batch failed (network error, timeout, 500)
+ this._log('WARN', `Batch ${batchId} failed completely: ${e.message}`);
+ chunkIds.forEach(id => finalErrors.push({ entityId: id, error: `Batch failure: ${e.message}` }));
  }
  })
  );
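The worker contract implied by this hunk: the request carries `computationName`, `entityIds`, `date`, and a `dataUri` pointing at the uploaded chunk context; the reply carries `status` ('success' or 'partial'), `batchResults`, and `batchErrors`. A minimal Express-style sketch of a worker honoring that contract; the endpoint, `runComputation`, and all handler logic are hypothetical:

```js
// Hypothetical worker endpoint matching the payload/response shapes above.
const express = require('express');
const { Storage } = require('@google-cloud/storage');

const app = express();
app.use(express.json());
const storage = new Storage();

// Stand-in for the real per-entity compute step (assumption).
async function runComputation(name, entityId, date, ctx) {
  return { name, entityId, date, ok: true };
}

app.post('/', async (req, res) => {
  const { computationName, entityIds, date, dataUri } = req.body;

  // Download the batch context uploaded by _runBatchRemote.
  const [buf] = await storage.bucket(dataUri.bucket).file(dataUri.path).download();
  const contextPackage = JSON.parse(buf.toString());

  const batchResults = {};
  const batchErrors = [];

  // One failed entity does not fail the whole batch.
  for (const entityId of entityIds) {
    try {
      batchResults[entityId] = await runComputation(
        computationName, entityId, date, contextPackage
      );
    } catch (e) {
      batchErrors.push({ entityId, error: e.message });
    }
  }

  // 'partial' signals that some entities failed but the batch itself ran.
  res.json({
    status: batchErrors.length === 0 ? 'success' : 'partial',
    batchResults,
    batchErrors
  });
});
```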
@@ -203,40 +217,49 @@ class RemoteTaskRunner {
  await Promise.all(invokeTasks);
 
  if (batchStats.tripped) {
- this._log('ERROR', `Batch ABORTED by Circuit Breaker. Stats: ${batchStats.failures} failures / ${batchStats.invocations} invocations.`);
+ this._log('ERROR', `Run ABORTED by Circuit Breaker.`);
  }
 
  this._log('INFO', `Invocations complete in ${Date.now() - invokeStart}ms`);
 
- // Phase 3: Cleanup GCS (fire and forget)
- this._cleanupGCS(uploadedPaths.map(p => p.path)).catch(e => {
- this._log('WARN', `GCS cleanup failed: ${e.message}`);
- });
+ // Phase 3: Cleanup
+ this._cleanupGCS(uploadedBatches.map(b => b.path)).catch(() => {});
 
- return { results, errors };
+ return { results: finalResults, errors: finalErrors };
  }
 
  /**
- * Build the context package for a single entity
+ * Build a combined context package for multiple entities
  */
- _buildContextPackage(entry, entityId, entityData, baseContext, depResults) {
- // Extract only this entity's dependencies
- const entityDeps = {};
+ _buildBatchContextPackage(entry, chunkIds, entityDataMap, baseContext, depResults) {
+ // 1. Extract Data for chunk members
+ const batchDataMap = {};
+ chunkIds.forEach(id => {
+ if (entityDataMap.has(id)) {
+ batchDataMap[id] = entityDataMap.get(id);
+ }
+ });
+
+ // 2. Extract Dependencies for chunk members
+ const batchDeps = {};
  for (const [depName, allResults] of Object.entries(depResults || {})) {
- if (allResults === null) continue; // Large dependency not preloaded
+ if (allResults === null) continue;
 
  if (typeof allResults === 'object') {
- // If it's a map of entity -> result, extract this entity's
- if (allResults[entityId]) {
- entityDeps[depName] = { [entityId]: allResults[entityId] };
- }
+ batchDeps[depName] = {};
+ // Only include dependency data for entities in this chunk
+ chunkIds.forEach(id => {
+ if (allResults[id] !== undefined) {
+ batchDeps[depName][id] = allResults[id];
+ }
+ });
  }
  }
 
  return {
- entityData: entityData || {},
+ entityDataMap: batchDataMap, // Changed from single entityData
  references: baseContext.references || {},
- dependencies: entityDeps,
+ dependencies: batchDeps,
  computationMeta: {
  name: entry.name,
  originalName: entry.originalName,
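For a two-entity chunk, the package built by `_buildBatchContextPackage` would look roughly as follows. The field names come from the return statement above; all values are illustrative:

```js
// Approximate shape of one uploaded batch context (illustrative values).
const contextPackage = {
  entityDataMap: {      // per-entity data; was a single `entityData` object before
    'entity-1': { price: 10 },
    'entity-2': { price: 12 }
  },
  references: {},       // shared reference data from baseContext
  dependencies: {
    priorScores: {      // each dependency filtered down to the chunk's entities
      'entity-1': 0.7,
      'entity-2': 0.4
    }
  },
  computationMeta: {
    name: 'dailyScore', // hypothetical computation name
    originalName: 'dailyScore'
    // ...remaining meta fields from the manifest entry (not shown in this hunk)
  }
};
```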
@@ -248,131 +271,88 @@ class RemoteTaskRunner {
  }
 
  /**
- * Upload a context package to GCS
+ * Helper to split array into chunks
  */
+ _chunk(array, size) {
+ const chunks = [];
+ for (let i = 0; i < array.length; i += size) {
+ chunks.push(array.slice(i, i + size));
+ }
+ return chunks;
+ }
+
+ // ... [Rest of methods: _uploadToGCS, _invokeWorkerWithRetry, _checkCircuitBreaker, _invokeWorker, _isRetryableError, _cleanupGCS, _log] ...
+ // These remain unchanged from the previous version; just ensure they are included in the class.
+
  async _uploadToGCS(path, data) {
  const file = this.storage.bucket(this.bucketName).file(path);
-
  await file.save(JSON.stringify(data), {
  contentType: 'application/json',
- resumable: false, // Faster for small files
- metadata: {
- cacheControl: 'no-cache' // Don't cache temp files
- }
+ resumable: false,
+ metadata: { cacheControl: 'no-cache' }
  });
  }
 
- /**
- * Invoke a worker with retry logic and Circuit Breaker
- */
  async _invokeWorkerWithRetry(payload, attempt = 1, stats = null) {
- // Track Invocation (Cost)
  if (stats) stats.invocations++;
-
  try {
  return await this._invokeWorker(payload);
  } catch (e) {
- // Track Failure
  if (stats) {
  stats.failures++;
  this._checkCircuitBreaker(stats);
  }
-
- const isRetryable = this._isRetryableError(e);
-
- if (isRetryable && attempt < this.retries) {
- // Exponential backoff
+ if (this._isRetryableError(e) && attempt < this.retries) {
  const delay = Math.min(1000 * Math.pow(2, attempt - 1), 10000);
  await new Promise(r => setTimeout(r, delay));
-
- // Re-check circuit before retrying (another thread might have tripped it)
  if (stats) this._checkCircuitBreaker(stats);
-
- this._log('DEBUG', `Retrying ${payload.entityId} (attempt ${attempt + 1})`);
  return this._invokeWorkerWithRetry(payload, attempt + 1, stats);
  }
-
  throw e;
  }
  }
 
- /**
- * Check circuit breaker status and throw if tripped
- */
  _checkCircuitBreaker(stats) {
- if (stats.tripped) {
- throw new Error('Circuit Breaker: Batch aborted due to high failure rate');
- }
-
- // Only check after minimum invocations (warmup)
+ if (stats.tripped) throw new Error('Circuit Breaker: Run aborted');
  if (stats.invocations >= this.cbConfig.minInvocations) {
  const failureRate = stats.failures / stats.invocations;
-
  if (failureRate > this.cbConfig.failureThreshold) {
  stats.tripped = true;
- const msg = `🚨 CIRCUIT BREAKER TRIPPED! Failure rate ${(failureRate * 100).toFixed(1)}% ` +
- `(${stats.failures}/${stats.invocations}) exceeds threshold of ${(this.cbConfig.failureThreshold * 100)}%`;
-
- this._log('ERROR', msg);
- throw new Error(msg);
+ this._log('ERROR', `🚨 CIRCUIT BREAKER TRIPPED! Rate: ${(failureRate * 100).toFixed(1)}%`);
+ throw new Error('Circuit Breaker Tripped');
  }
  }
  }
 
- /**
- * Invoke a single worker via HTTP
- */
  async _invokeWorker(payload) {
- // Lazy-load auth client
  if (!this._authClient) {
  const { GoogleAuth } = require('google-auth-library');
  const auth = new GoogleAuth();
  this._authClient = await auth.getIdTokenClient(this.workerUrl);
  }
-
  const response = await this._authClient.request({
  url: this.workerUrl,
  method: 'POST',
  data: payload,
  timeout: this.timeout,
- headers: {
- 'Content-Type': 'application/json'
- }
+ headers: { 'Content-Type': 'application/json' }
  });
-
  return response.data;
  }
 
- /**
- * Check if an error is retryable
- */
  _isRetryableError(error) {
- // Network errors
  if (error.code === 'ECONNRESET' || error.code === 'ETIMEDOUT') return true;
-
- // HTTP 5xx errors (server errors)
  if (error.response && error.response.status >= 500) return true;
-
- // Rate limiting
  if (error.response && error.response.status === 429) return true;
-
  return false;
  }
 
- /**
- * Cleanup uploaded files from GCS
- */
  async _cleanupGCS(paths) {
- // Batch delete
  const bucket = this.storage.bucket(this.bucketName);
-
- // GCS batch delete has limits, process in chunks
  const chunkSize = 100;
  for (let i = 0; i < paths.length; i += chunkSize) {
  const chunk = paths.slice(i, i + chunkSize);
- await Promise.all(chunk.map(path =>
- bucket.file(path).delete().catch(() => {})
- ));
+ await Promise.all(chunk.map(path => bucket.file(path).delete().catch(() => {})));
  }
  }
 
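With the defaults set earlier (`minInvocations: 10`, `failureThreshold: 0.30`), the breaker cannot trip during the first nine calls and then trips as soon as the failure rate strictly exceeds 30%. A small sketch mirroring the condition in `_checkCircuitBreaker`:

```js
// Mirrors the trip condition in _checkCircuitBreaker with this version's defaults.
const cbConfig = { minInvocations: 10, failureThreshold: 0.30 };

function wouldTrip(invocations, failures) {
  return invocations >= cbConfig.minInvocations &&
         failures / invocations > cbConfig.failureThreshold;
}

console.log(wouldTrip(9, 9));  // false - still in warmup, despite 100% failures
console.log(wouldTrip(10, 3)); // false - 30% is not strictly greater than the threshold
console.log(wouldTrip(10, 4)); // true  - 40% failure rate trips the breaker
```

On the retry side, with the default `retries: 2` a failed call is retried exactly once, after a 1s backoff; the 10s cap on `Math.min(1000 * Math.pow(2, attempt - 1), 10000)` only matters for higher retry settings.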
@@ -247,7 +247,10 @@ async function dispatchPlannedTasks(tasks) {
  url: dispatcherUrl,
  headers: { 'Content-Type': 'application/json' },
  body: Buffer.from(JSON.stringify(payload)).toString('base64'),
- oidcToken: { serviceAccountEmail }
+ oidcToken: {
+ serviceAccountEmail,
+ audience: dispatcherUrl // <--- FIXED: Must exactly match function URL
+ }
  },
  scheduleTime: { seconds: t.runAtSeconds },
  name: taskName
@@ -291,7 +294,10 @@ async function dispatchRecoveryTasks(tasks) {
  url: dispatcherUrl,
  headers: { 'Content-Type': 'application/json' },
  body: Buffer.from(JSON.stringify(payload)).toString('base64'),
- oidcToken: { serviceAccountEmail }
+ oidcToken: {
+ serviceAccountEmail,
+ audience: dispatcherUrl // <--- FIXED: Must exactly match function URL
+ }
  },
  // Run Immediately (no scheduleTime)
  name: taskName
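Both dispatch functions get the same fix: the OIDC token's `audience` is pinned to `dispatcherUrl`, which the inline comment notes must exactly match the invoked function's URL for the token to be accepted. A sketch of the surrounding `createTask` call, assuming the `@google-cloud/tasks` client; the project, location, and queue identifiers are placeholders:

```js
// Hypothetical surrounding call, assuming @google-cloud/tasks; the oidcToken
// block matches the fixed shape from these hunks.
const { CloudTasksClient } = require('@google-cloud/tasks');
const client = new CloudTasksClient();

async function enqueueDispatch(dispatcherUrl, serviceAccountEmail, payload) {
  const parent = client.queuePath('my-project', 'us-central1', 'dispatch-queue'); // placeholders
  const task = {
    httpRequest: {
      httpMethod: 'POST',
      url: dispatcherUrl,
      headers: { 'Content-Type': 'application/json' },
      body: Buffer.from(JSON.stringify(payload)).toString('base64'),
      oidcToken: {
        serviceAccountEmail,
        audience: dispatcherUrl // must exactly match the function URL
      }
    }
  };
  const [response] = await client.createTask({ parent, task });
  return response.name;
}
```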