bulltrackers-module 1.0.292 → 1.0.294

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,12 +89,12 @@ function getDependencySet(endpoints, adjacencyList) {
89
89
  * Returns a string description of the first cycle found.
90
90
  */
91
91
  function detectCircularDependencies(manifestMap) {
92
- let index = 0;
93
- const stack = [];
94
- const indices = new Map();
92
+ let index = 0;
93
+ const stack = [];
94
+ const indices = new Map();
95
95
  const lowLinks = new Map();
96
- const onStack = new Set();
97
- const cycles = [];
96
+ const onStack = new Set();
97
+ const cycles = [];
98
98
 
99
99
  function strongconnect(v) {
100
100
  indices.set(v, index);
@@ -1,7 +1,8 @@
1
1
  /**
2
2
  * @fileoverview Executor for "Standard" (per-user) calculations.
3
3
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
4
- * UPDATED: Removes manual global.gc() calls.
4
+ * UPDATED: Implements "Circuit Breaker" to fail fast on high error rates.
5
+ * UPDATED: Implements "Adaptive Flushing" based on V8 Heap usage.
5
6
  * UPDATED: Manages incremental sharding states.
6
7
  * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
7
8
  */
@@ -12,6 +13,7 @@ const { ContextFactory } = require
12
13
  const { commitResults } = require('../persistence/ResultCommitter');
13
14
  const mathLayer = require('../layers/index');
14
15
  const { performance } = require('perf_hooks');
16
+ const v8 = require('v8'); // [NEW] For Memory introspection
15
17
 
16
18
  class StandardExecutor {
17
19
  static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {
@@ -59,6 +61,9 @@ class StandardExecutor {
59
61
  const aggregatedSuccess = {};
60
62
  const aggregatedFailures = [];
61
63
 
64
+ // [NEW] Global Error Tracking for Circuit Breaker
65
+ const errorStats = { count: 0, total: 0 };
66
+
62
67
  Object.keys(state).forEach(name => {
63
68
  executionStats[name] = {
64
69
  processedUsers: 0,
@@ -89,7 +94,7 @@ class StandardExecutor {
89
94
 
90
95
  let yP_chunk = {}, tH_chunk = {};
91
96
 
92
- const BATCH_SIZE = 5000;
97
+ const MIN_BATCH_SIZE = 1000; // Minimum to process before checking stats
93
98
  let usersSinceLastFlush = 0;
94
99
 
95
100
  try {
@@ -103,6 +108,8 @@ class StandardExecutor {
103
108
  const chunkSize = Object.keys(tP_chunk).length;
104
109
 
105
110
  const startProcessing = performance.now();
111
+
112
+ // [UPDATED] Collect execution results (success/failure counts)
106
113
  const promises = streamingCalcs.map(calc =>
107
114
  StandardExecutor.executePerUser(
108
115
  calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,
@@ -110,15 +117,37 @@ class StandardExecutor {
110
117
  executionStats[normalizeName(calc.manifest.name)]
111
118
  )
112
119
  );
113
- await Promise.all(promises);
120
+
121
+ const batchResults = await Promise.all(promises);
114
122
  const procDuration = performance.now() - startProcessing;
115
123
 
116
124
  Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
117
125
 
126
+ // [NEW] Update Error Stats
127
+ batchResults.forEach(r => {
128
+ errorStats.total += (r.success + r.failures);
129
+ errorStats.count += r.failures;
130
+ });
131
+
132
+ // [NEW] Circuit Breaker: Fail fast if error rate > 10% after processing 100+ items
133
+ // We check total > 100 to avoid failing on the very first user if they happen to be bad.
134
+ if (errorStats.total > 100 && (errorStats.count / errorStats.total) > 0.10) {
135
+ const failRate = (errorStats.count / errorStats.total * 100).toFixed(1);
136
+ throw new Error(`[Circuit Breaker] High failure rate detected (${failRate}%). Aborting batch to prevent silent data loss.`);
137
+ }
138
+
118
139
  usersSinceLastFlush += chunkSize;
119
140
 
120
- if (usersSinceLastFlush >= BATCH_SIZE) {
121
- logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
141
+ // [NEW] Adaptive Flushing (Memory Pressure Check)
142
+ const heapStats = v8.getHeapStatistics();
143
+ const heapUsedRatio = heapStats.used_heap_size / heapStats.heap_size_limit;
144
+ const MEMORY_THRESHOLD = 0.70; // 70% of the V8 heap limit
145
+ const COUNT_THRESHOLD = 5000;
146
+
147
+ if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
148
+ const reason = heapUsedRatio > MEMORY_THRESHOLD ? `MEMORY_PRESSURE (${(heapUsedRatio*100).toFixed(0)}%)` : 'BATCH_LIMIT';
149
+
150
+ logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users. Reason: ${reason}`);
122
151
 
123
152
  // [UPDATED] Pass isInitialWrite: true only on the first flush
124
153
  const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
@@ -171,6 +200,7 @@ class StandardExecutor {
171
200
  _executionStats: executionStats[name]
172
201
  };
173
202
 
203
+ // Clear the memory immediately after preparing the commit
174
204
  inst.results = {};
175
205
  }
176
206
 
@@ -226,6 +256,10 @@ class StandardExecutor {
226
256
  const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
227
257
  const SCHEMAS = mathLayer.SCHEMAS;
228
258
 
259
+ // [NEW] Track local batch success/failure
260
+ let chunkSuccess = 0;
261
+ let chunkFailures = 0;
262
+
229
263
  for (const [userId, todayPortfolio] of Object.entries(portfolioData)) {
230
264
  const yesterdayPortfolio = yesterdayPortfolioData ? yesterdayPortfolioData[userId] : null;
231
265
  const todayHistory = historyData ? historyData[userId] : null;
@@ -249,10 +283,16 @@ class StandardExecutor {
249
283
  try {
250
284
  await calcInstance.process(context);
251
285
  if (stats) stats.processedUsers++;
286
+ chunkSuccess++;
252
287
  }
253
- catch (e) { logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`); }
288
+ catch (e) {
289
+ logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`);
290
+ chunkFailures++;
291
+ }
254
292
  }
293
+
294
+ return { success: chunkSuccess, failures: chunkFailures };
255
295
  }
256
296
  }
257
297
 
258
- module.exports = { StandardExecutor };
298
+ module.exports = { StandardExecutor };
@@ -2,9 +2,11 @@
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
3
  * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
4
4
  * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
5
+ * UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
6
+ * FIXED: Implemented "Catch-Up" logic to scan full history (Start -> Target Date) instead of just Target Date.
5
7
  */
6
8
 
7
- const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
9
+ const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
8
10
  const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.js');
9
11
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
10
12
  const { fetchComputationStatus, updateComputationStatus } = require('../persistence/StatusRepository');
@@ -13,12 +15,49 @@ const { generateCodeHash } = require('../topology/HashManag
13
15
  const pLimit = require('p-limit');
14
16
  const crypto = require('crypto');
15
17
 
16
- const TOPIC_NAME = 'computation-tasks';
17
- const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
18
+ const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
19
+
20
+ // Threshold to trigger high-mem routing (e.g., 1.5 GB for a 2GB worker)
21
+ const OOM_THRESHOLD_MB = 1500;
22
+
23
+ /**
24
+ * [NEW] Forensics: Checks if the calculation crashed previously due to Memory.
25
+ * Reads the 'telemetry.lastMemory' from the audit ledger.
26
+ */
27
+ async function checkCrashForensics(db, date, pass, computationName) {
28
+ try {
29
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computationName}`;
30
+ const doc = await db.doc(ledgerPath).get();
31
+
32
+ if (!doc.exists) return 'standard';
33
+
34
+ const data = doc.data();
35
+
36
+ // Check if we have telemetry from a previous run
37
+ if (data.telemetry && data.telemetry.lastMemory) {
38
+ const lastRSS = data.telemetry.lastMemory.rssMB || 0;
39
+
40
+ if (lastRSS > OOM_THRESHOLD_MB) {
41
+ console.log(`[Dispatcher] 🕵️‍♀️ Forensics: ${computationName} likely OOM'd at ${lastRSS}MB. Routing to HIGH-MEM.`);
42
+ return 'high-mem';
43
+ }
44
+ }
45
+
46
+ // Also check if it's explicitly marked FAILED with 'Memory' in error
47
+ if (data.status === 'FAILED' && data.error && /memory/i.test(data.error)) {
48
+ return 'high-mem';
49
+ }
50
+
51
+ } catch (e) {
52
+ console.warn(`[Dispatcher] Forensics check failed for ${computationName}: ${e.message}`);
53
+ }
54
+
55
+ return 'standard';
56
+ }
18
57
 
19
58
  /**
20
59
  * Dispatches computation tasks for a specific pass.
21
- * @param {Object} config - System config
60
+ * @param {Object} config - System config (Injected with topics)
22
61
  * @param {Object} dependencies - { db, logger, ... }
23
62
  * @param {Array} computationManifest - List of calculations
24
63
  * @param {Object} reqBody - (Optional) HTTP Body containing 'callbackUrl' and 'date'
@@ -28,7 +67,8 @@ async function dispatchComputationPass(config, dependencies, computationManifest
28
67
  const pubsubUtils = new PubSubUtils(dependencies);
29
68
  const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
30
69
 
31
- // [NEW] Extract Date and Callback from request body (pushed by Workflow)
70
+ // Extract Date and Callback from request body (pushed by Workflow)
71
+ // NOTE: 'dateStr' acts as the "Target Date" (Ceiling), usually T-1.
32
72
  const dateStr = reqBody.date || config.date;
33
73
  const callbackUrl = reqBody.callbackUrl || null;
34
74
 
@@ -44,18 +84,30 @@ async function dispatchComputationPass(config, dependencies, computationManifest
44
84
 
45
85
  if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
46
86
 
47
- const calcNames = calcsInThisPass.map(c => c.name);
48
- logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} for ${dateStr}`);
87
+ logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} (Target: ${dateStr})`);
49
88
 
50
- // -- DATE ANALYSIS LOGIC (Unchanged) --
51
- const passEarliestDate = Object.values(DEFINITIVE_EARLIEST_DATES).reduce((a, b) => a < b ? a : b);
52
- const endDateUTC = new Date(Date.UTC(new Date().getUTCFullYear(), new Date().getUTCMonth(), new Date().getUTCDate() - 1));
89
+ // -- DATE ANALYSIS LOGIC (FIXED: RANGE SCAN) --
53
90
 
54
- // We only analyze the specific requested date to keep dispatch fast for the workflow
55
- const allExpectedDates = [dateStr];
91
+ // 1. Determine the absolute start of data history
92
+ const earliestDates = await getEarliestDataDates(config, dependencies);
93
+ const startDate = earliestDates.absoluteEarliest;
94
+ const endDate = new Date(dateStr + 'T00:00:00Z');
95
+
96
+ // 2. Generate the full range of dates to check
97
+ let allExpectedDates = getExpectedDateStrings(startDate, endDate);
98
+
99
+ // Safety fallback: if range is invalid or empty, default to target date only
100
+ if (!allExpectedDates || allExpectedDates.length === 0) {
101
+ logger.log('WARN', `[Dispatcher] Date range calculation returned empty (Start: ${startDate.toISOString()} -> End: ${endDate.toISOString()}). Defaulting to single target date.`);
102
+ allExpectedDates = [dateStr];
103
+ } else {
104
+ logger.log('INFO', `[Dispatcher] 📅 Analysis Range: ${allExpectedDates.length} days (${allExpectedDates[0]} to ${allExpectedDates[allExpectedDates.length-1]})`);
105
+ }
56
106
 
57
107
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
58
108
  const tasksToDispatch = [];
109
+
110
+ // Concurrency limit for analysis & forensics (Parallelize the historical scan)
59
111
  const limit = pLimit(20);
60
112
 
61
113
  const analysisPromises = allExpectedDates.map(d => limit(async () => {
@@ -71,6 +123,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
71
123
  prevDate.setUTCDate(prevDate.getUTCDate() - 1);
72
124
  prevDateStr = prevDate.toISOString().slice(0, 10);
73
125
 
126
+ // Only fetch previous status if it's within valid range
74
127
  if (prevDate >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
75
128
  fetchPromises.push(fetchComputationStatus(prevDateStr, config, dependencies));
76
129
  }
@@ -87,18 +140,16 @@ async function dispatchComputationPass(config, dependencies, computationManifest
87
140
 
88
141
  const report = analyzeDateExecution(d, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
89
142
 
143
+ // Handle Status Updates (Impossible / Blocked)
90
144
  const statusUpdates = {};
91
-
92
145
  report.impossible.forEach(item => {
93
146
  if (dailyStatus[item.name]?.hash !== STATUS_IMPOSSIBLE) {
94
147
  statusUpdates[item.name] = { hash: STATUS_IMPOSSIBLE, category: 'unknown', reason: item.reason };
95
148
  }
96
149
  });
97
-
98
150
  report.blocked.forEach(item => {
99
151
  statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
100
152
  });
101
-
102
153
  report.failedDependency.forEach(item => {
103
154
  const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
104
155
  statusUpdates[item.name] = { hash: false, category: 'unknown', reason: `Dependency Missing: ${missingStr}` };
@@ -109,21 +160,29 @@ async function dispatchComputationPass(config, dependencies, computationManifest
109
160
  }
110
161
 
111
162
  const validToRun = [...report.runnable, ...report.reRuns];
112
- validToRun.forEach(item => {
163
+
164
+ // [NEW] Parallel Forensics Check
165
+ await Promise.all(validToRun.map(item => limit(async () => {
166
+ const compName = normalizeName(item.name);
167
+
168
+ // 1. Determine Resource Requirements
169
+ const requiredResource = await checkCrashForensics(db, d, passToRun, compName);
170
+
113
171
  const uniqueDispatchId = crypto.randomUUID();
114
172
  tasksToDispatch.push({
115
173
  action: 'RUN_COMPUTATION_DATE',
116
174
  dispatchId: uniqueDispatchId,
117
175
  date: d,
118
176
  pass: passToRun,
119
- computation: normalizeName(item.name),
177
+ computation: compName,
120
178
  hash: item.hash || item.newHash,
121
179
  previousCategory: item.previousCategory || null,
122
180
  triggerReason: item.reason || "Unknown",
123
181
  dependencyResultHashes: item.dependencyResultHashes || {},
124
- timestamp: Date.now()
182
+ timestamp: Date.now(),
183
+ resources: requiredResource // 'standard' or 'high-mem'
125
184
  });
126
- });
185
+ })));
127
186
 
128
187
  } catch (e) {
129
188
  logger.log('ERROR', `[Dispatcher] Failed analysis for ${d}: ${e.message}`);
@@ -132,10 +191,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
132
191
 
133
192
  await Promise.all(analysisPromises);
134
193
 
135
- // -- NEW: CALLBACK & COUNTER INITIALIZATION --
194
+ // -- CALLBACK & COUNTER INITIALIZATION --
136
195
 
137
196
  if (tasksToDispatch.length > 0) {
138
- logger.log('INFO', `[Dispatcher] 📝 Preparing ${tasksToDispatch.length} tasks for execution...`);
139
197
 
140
198
  // 1. Initialize Shared State Document (The Counter)
141
199
  const runId = crypto.randomUUID();
@@ -144,17 +202,17 @@ async function dispatchComputationPass(config, dependencies, computationManifest
144
202
  if (callbackUrl) {
145
203
  await db.doc(metaStatePath).set({
146
204
  createdAt: new Date(),
147
- date: dateStr,
205
+ date: dateStr, // Acts as the "Job Label" (target date)
148
206
  pass: passToRun,
149
207
  totalTasks: tasksToDispatch.length,
150
- remainingTasks: tasksToDispatch.length, // <--- The Countdown
151
- callbackUrl: callbackUrl, // <--- The Workflow Hook
208
+ remainingTasks: tasksToDispatch.length,
209
+ callbackUrl: callbackUrl,
152
210
  status: 'IN_PROGRESS'
153
211
  });
154
- logger.log('INFO', `[Dispatcher] 🏁 Run State Initialized: ${runId}`);
212
+ logger.log('INFO', `[Dispatcher] 🏁 Run State Initialized: ${runId}. Tasks: ${tasksToDispatch.length}`);
155
213
  }
156
214
 
157
- // 2. Attach Run Metadata to every task
215
+ // 2. Attach Run Metadata
158
216
  tasksToDispatch.forEach(task => {
159
217
  task.runId = runId;
160
218
  task.metaStatePath = callbackUrl ? metaStatePath : null;
@@ -180,12 +238,13 @@ async function dispatchComputationPass(config, dependencies, computationManifest
180
238
  t.set(ledgerRef, {
181
239
  status: 'PENDING',
182
240
  dispatchId: task.dispatchId,
183
- runId: task.runId, // Track the batch ID
241
+ runId: task.runId,
184
242
  computation: task.computation,
185
243
  expectedHash: task.hash || 'unknown',
186
244
  createdAt: new Date(),
187
245
  dispatcherHash: currentManifestHash,
188
246
  triggerReason: task.triggerReason,
247
+ resources: task.resources, // Log intended resource type
189
248
  retries: 0
190
249
  }, { merge: true });
191
250
 
@@ -201,22 +260,36 @@ async function dispatchComputationPass(config, dependencies, computationManifest
201
260
 
202
261
  await Promise.all(txnPromises);
203
262
 
204
- // 4. Publish to Pub/Sub
263
+ // 4. Publish to Pub/Sub (Segregated by Resources)
205
264
  if (finalDispatched.length > 0) {
206
- logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} tasks to Pub/Sub...`);
207
265
 
208
- await pubsubUtils.batchPublishTasks(dependencies, {
209
- topicName: TOPIC_NAME,
210
- tasks: finalDispatched,
211
- taskType: `computation-pass-${passToRun}`,
212
- maxPubsubBatchSize: 100
213
- });
266
+ const standardTasks = finalDispatched.filter(t => t.resources !== 'high-mem');
267
+ const highMemTasks = finalDispatched.filter(t => t.resources === 'high-mem');
268
+
269
+ // Publish Standard
270
+ if (standardTasks.length > 0) {
271
+ logger.log('INFO', `[Dispatcher] ✅ Publishing ${standardTasks.length} Standard tasks...`);
272
+ await pubsubUtils.batchPublishTasks(dependencies, {
273
+ topicName: config.computationTopicStandard || 'computation-tasks',
274
+ tasks: standardTasks,
275
+ taskType: `computation-pass-${passToRun}-std`,
276
+ maxPubsubBatchSize: 100
277
+ });
278
+ }
279
+
280
+ // Publish High-Mem
281
+ if (highMemTasks.length > 0) {
282
+ logger.log('INFO', `[Dispatcher] 🏋️‍♀️ Publishing ${highMemTasks.length} tasks to HIGH-MEM infrastructure.`);
283
+ await pubsubUtils.batchPublishTasks(dependencies, {
284
+ topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
285
+ tasks: highMemTasks,
286
+ taskType: `computation-pass-${passToRun}-highmem`,
287
+ maxPubsubBatchSize: 100
288
+ });
289
+ }
214
290
 
215
- // Return count so workflow knows to wait
216
291
  return { dispatched: finalDispatched.length, runId };
217
292
  } else {
218
- // Edge Case: Analysis said "Run", but Ledger said "Already Done"
219
- // We must update the state doc to 0 or delete it, OR return 0 so workflow doesn't wait.
220
293
  logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED.`);
221
294
  return { dispatched: 0 };
222
295
  }
@@ -2,6 +2,7 @@
2
2
  * FILENAME: computation-system/helpers/computation_worker.js
3
3
  * PURPOSE: Consumes tasks, executes logic, and signals Workflow upon Batch Completion.
4
4
  * UPDATED: Implements IAM Auth for Workflow Callbacks.
5
+ * UPDATED: Implements Memory Heartbeat (Flight Recorder) for OOM detection.
5
6
  */
6
7
 
7
8
  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -9,7 +10,7 @@ const { getManifest } = require('../topology/ManifestLoader');
9
10
  const { StructuredLogger } = require('../logger/logger');
10
11
  const { recordRunAttempt } = require('../persistence/RunRecorder');
11
12
  const https = require('https');
12
- const { GoogleAuth } = require('google-auth-library'); // [NEW] Required for Auth
13
+ const { GoogleAuth } = require('google-auth-library');
13
14
 
14
15
  let calculationPackage;
15
16
  try { calculationPackage = require('aiden-shared-calculations-unified');
@@ -19,51 +20,68 @@ const calculations = calculationPackage.calculations;
19
20
  const MAX_RETRIES = 3;
20
21
 
21
22
  /**
22
- * [NEW] Helper: Fires the webhook back to Google Cloud Workflows.
23
- * UPDATED: Now generates an IAM Bearer Token to authenticate the request.
23
+ * [NEW] Helper: Starts a background heartbeat to track memory usage.
24
+ * This acts as a "Black Box Recorder". If the worker crashes (OOM),
25
+ * the last written value will remain in Firestore for the Dispatcher to analyze.
26
+ */
27
+ function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
28
+ const getMemStats = () => {
29
+ const mem = process.memoryUsage();
30
+ return {
31
+ rssMB: Math.round(mem.rss / 1024 / 1024), // Resident Set Size (OOM Killer Metric)
32
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
33
+ timestamp: new Date()
34
+ };
35
+ };
36
+
37
+ const timer = setInterval(async () => {
38
+ try {
39
+ const stats = getMemStats();
40
+ // Use update() to minimize payload size and avoid overwriting status
41
+ await db.doc(ledgerPath).update({
42
+ 'telemetry.lastMemory': stats,
43
+ 'telemetry.lastHeartbeat': new Date()
44
+ }).catch(() => {}); // Ignore write errors to prevent crashing the worker
45
+ } catch (e) {
46
+ // Silently fail on telemetry errors
47
+ }
48
+ }, intervalMs);
49
+
50
+ // Unref so this timer doesn't prevent the process from exiting naturally
51
+ timer.unref();
52
+
53
+ return timer;
54
+ }
55
+
56
+ /**
57
+ * Helper: Fires the webhook back to Google Cloud Workflows.
24
58
  */
25
59
  async function triggerWorkflowCallback(url, status, logger) {
26
60
  if (!url) return;
27
61
  logger.log('INFO', `[Worker] 🔔 BATCH COMPLETE! Triggering Workflow Callback: ${status}`);
28
62
 
29
63
  try {
30
- // 1. Get OAuth2 Access Token (Required for Workflows Callbacks)
31
- const auth = new GoogleAuth({
32
- scopes: ['https://www.googleapis.com/auth/cloud-platform']
33
- });
64
+ const auth = new GoogleAuth({ scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
34
65
  const client = await auth.getClient();
35
66
  const accessToken = await client.getAccessToken();
36
67
  const token = accessToken.token;
37
68
 
38
- // 2. Send Authenticated Request
39
69
  return new Promise((resolve, reject) => {
40
- const body = JSON.stringify({
41
- status: status,
42
- timestamp: new Date().toISOString()
43
- });
70
+ const body = JSON.stringify({ status: status, timestamp: new Date().toISOString() });
44
71
 
45
72
  const req = https.request(url, {
46
73
  method: 'POST',
47
74
  headers: {
48
75
  'Content-Type': 'application/json',
49
76
  'Content-Length': Buffer.byteLength(body),
50
- 'Authorization': `Bearer ${token}` // <--- CRITICAL FIX
77
+ 'Authorization': `Bearer ${token}`
51
78
  }
52
79
  }, (res) => {
53
- if (res.statusCode >= 200 && res.statusCode < 300) {
54
- resolve();
55
- } else {
56
- logger.log('WARN', `Callback responded with ${res.statusCode}`);
57
- // We resolve anyway to avoid crashing the worker logic
58
- resolve();
59
- }
80
+ if (res.statusCode >= 200 && res.statusCode < 300) { resolve(); }
81
+ else { logger.log('WARN', `Callback responded with ${res.statusCode}`); resolve(); }
60
82
  });
61
83
 
62
- req.on('error', (e) => {
63
- logger.log('ERROR', `Failed to trigger callback: ${e.message}`);
64
- resolve();
65
- });
66
-
84
+ req.on('error', (e) => { logger.log('ERROR', `Failed to trigger callback: ${e.message}`); resolve(); });
67
85
  req.write(body);
68
86
  req.end();
69
87
  });
@@ -73,37 +91,21 @@ async function triggerWorkflowCallback(url, status, logger) {
73
91
  }
74
92
 
75
93
  /**
76
- * [NEW] Helper: Decrements 'remainingTasks' in Firestore.
77
- * Returns the callbackUrl IF this was the last task.
94
+ * Helper: Decrements 'remainingTasks' in Firestore.
78
95
  */
79
96
  async function decrementAndCheck(db, metaStatePath, logger) {
80
97
  if (!metaStatePath) return null;
81
-
82
98
  try {
83
99
  const result = await db.runTransaction(async (t) => {
84
100
  const ref = db.doc(metaStatePath);
85
101
  const doc = await t.get(ref);
86
-
87
- if (!doc.exists) return null; // State might have expired or been deleted
102
+ if (!doc.exists) return null;
88
103
  const data = doc.data();
89
-
90
104
  const newRemaining = (data.remainingTasks || 0) - 1;
91
-
92
- t.update(ref, {
93
- remainingTasks: newRemaining,
94
- lastUpdated: new Date()
95
- });
96
-
97
- // Return needed data only if we hit 0 (or lower, for safety)
98
- return {
99
- remaining: newRemaining,
100
- callbackUrl: data.callbackUrl
101
- };
105
+ t.update(ref, { remainingTasks: newRemaining, lastUpdated: new Date() });
106
+ return { remaining: newRemaining, callbackUrl: data.callbackUrl };
102
107
  });
103
-
104
- if (result && result.remaining <= 0) {
105
- return result.callbackUrl;
106
- }
108
+ if (result && result.remaining <= 0) return result.callbackUrl;
107
109
  } catch (e) {
108
110
  logger.log('ERROR', `[Worker] Failed to decrement batch counter: ${e.message}`);
109
111
  }
@@ -125,12 +127,12 @@ async function handleComputationTask(message, config, dependencies) {
125
127
 
126
128
  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
127
129
 
128
- // Extract fields including new metaStatePath
129
130
  const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, metaStatePath } = data;
130
131
 
131
132
  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
132
133
 
133
134
  const retryCount = message.deliveryAttempt || 1;
135
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
134
136
 
135
137
  // --- POISON MESSAGE HANDLING (DLQ) ---
136
138
  if (retryCount > MAX_RETRIES) {
@@ -144,36 +146,38 @@ async function handleComputationTask(message, config, dependencies) {
144
146
  failureReason: 'MAX_RETRIES_EXCEEDED'
145
147
  });
146
148
 
147
- await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
149
+ await db.doc(ledgerPath).set({
148
150
  status: 'FAILED',
149
151
  error: 'Max Retries Exceeded (Poison Message)',
150
152
  failedAt: new Date()
151
153
  }, { merge: true });
152
154
 
153
155
  const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
154
- if (callbackUrl) {
155
- await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
156
- }
157
-
156
+ if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
158
157
  return;
159
158
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
160
159
  }
161
160
 
162
161
  logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`);
163
162
 
164
- // Update Status to IN_PROGRESS
163
+ // 1. Update Status to IN_PROGRESS & Initialize Telemetry
165
164
  try {
166
- await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
165
+ await db.doc(ledgerPath).set({
167
166
  status: 'IN_PROGRESS',
168
167
  workerId: process.env.K_REVISION || 'unknown',
169
168
  startedAt: new Date(),
170
- dispatchId: dispatchId
169
+ dispatchId: dispatchId,
170
+ telemetry: { startTime: new Date(), lastMemory: null } // Init for heartbeat
171
171
  }, { merge: true });
172
172
  } catch (leaseErr) {}
173
173
 
174
+ // 2. START HEARTBEAT (The Flight Recorder)
175
+ const heartbeatTimer = startMemoryHeartbeat(db, ledgerPath, 2000);
176
+
174
177
  let computationManifest;
175
178
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
176
179
  } catch (manifestError) {
180
+ clearInterval(heartbeatTimer); // Stop if we fail early
177
181
  logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
178
182
  return;
179
183
  }
@@ -186,6 +190,9 @@ async function handleComputationTask(message, config, dependencies) {
186
190
  );
187
191
  const duration = Date.now() - startTime;
188
192
 
193
+ // STOP HEARTBEAT ON SUCCESS
194
+ clearInterval(heartbeatTimer);
195
+
189
196
  const failureReport = result?.updates?.failureReport || [];
190
197
  const successUpdates = result?.updates?.successUpdates || {};
191
198
 
@@ -194,26 +201,23 @@ async function handleComputationTask(message, config, dependencies) {
194
201
  throw new Error(failReason.error.message || 'Computation Logic Failed');
195
202
  }
196
203
  else {
197
- if (Object.keys(successUpdates).length > 0) {
198
- logger.log('INFO', `[Worker] Stored: ${computation}`);
199
- } else {
200
- logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`);
201
- }
204
+ if (Object.keys(successUpdates).length > 0) { logger.log('INFO', `[Worker] ✅ Stored: ${computation}`); }
205
+ else { logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`); }
202
206
 
203
- await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
207
+ await db.doc(ledgerPath).update({
204
208
  status: 'COMPLETED',
205
209
  completedAt: new Date()
206
210
  }).catch(() => {});
207
211
 
208
212
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, { durationMs: duration }, triggerReason);
209
213
 
210
- // Decrement & Callback
211
214
  const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
212
- if (callbackUrl) {
213
- await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
214
- }
215
+ if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
215
216
  }
216
217
  } catch (err) {
218
+ // STOP HEARTBEAT ON ERROR
219
+ clearInterval(heartbeatTimer);
220
+
217
221
  // --- ERROR HANDLING ---
218
222
  const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
219
223
  err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
@@ -231,7 +235,7 @@ async function handleComputationTask(message, config, dependencies) {
231
235
  failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
232
236
  });
233
237
 
234
- await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
238
+ await db.doc(ledgerPath).set({
235
239
  status: 'FAILED',
236
240
  error: err.message || 'Permanent Deterministic Error',
237
241
  failedAt: new Date()
@@ -240,23 +244,17 @@ async function handleComputationTask(message, config, dependencies) {
240
244
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
241
245
 
242
246
  const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
243
- if (callbackUrl) {
244
- await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
245
- }
246
-
247
+ if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
247
248
  return;
248
249
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
249
250
  }
250
251
 
251
- if (retryCount >= MAX_RETRIES) {
252
- throw err;
253
- }
252
+ if (retryCount >= MAX_RETRIES) { throw err; }
254
253
 
255
254
  logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
256
255
  await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
257
-
258
256
  throw err;
259
257
  }
260
258
  }
261
259
 
262
- module.exports = { handleComputationTask };
260
+ module.exports = { handleComputationTask };
@@ -0,0 +1,93 @@
1
+ # The BullTrackers Computation System: An Advanced DAG-Based Architecture for High-Fidelity Financial Simulation
2
+
3
+ ## Abstract
4
+
5
+ This paper details the design, implementation, and theoretical underpinnings of the BullTrackers Computation System, a proprietary high-performance execution engine designed for complex financial modeling and user behavior analysis. The system leverages a Directed Acyclic Graph (DAG) architecture to orchestrate interdependent calculations, employing Kahn’s Algorithm for topological sorting and Tarjan’s Algorithm for cycle detection. Key innovations include "Content-Based Dependency Short-Circuiting" to eliminate redundant re-computation, a "System Epoch" and "Infrastructure Hash" based auditing system for reproducibility, and a batch-flushing execution model designed to mitigate Out-Of-Memory (OOM) errors during high-volume processing. We further explore the application of this system in running advanced psychometric and risk-geometry models ("Smart Money" scoring) and how the architecture supports self-healing workflows through granular state management.
6
+
7
+ ## 1. Introduction
8
+
9
+ In modern financial analytics, derived data often depends on a complex web of inputs arriving at different frequencies: real-time price ticks, daily portfolio snapshots, and historical trade logs. Traditional linear batch-processing pipelines fail to capture the nuances of these interdependencies, often leading to race conditions or redundant computations.
10
+
11
+ The BullTrackers Computation System was devised to solve this by treating the entire domain logic as a **Directed Acyclic Graph (DAG)**. Every calculation is a node, and every data requirement is an edge. By resolving the topology of this graph dynamically at runtime, the system ensures that:
12
+ 1. Data is always available before it is consumed (referential integrity).
13
+ 2. Only necessary computations are executed (efficiency).
14
+ 3. Changes in code or infrastructure propagate deterministically through the graph (auditability).
15
+
16
+ ## 2. Theoretical Foundations
17
+
18
+ The core utility of the system is its ability to turn a collection of loosely coupled JavaScript classes into a strictly ordered execution plan.
19
+
20
+ ### 2.1 Directed Acyclic Graphs (DAGs)
21
+ We model the computation space as a DAG where $G = (V, E)$.
22
+ * **Vertices ($V$)**: Individual Calculation Units (e.g., `NetProfit`, `SmartMoneyScore`).
23
+ * **Edges ($E$)**: Data dependencies, where an edge $(u, v)$ implies $v$ requires the output of $u$.
24
+
25
+ ### 2.2 Topological Sorting (Kahn’s Algorithm)
26
+ To execute the graph, we must linearize it such that for every dependency $u \rightarrow v$, $u$ precedes $v$ in the execution order. We implement **Kahn’s Algorithm** within `ManifestBuilder.js` to achieve this:
27
+ 1. Calculate the **in-degree** (number of incoming edges) for all nodes.
28
+ 2. Initialize a queue with all nodes having an in-degree of 0 (independent nodes).
29
+ 3. While the queue is not empty:
30
+ * Dequeue node $N$ and add it to the `SortedManifest`.
31
+ * For each neighbor $M$ dependent on $N$, decrement $M$'s in-degree.
32
+ * If $M$'s in-degree becomes 0, enqueue $M$.
33
+ 4. Processing the zero in-degree frontier level by level generates a series of "Passes" or "Waves" of execution, allowing parallel processing of independent nodes within the same pass.
34
+
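A minimal sketch of this pass-grouping behaviour, assuming a plain `Map` adjacency list; the `buildPasses` helper and its input shape are illustrative, not the actual `ManifestBuilder.js` API:

```javascript
// Kahn's algorithm grouped into "passes": each pass contains every node whose
// remaining in-degree is zero, so members of a pass can run in parallel.
function buildPasses(adjacencyList) {
  // adjacencyList: Map<name, string[]> where the array lists the node's dependencies.
  const inDegree = new Map();
  const dependents = new Map(); // dependency -> nodes that consume it

  for (const [node, deps] of adjacencyList) {
    inDegree.set(node, deps.length);
    deps.forEach(dep => {
      if (!dependents.has(dep)) dependents.set(dep, []);
      dependents.get(dep).push(node);
    });
  }

  const passes = [];
  let current = [...inDegree.keys()].filter(n => inDegree.get(n) === 0);

  while (current.length > 0) {
    passes.push(current);
    const next = [];
    for (const node of current) {
      for (const dependent of dependents.get(node) || []) {
        inDegree.set(dependent, inDegree.get(dependent) - 1);
        if (inDegree.get(dependent) === 0) next.push(dependent);
      }
    }
    current = next;
  }

  if (passes.flat().length !== adjacencyList.size) {
    throw new Error('Cycle detected: not all nodes could be ordered');
  }
  return passes;
}

// Example: PriceExtractor -> RSI -> SmartMoneyScore
console.log(buildPasses(new Map([
  ['PriceExtractor', []],
  ['RSI', ['PriceExtractor']],
  ['SmartMoneyScore', ['RSI', 'PriceExtractor']],
]))); // [['PriceExtractor'], ['RSI'], ['SmartMoneyScore']]
```
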
35
+ ### 2.3 Cycle Detection (Tarjan’s Algorithm)
36
+ A critical failure mode is the introduction of a cycle (e.g., A needs B, B needs A), which effectively turns the DAG into a directed cyclic graph that cannot be topologically ordered.
37
+ If Kahn’s algorithm fails to visit all nodes (indicating a cycle exists), the system falls back to **Tarjan’s Strongly Connected Components (SCC) Algorithm**. This uses depth-first search to identify the exact cycle chain (e.g., `Calc A -> Calc B -> Calc C -> Calc A`), reporting the "First Cycle Found" to the developer for immediate remediation.
38
+
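A condensed, textbook form of that fallback, assuming the same `Map` adjacency list as above; the package's `detectCircularDependencies` carries additional reporting logic on top of this recursion:

```javascript
// Minimal Tarjan SCC: returns every strongly connected component with more
// than one member, i.e. the dependency cycles that break topological sorting.
function findCycles(adjacencyList) {
  let index = 0;
  const indices = new Map(), lowLinks = new Map();
  const stack = [], onStack = new Set(), cycles = [];

  function strongconnect(v) {
    indices.set(v, index); lowLinks.set(v, index); index++;
    stack.push(v); onStack.add(v);

    for (const w of adjacencyList.get(v) || []) {
      if (!indices.has(w)) {
        strongconnect(w);
        lowLinks.set(v, Math.min(lowLinks.get(v), lowLinks.get(w)));
      } else if (onStack.has(w)) {
        lowLinks.set(v, Math.min(lowLinks.get(v), indices.get(w)));
      }
    }

    if (lowLinks.get(v) === indices.get(v)) {
      const component = [];
      let w;
      do { w = stack.pop(); onStack.delete(w); component.push(w); } while (w !== v);
      if (component.length > 1) cycles.push(component.reverse());
    }
  }

  for (const v of adjacencyList.keys()) if (!indices.has(v)) strongconnect(v);
  return cycles;
}

// "Calc A -> Calc B -> Calc C -> Calc A" is reported as a single component.
console.log(findCycles(new Map([
  ['CalcA', ['CalcB']], ['CalcB', ['CalcC']], ['CalcC', ['CalcA']],
])));
```
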
39
+ ## 3. System Architecture & "Source of Truth"
40
+
41
+ The architecture is centered around the **Manifest**, a dynamic, immutable registry of all capabilities within the system.
42
+
43
+ ### 3.1 The Dynamic Manifest
44
+ Unlike static build tools, the Manifest is built at runtime by `ManifestLoader.js` and `ManifestBuilder.js`. It employs an **Auto-Discovery** mechanism that scans directories for calculation classes.
45
+ * **Static Metadata**: Each class exposes `getMetadata()` and `getDependencies()`.
46
+ * **Product Line Filtering**: The builder can slice the graph, generating a subgraph relevant only to specific product lines (e.g., "Crypto", "Stocks"), reducing overhead.
47
+
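A hypothetical calculation class showing the shape of that contract; the field names returned by `getMetadata()` and the `context.dependencies` access are assumptions for illustration, not the package's exact schema:

```javascript
// Illustrative calculation unit: the builder only needs static metadata and a
// process() entry point to place this node into the dependency graph.
class NetProfit {
  static getMetadata() {
    return {
      name: 'NetProfit',
      category: 'signals',
      productLines: ['Stocks', 'Crypto'], // consulted by Product Line Filtering
    };
  }

  static getDependencies() {
    // Upstream nodes whose ResultHash feeds this node's Dependency Hash.
    return ['PriceExtractor'];
  }

  async process(context) {
    const prices = context.dependencies.PriceExtractor || {};
    // ...derive per-user net profit from the upstream price data...
    return { usersProcessed: Object.keys(prices).length };
  }
}

module.exports = { NetProfit };
```
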
48
+ ### 3.2 Granular Hashing & The Audit Chain
49
+ To ensure that "if the code hasn't changed, the result shouldn't change," the system implements a multi-layered hashing strategy (`HashManager.js`):
50
+ 1. **Code Hash**: The raw string content of the calculation class.
51
+ 2. **Layer Hash**: Hashes of shared utility layers (`mathematics`, `profiling`) used by the class.
52
+ 3. **Dependency Hash**: A composite hash of all upstream dependencies.
53
+ 4. **Infrastructure Hash**: A hash representing the underlying system environment.
54
+ 5. **System Epoch**: A manual versioning flag to force global re-computation.
55
+
56
+ This results in a `Composite Hash`. If this hash matches the `storedHash` in the database, execution can be skipped entirely.
57
+
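A minimal sketch of how such a composite could be assembled, assuming SHA-256 throughout; the `compositeHash` helper below is hypothetical, and the real layering lives in `HashManager.js`:

```javascript
const crypto = require('crypto');

const sha256 = (value) => crypto.createHash('sha256').update(value).digest('hex');

// Combine the individual fingerprints into one composite hash. If any input
// changes, the composite changes; if none change, the stored result is reusable.
function compositeHash({ codeHash, layerHashes, dependencyHashes, infraHash, systemEpoch }) {
  return sha256([
    codeHash,
    ...layerHashes.slice().sort(), // order-independent
    ...Object.keys(dependencyHashes).sort().map(k => `${k}:${dependencyHashes[k]}`),
    infraHash,
    String(systemEpoch),
  ].join('|'));
}

const current = compositeHash({
  codeHash: sha256('class RSI { /* ... */ }'),
  layerHashes: [sha256('mathematics'), sha256('profiling')],
  dependencyHashes: { PriceExtractor: 'HashA' },
  infraHash: sha256('node18-europe-west1'),
  systemEpoch: 4,
});
// if (current === storedHash) { /* execution can be skipped entirely */ }
```
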
58
+ ## 4. Execution Engine: Flow, Resilience & Optimization
59
+
60
+ The `WorkflowOrchestrator` acts as the runtime kernel, utilizing `StandardExecutor` and `MetaExecutor` for the heavy lifting.
61
+
62
+ ### 4.1 Content-Based Dependency Short-Circuiting
63
+ A major optimization is the **Content-Based Short-Circuiting** logic found in `WorkflowOrchestrator.js`, which can prune entire downstream branches from a run:
64
+ Even if an upstream dependency *re-runs* (e.g., its timestamp changed), its *output* might be identical to the previous run.
65
+ 1. The system tracks `ResultHash` (hash of the actual output data).
66
+ 2. When checking dependencies for Node B (which depends on A), if A has re-run but its `ResultHash` is unchanged from what B used last time, B **does not need to re-run**.
67
+ 3. This effectively stops "change propagation" dead in its tracks if the data change is semantically null.
68
+
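The core comparison can be illustrated with a hypothetical `needsRerun` helper; the production check in `WorkflowOrchestrator.js` handles many more states (code hashes, categories, historical continuity):

```javascript
// Node B stores, alongside its own result, the ResultHash of each dependency it
// consumed. On the next run we compare those against the latest upstream hashes.
function needsRerun(lastRunDependencyHashes, currentDependencyHashes) {
  return Object.keys(currentDependencyHashes).some(
    dep => lastRunDependencyHashes[dep] !== currentDependencyHashes[dep]
  );
}

// Upstream Calc A re-ran but produced byte-identical output ('HashX'):
console.log(needsRerun({ CalcA: 'HashX' }, { CalcA: 'HashX' })); // false -> SKIPPED
// Upstream output actually changed:
console.log(needsRerun({ CalcA: 'HashX' }, { CalcA: 'HashY' })); // true  -> re-run
```
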
69
+ ### 4.2 Batch Flushing & OOM Prevention
70
+ Financial datasets (processing 100k+ users with daily portfolios) often exceed Node.js heap limits. The `StandardExecutor` implements a **Streaming & Flushing** architecture:
71
+ * **Streams** inputs (Portfolio/History) using generators (`yield`), so the full user set is never loaded into memory at once.
72
+ * **Buffers** results in a `state` object.
73
+ * **Flushes** to the database (Firestore/Storage) every $N$ users (e.g., 5000), clearing the internal buffer to avoid Out-Of-Memory crashes.
74
+ * **Incremental Sharding**: It manages shard indices dynamically to split massive result sets into retrievable chunks.
75
+
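A simplified sketch of that loop, including the heap-pressure check this release adds alongside the user-count threshold; `processChunk` and `flushBuffer` are placeholders for the executor's internals:

```javascript
const v8 = require('v8');

const COUNT_THRESHOLD = 5000;   // flush at least every N users
const MEMORY_THRESHOLD = 0.70;  // ...or when 70% of the V8 heap limit is used

async function runStreaming(userChunks, processChunk, flushBuffer) {
  let usersSinceLastFlush = 0;

  for await (const chunk of userChunks) {          // generator yields small user chunks
    await processChunk(chunk);                     // buffers results in memory
    usersSinceLastFlush += Object.keys(chunk).length;

    const heap = v8.getHeapStatistics();
    const heapUsedRatio = heap.used_heap_size / heap.heap_size_limit;

    if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
      await flushBuffer();                         // persist and clear the buffer
      usersSinceLastFlush = 0;
    }
  }
  await flushBuffer();                             // final flush for the tail
}
```
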
76
+ ### 4.3 Handling "Impossible" States
77
+ If a dependency fails or is missing critical data, the Orchestrator marks dependent nodes as `IMPOSSIBLE` rather than failing them. This allows the rest of the graph (independent branches) to continue execution, maximizing system throughput even in a partially degraded state.
78
+
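One way to express that propagation rule, assuming a status map keyed by calculation name; the hypothetical `propagateImpossible` helper is only a sketch of the Orchestrator's analysis step:

```javascript
// Repeatedly sweep the graph: a node whose dependency is IMPOSSIBLE becomes
// IMPOSSIBLE itself, while unrelated branches stay RUNNABLE.
function propagateImpossible(adjacencyList, statuses) {
  let changed = true;
  while (changed) {
    changed = false;
    for (const [node, deps] of adjacencyList) {
      if (statuses[node] === 'IMPOSSIBLE') continue;
      if (deps.some(dep => statuses[dep] === 'IMPOSSIBLE')) {
        statuses[node] = 'IMPOSSIBLE';
        changed = true;
      }
    }
  }
  return statuses;
}

console.log(propagateImpossible(
  new Map([['PriceExtractor', []], ['RSI', ['PriceExtractor']], ['Sentiment', []]]),
  { PriceExtractor: 'IMPOSSIBLE', RSI: 'RUNNABLE', Sentiment: 'RUNNABLE' }
));
// { PriceExtractor: 'IMPOSSIBLE', RSI: 'IMPOSSIBLE', Sentiment: 'RUNNABLE' }
```
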
79
+ ## 5. Advanced Application: Psychometrics & Risk Geometry
80
+
81
+ The capabilities of this computation engine are best demonstrated by the `profiling.js` layer it powers. Because the DAG ensures all historical and portfolio data is perfectly aligned, we can run sophisticated $O(n^2)$ or $O(n \log n)$ algorithms on user data reliably.
82
+
83
+ ### 5.1 "Smart Money" & Cognitive Profiling
84
+ The system executes a `UserClassifier` that computes:
85
+ * **Risk Geometry**: Using the **Monotone Chain** algorithm to compute the Convex Hull of a user's risk/reward performance (Efficient Frontier analysis).
86
+ * **Psychometrics**: Detecting "Revenge Trading" (increasing risk after losses) and "Disposition Skew" (holding losers too long).
87
+ * **Attribution**: Separating "Luck" (market beta) from "Skill" (Alpha) by comparing performance against sector benchmarks.
88
+
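For reference, the Monotone Chain construction behind the Risk Geometry bullet can be sketched as follows; the `{ risk, reward }` point shape is an assumption for illustration, and the production classifier layers additional scoring on top:

```javascript
// Andrew's Monotone Chain: returns the convex hull of 2D points in
// counter-clockwise order. Applied to (risk, reward) pairs, it traces the
// user's empirical "efficient frontier".
function convexHull(points) {
  const pts = points
    .map(p => [p.risk, p.reward])
    .sort((a, b) => a[0] - b[0] || a[1] - b[1]);
  if (pts.length <= 2) return pts;

  const cross = (o, a, b) =>
    (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0]);

  const lower = [];
  for (const p of pts) {
    while (lower.length >= 2 && cross(lower[lower.length - 2], lower[lower.length - 1], p) <= 0) lower.pop();
    lower.push(p);
  }
  const upper = [];
  for (const p of pts.slice().reverse()) {
    while (upper.length >= 2 && cross(upper[upper.length - 2], upper[upper.length - 1], p) <= 0) upper.pop();
    upper.push(p);
  }
  // Drop the last point of each half (it repeats the other half's start).
  return lower.slice(0, -1).concat(upper.slice(0, -1));
}
```
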
89
+ These complex models depend on the *guarantee* provided by the DAG that all necessary history and price data is pre-computed and available in the `Context`.
90
+
91
+ ## 6. Conclusion
92
+
93
+ The BullTrackers Computation System represents a shift from "Action-Based" to "State-Based" architecture. By encoding the domain logic into a Directed Acyclic Graph, we achieve a system that is self-healing, massively scalable via short-circuiting and batching, and capable of supporting deep analytical models. It provides the robustness required for high-stakes financial simulation, ensuring that every decimal point is traceable, reproducible, and verifiable.
@@ -43,14 +43,14 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
43
43
  const timings = rawExecStats.timings || {};
44
44
 
45
45
  const runEntry = {
46
- runId: runId,
46
+ runId: runId,
47
47
  computationName: computation,
48
- pass: String(pass),
49
- workerId: workerId,
50
- targetDate: targetDate,
51
- triggerTime: now.toISOString(),
52
- durationMs: detailedMetrics.durationMs || 0,
53
- status: status,
48
+ pass: String(pass),
49
+ workerId: workerId,
50
+ targetDate: targetDate,
51
+ triggerTime: now.toISOString(),
52
+ durationMs: detailedMetrics.durationMs || 0,
53
+ status: status,
54
54
 
55
55
  // [NEW] Trigger Context
56
56
  trigger: {
@@ -325,13 +325,13 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
325
325
  }
326
326
 
327
327
  // 3. BLOCKED / IMPOSSIBLE / UPTODATE
328
- analysis.blocked.forEach(item => pushIfValid(dateSummary.blocked, item));
329
- analysis.failedDependency.forEach(item => pushIfValid(dateSummary.blocked, item, "Dependency Missing"));
330
- analysis.impossible.forEach(item => pushIfValid(dateSummary.impossible, item));
331
- analysis.skipped.forEach(item => pushIfValid(dateSummary.uptodate, item, "Up To Date"));
328
+ analysis.blocked.forEach (item => pushIfValid(dateSummary.blocked, item));
329
+ analysis.failedDependency.forEach (item => pushIfValid(dateSummary.blocked, item, "Dependency Missing"));
330
+ analysis.impossible.forEach (item => pushIfValid(dateSummary.impossible, item));
331
+ analysis.skipped.forEach (item => pushIfValid(dateSummary.uptodate, item, "Up To Date"));
332
332
 
333
333
  // Meta stats
334
- const includedCount = dateSummary.run.length + dateSummary.rerun.length + dateSummary.stable.length +
334
+ const includedCount = dateSummary.run.length + dateSummary.rerun.length + dateSummary.stable.length +
335
335
  dateSummary.blocked.length + dateSummary.impossible.length + dateSummary.uptodate.length;
336
336
  dateSummary.meta.totalIncluded = includedCount;
337
337
  dateSummary.meta.match = (includedCount === expectedCount);
@@ -1,7 +1,6 @@
1
1
  # Cloud Workflows Definition for BullTrackers Computation Pipeline
2
2
  # Orchestrates 5 sequential passes using Event-Driven Callbacks (Zero Polling).
3
- # FIXED: Replaced invalid 'sys' callback functions with 'events' library functions.
4
- # FIXED: Proper extraction of 'callback_details.url' for the dispatcher.
3
+ # FIXED: Restored 'passes' and 'max_retries' variables in init step.
5
4
 
6
5
  main:
7
6
  params: [input]
@@ -10,8 +9,14 @@ main:
10
9
  assign:
11
10
  - project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
12
11
  - location: "europe-west1"
13
- # If 'date' is provided in input, use it. Otherwise default to today (YYYY-MM-DD).
14
- - date_to_run: ${default(map.get(input, "date"), text.substring(time.format(sys.now()), 0, 10))}
12
+
13
+ # T-1 Date Logic (Process Yesterday)
14
+ - now: ${sys.now()}
15
+ - yesterday_timestamp: ${now - 86400}
16
+ - yesterday_str: ${text.substring(time.format(yesterday_timestamp), 0, 10)}
17
+ - date_to_run: ${default(map.get(input, "date"), yesterday_str)}
18
+
19
+ # Configuration Variables (Restored)
15
20
  - passes: ["1", "2", "3", "4", "5"]
16
21
  - max_retries: 3
17
22
 
@@ -42,7 +47,6 @@ main:
42
47
  - attempt_count: ${attempt_count + 1}
43
48
 
44
49
  # 1. GENERATE CALLBACK ENDPOINT
45
- # We use the 'events' library. This returns an object containing the URL.
46
50
  - create_callback:
47
51
  call: events.create_callback_endpoint
48
52
  args:
@@ -60,7 +64,6 @@ main:
60
64
  severity: "INFO"
61
65
 
62
66
  # 2. TRIGGER DISPATCHER
63
- # We pass the extracted 'callback_url' string to the dispatcher.
64
67
  - trigger_dispatcher:
65
68
  call: http.post
66
69
  args:
@@ -89,12 +92,11 @@ main:
89
92
  next: pass_retry_loop
90
93
 
91
94
  # 4. WAIT FOR WORKER SIGNAL
92
- # We must pass the original 'callback_details' object here, not the URL string.
93
95
  - wait_for_completion:
94
96
  call: events.await_callback
95
97
  args:
96
98
  callback: ${callback_details}
97
- timeout: 86400 # Wait up to 24 hours
99
+ timeout: 10800 # UPDATED: Reduced from 86400 (24h) to 10800 (3h) to detect crashes faster
98
100
  result: callback_request
99
101
 
100
102
  # 5. PROCESS SIGNAL
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.292",
3
+ "version": "1.0.294",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -1,210 +0,0 @@
1
- # BullTrackers Computation System: Architecture & Operational Manual
2
-
3
- This document provides a comprehensive overview of the BullTrackers Computation System, a distributed, deterministic, and self-optimizing data pipeline. Unlike traditional task schedulers, this system operates on "Build System" principles, treating data calculations as compiled artifacts with strict versioning and dependency guarantees.
4
-
5
- ---
6
-
7
- ## 1. System Philosophy & Core Concepts
8
-
9
- ### The "Build System" Paradigm
10
- We treat the computation pipeline like a large-scale software build system (e.g., Bazel or Make). Every data point is an "artifact" produced by a specific version of code (Code Hash) acting on specific versions of dependencies (Dependency Hashes).
11
- * **Determinism**: If the input data and code haven't changed, the output *must* be identical. We verify this to skip unnecessary work.
12
- * **Merkle Tree Structure**: The state of the system is a DAG (Directed Acyclic Graph) of hashes. A change in a root node propagates potential invalidation down the tree, but invalidation stops as soon as a node produces the same output as before (Short-Circuiting).
13
-
14
- ### Source-of-Truth Architecture
15
- The **Root Data Index** is the absolute source of truth. No computation can start until the underlying raw data (prices, signals) is indexed and verified "Available" for the target date. This prevents partial runs and "garbage-in-garbage-out".
16
-
17
- ### The Three-Layer Hash Model
18
- To optimize execution, we track three distinct hashes for every calculation:
19
- 1. **Code Hash (Static)**: A SHA-256 hash of the cleaned source code (comments and whitespace stripped). This tells us if the logic *might* have changed.
20
- 2. **SimHash (Behavioral)**: Generated by running the code against a deterministic "Fabricated" context. This tells us if the logic *actually* changed behavior (e.g., a refactor that changes variable names but not logic will have a different Code Hash but the same SimHash).
21
- 3. **ResultHash (Output)**: A hash of the actual production output from a run. This tells us if the data changed. Used for downstream short-circuiting.
22
-
23
- ---
24
-
25
- ## 2. Core Components Overview
26
-
27
- ### Root Data Indexer
28
- A scheduled crawler that verifies the availability of raw external data (e.g., asset prices, global signals) for a given date. It produces an "Availability Manifest" that the Dispatcher consults before scheduling anything.
29
-
30
- ### Manifest Builder
31
- * **Role**: Topology Discovery.
32
- * **Mechanism**: It scans the `calculations/` directory, loads every module, and builds the global Dependency Graph (DAG) in memory.
33
- * **Output**: A topological sort of all calculations assigned to "Passes" (Pass 0, Pass 1, etc.).
34
-
35
- ### The Dispatcher (`WorkflowOrchestrator.js`)
36
- The "Brain" of the system. It runs largely stateless, analyzing the `StatusRepository` against the `Manifest`.
37
- * **Responsibility**: For a given Grid (Date x Calculation), it determines if the state is `RUNNABLE`, `BLOCKED`, `SKIPPED`, or `IMPOSSIBLE`.
38
- * **Key Logic**: It implements the "Short-Circuiting" and "Historical Continuity" checks.
39
-
40
- ### The Build Optimizer
41
- A pre-flight tool that attempts to avoid running tasks by proving they are identical to previous versions.
42
- * **Mechanism**: If a calculation's Code Hash changes, the Optimizer runs a **Simulation** (using `SimRunner`) to generate a SimHash. If the SimHash matches the registry, the system acts as if the code never changed, skipping the production re-run.
43
-
44
- ### The Worker (`StandardExecutor` / `MetaExecutor`)
45
- The execution unit. It is unaware of the broader topology.
46
- * **Input**: A target Calculation and Date.
47
- * **Action**: Fetches inputs, runs `process()`, validates results, and writes to Firestore.
48
- * **Output**: The computed data + the **ResultHash**.
49
-
50
- ---
51
-
52
- ## 3. The Daily Lifecycle (Chronological Process)
53
-
54
- ### Phase 1: Indexing
55
- The system waits for the `SystemEpoch` to advance. The Root Data Indexer checks for "Canary Blocks" (indicators that external data providers have finished for the day). Once confirmed, the date is marked `OPEN`.
56
-
57
- ### Phase 2: Pre-Flight Optimization
58
- Before dispatching workers:
59
- 1. The system identifies all calculations with new **Code Hashes**.
60
- 2. It runs `SimRunner` for these calculations to generate fresh **SimHashes**.
61
- 3. If `SimHash(New) == SimHash(Old)`, the system updates the Status Ledger to enable the new Code Hash without flagging it as "Changed".
62
-
63
- ### Phase 3: Dispatch Analysis
64
- The Dispatcher iterates through the Topological Passes (0 -> N). For each calculation, it queries `calculateExecutionStatus`:
65
- * Are dependencies done?
66
- * Did dependencies change their output (`ResultHash`)?
67
- * Is historical context available?
68
-
69
- ### Phase 4: Execution Waves
70
- Workers are triggered via Pub/Sub or direct method invocation.
71
- * **Pass 1**: Primitive conversions (e.g., Price Extractor).
72
- * **Pass 2**: Technical Indicators that depend on Pass 1.
73
- * **Pass 3**: Aggregations and Complex Metrics.
74
-
75
- ### Phase 5: Reconciliation
76
- After all queues drain, the system performs a final sweep. Any tasks marked `FAILED` are retried (up to a limit). Impossible tasks are finalized as `IMPOSSIBLE`.
77
-
78
- ---
79
-
80
- ## 4. Deep Dive: Hashing & Dependency Logic
81
-
82
- ### Intrinsic Code Hashing
83
- Located in `topology/HashManager.js`.
84
- We generate a unique fingerprint for every calculation file:
85
- ```javascript
86
- const clean = codeString.replace(/\/\*[\s\S]*?\*\//g, '').replace(/\/\/.*$/gm, '').replace(/\s+/g, '');
87
- const hash = require('crypto').createHash('sha256').update(clean).digest('hex');
88
- ```
89
- This ensures that changes to comments or formatting do *not* trigger re-runs.
90
-
91
- ### Behavioral Hashing (SimHash)
92
- Located in `simulation/SimRunner.js`.
93
- When code changes, we can't be 100% sure it's safe just by looking at the source.
94
- 1. **The Fabricator**: Generates a deterministic mock `Context` (prices, previous results) based on the input schema.
95
- 2. **Simulation Run**: The calculation `process()` method is executed against this mock data.
96
- 3. **The Registry**: The hash of the *output* of this simulation is stored.
97
- If a refactor results in the exact same Mock Output, the system considers the change "Cosmetic".
98
-
99
- ### Dependency Short-Circuiting
100
- Implemented in `WorkflowOrchestrator.js` (`analyzeDateExecution`).
101
- Even if an upstream calculation re-runs, downstream dependents might not need to.
102
- * **Logic**:
103
- * Calc A (Upstream) re-runs. Old Output Hash: `HashX`. New Output Hash: `HashX`.
104
- * Calc B (Downstream) sees that Calc A "changed" (new timestamp), BUT the content hash `HashX` is identical to what Calc B used last time.
105
- * **Result**: Calc B is `SKIPPED`.
106
-
107
- ---
108
-
109
- ## 5. Decision Logic & Edge Case Scenarios
110
-
111
- ### Scenario A: Standard Code Change (Logic)
112
- * **Trigger**: You change the formula for `RSI`. Code Hash changes. SimHash changes.
113
- * **Dispatcher**: Sees `storedHash !== currentHash`.
114
- * **Result**: Marks as `RUNNABLE`. Worker runs.
115
-
116
- ### Scenario B: Cosmetic Code Change (Refactor)
117
- * **Trigger**: You rename a variable in `RSI`. Code Hash changes. SimHash remains identical.
118
- * **Optimizer**: Updates the centralized Status Ledger: "Version `Desc_v2` is equivalent to `Desc_v1`".
119
- * **Dispatcher**: Sees the new hash in the ledger as "Verified".
120
- * **Result**: Task is `SKIPPED`.
121
-
122
- ### Scenario C: Upstream Invalidation (The Cascade)
123
- * **Condition**: `PriceExtractor` fixes a bug. `ResultHash` changes from `HashA` to `HashB`.
124
- * **Downstream**: `RSI` checks detailed dependency report.
125
- * **Check**: `LastRunDeps['PriceExtractor'] (HashA) !== CurrentDeps['PriceExtractor'] (HashB)`.
126
- * **Result**: `RSI` is forced to re-run.
127
-
128
- ### Scenario D: Upstream Stability (The Firewall)
129
- * **Condition**: `PriceExtractor` runs an optimization. Output is exact same data. `ResultHash` remains `HashA`.
130
- * **Downstream**: `RSI` checks dependency report.
131
- * **Check**: `LastRunDeps['PriceExtractor'] (HashA) === CurrentDeps['PriceExtractor'] (HashA)`.
132
- * **Result**: `RSI` is `SKIPPED`. This firewall prevents massive re-calculation storms for non-functional upstream changes.
133
-
134
- ### Scenario E: The "Impossible" State
135
- * **Condition**: Core market data is missing for `1990-01-01`.
136
- * **Root Indexer**: Marks date as providing `[]` (empty) for critical inputs.
137
- * **Dispatcher**: Marks `PriceExtractor` as `IMPOSSIBLE: NO_DATA`.
138
- * **Propagation**: Any calculation depending on `PriceExtractor` sees the `IMPOSSIBLE` status and marks *itself* as `IMPOSSIBLE: UPSTREAM`.
139
- * **Benefit**: The system doesn't waste cycles retrying calculations that can never succeed.
140
-
141
- ### Scenario F: Category Migration
142
- * **Condition**: You change `getMetadata()` for a calculation, moving it from `signals` to `risk`.
143
- * **Dispatcher**: Detects `storedCategory !== newCategory`.
144
- * **Worker**:
145
- 1. Runs `process()` and writes to the *new* path (`risk/CalculateX`).
146
- 2. Detects the `previousCategory` flag.
147
- 3. Deletes the data at the *old* path (`signals/CalculateX`) to prevent orphan data.
148
-
149
- ---
150
-
151
- ## 6. Data Management & Storage
152
-
153
- ### Input Streaming
154
- To handle large datasets without OOM (Out Of Memory) errors:
155
- * `StandardExecutor` does not load all users/tickers at once.
156
- * It utilizes wait-and-stream logic (e.g., batches of 50 ids) to process the `Context`.
157
-
158
- ### Transparent Auto-Sharding
159
- Firestore has a 1MB document limit.
160
- * **Write Path**: If a calculation result > 900KB, it is split into `DocID`, `DocID_shard1`, `DocID_shard2`.
161
- * **Read Path**: The `DependencyFetcher` automatically detects sharding pointers and re-assembles (hydrates) the full object before passing it to `process()`.
162
-
163
- ### Compression Strategy
164
- * Payloads are inspected before write.
165
- * If compression is worthwhile (large, repetitive text/JSON payloads), Zlib compression is applied.
166
- * Metadata is tagged `encoding: 'zlib'` so readers know to inflate.
167
-
168
- ---
169
-
170
- ## 7. Quality Assurance & Self-Healing
171
-
172
- ### The Heuristic Validator
173
- Before saving *any* result, the Executor runs heuristics:
174
- * **NaN Check**: Are there `NaN` or `Infinity` values in key fields?
175
- * **Flatline Check**: Is the data variance 0.00 across a large timespan?
176
- * **Null Density**: Is >50% of the dataset null?
177
- * **Circuit Breaker**: If heuristics fail, the task throws an error. It is better to fail and alert than to persist corrupted data that pollutes the cache.
178
-
179
- ### Zombie Task Recovery
180
- * **Lease Mechanism**: When a task starts, it sets a `startedAt` timestamp.
181
- * **Detection**: The Dispatcher checks for tasks marked `RUNNING` where `startedAt` > 15 minutes ago.
182
- * **Resolution**: These are assumed crashed (OOM/Timeout). They are reset to `PENDING` (or `FAILED` if retry count exceeded).
183
-
184
- ### Dead Letter Queue (DLQ)
185
- Tasks that deterministically fail (crash every time) after N retries are moved to a special DLQ status. This prevents the system from getting stuck in an infinite retry loop.
186
-
187
- ---
188
-
189
- ## 8. Developer Workflows
190
-
191
- ### How to Add a New Calculation
192
- 1. Create `calculations/category/MyNewCalc.js`.
193
- 2. Implement `getMetadata()` to define dependencies.
194
- 3. Implement `process(context)`.
195
- 4. Run `npm run build-manifest` to register it in the topology.
196
-
197
- ### How to Force a Global Re-Run
198
- * Change the `SYSTEM_EPOCH` constant in `system_epoch.js`.
199
- * This changes the "Global Salt" for all hashes, processing every calculation as "New".
200
-
201
- ### How to Backfill History
202
- * **Standard Dispatcher**: Good for recent history (last 30 days).
203
- * **BatchPriceExecutor**: Specialized for massive historical backfills (e.g., 20 years of price data). It bypasses some topology checks for raw speed.
204
-
205
- ### Local Debugging
206
- Run the orchestrator in "Dry Run" mode:
207
- ```bash
208
- node scripts/run_orchestrator.js --date=2024-01-01 --dry-run
209
- ```
210
- This prints the `Analysis Report` (Runnable/Blocked lists) without actually triggering workers.