bulltrackers-module 1.0.293 → 1.0.294
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/StandardExecutor.js +47 -7
- package/functions/computation-system/helpers/computation_dispatcher.js +26 -7
- package/functions/computation-system/paper.md +93 -0
- package/functions/computation-system/persistence/RunRecorder.js +7 -7
- package/package.json +1 -1
- package/functions/computation-system/onboarding.md +0 -210
package/functions/computation-system/executors/StandardExecutor.js (CHANGED, +47 -7)

@@ -1,7 +1,8 @@
 /**
  * @fileoverview Executor for "Standard" (per-user) calculations.
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
- * UPDATED:
+ * UPDATED: Implements "Circuit Breaker" to fail fast on high error rates.
+ * UPDATED: Implements "Adaptive Flushing" based on V8 Heap usage.
  * UPDATED: Manages incremental sharding states.
  * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */

@@ -12,6 +13,7 @@ const { ContextFactory } = require
 const { commitResults } = require('../persistence/ResultCommitter');
 const mathLayer = require('../layers/index');
 const { performance } = require('perf_hooks');
+const v8 = require('v8'); // [NEW] For Memory introspection
 
 class StandardExecutor {
   static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {

@@ -59,6 +61,9 @@ class StandardExecutor {
     const aggregatedSuccess = {};
     const aggregatedFailures = [];
 
+    // [NEW] Global Error Tracking for Circuit Breaker
+    const errorStats = { count: 0, total: 0 };
+
     Object.keys(state).forEach(name => {
       executionStats[name] = {
         processedUsers: 0,

@@ -89,7 +94,7 @@ class StandardExecutor {
 
     let yP_chunk = {}, tH_chunk = {};
 
-    const
+    const MIN_BATCH_SIZE = 1000; // Minimum to process before checking stats
     let usersSinceLastFlush = 0;
 
     try {

@@ -103,6 +108,8 @@ class StandardExecutor {
       const chunkSize = Object.keys(tP_chunk).length;
 
       const startProcessing = performance.now();
+
+      // [UPDATED] Collect execution results (success/failure counts)
       const promises = streamingCalcs.map(calc =>
         StandardExecutor.executePerUser(
           calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,

@@ -110,15 +117,37 @@ class StandardExecutor {
           executionStats[normalizeName(calc.manifest.name)]
         )
       );
-
+
+      const batchResults = await Promise.all(promises);
       const procDuration = performance.now() - startProcessing;
 
       Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
 
+      // [NEW] Update Error Stats
+      batchResults.forEach(r => {
+        errorStats.total += (r.success + r.failures);
+        errorStats.count += r.failures;
+      });
+
+      // [NEW] Circuit Breaker: Fail fast if error rate > 10% after processing 100+ items
+      // We check total > 100 to avoid failing on the very first user if they happen to be bad.
+      if (errorStats.total > 100 && (errorStats.count / errorStats.total) > 0.10) {
+        const failRate = (errorStats.count / errorStats.total * 100).toFixed(1);
+        throw new Error(`[Circuit Breaker] High failure rate detected (${failRate}%). Aborting batch to prevent silent data loss.`);
+      }
+
       usersSinceLastFlush += chunkSize;
 
-
-
+      // [NEW] Adaptive Flushing (Memory Pressure Check)
+      const heapStats = v8.getHeapStatistics();
+      const heapUsedRatio = heapStats.used_heap_size / heapStats.heap_size_limit;
+      const MEMORY_THRESHOLD = 0.70; // 70% of available RAM
+      const COUNT_THRESHOLD = 5000;
+
+      if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
+        const reason = heapUsedRatio > MEMORY_THRESHOLD ? `MEMORY_PRESSURE (${(heapUsedRatio*100).toFixed(0)}%)` : 'BATCH_LIMIT';
+
+        logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users. Reason: ${reason}`);
 
         // [UPDATED] Pass isInitialWrite: true only on the first flush
         const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);

@@ -171,6 +200,7 @@ class StandardExecutor {
           _executionStats: executionStats[name]
         };
 
+        // Clear the memory immediately after preparing the commit
         inst.results = {};
       }
 

@@ -226,6 +256,10 @@ class StandardExecutor {
     const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
     const SCHEMAS = mathLayer.SCHEMAS;
 
+    // [NEW] Track local batch success/failure
+    let chunkSuccess = 0;
+    let chunkFailures = 0;
+
     for (const [userId, todayPortfolio] of Object.entries(portfolioData)) {
       const yesterdayPortfolio = yesterdayPortfolioData ? yesterdayPortfolioData[userId] : null;
       const todayHistory = historyData ? historyData[userId] : null;

@@ -249,10 +283,16 @@ class StandardExecutor {
       try {
         await calcInstance.process(context);
         if (stats) stats.processedUsers++;
+        chunkSuccess++;
       }
-      catch (e) {
+      catch (e) {
+        logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`);
+        chunkFailures++;
+      }
     }
+
+    return { success: chunkSuccess, failures: chunkFailures };
   }
 }
 
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };
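Taken together, the two additions above form one guard loop per batch: tally successes and failures, abort once the failure ratio crosses 10% on a meaningful sample, and flush whenever either a count threshold or V8 heap pressure is reached. A minimal standalone sketch of that pattern (the `processChunk` and `flush` helpers are hypothetical stand-ins for the executor's real methods, not the package's API):

```javascript
// Minimal sketch of the Circuit Breaker + Adaptive Flushing pattern above.
// `processChunk` and `flush` are hypothetical stand-ins, not this package's API.
const v8 = require('v8');

async function runGuarded(chunks, processChunk, flush) {
  const errorStats = { count: 0, total: 0 };
  let sinceLastFlush = 0;
  const COUNT_THRESHOLD = 5000;  // flush after this many items...
  const MEMORY_THRESHOLD = 0.70; // ...or at 70% of the V8 heap limit

  for (const chunk of chunks) {
    const { success, failures } = await processChunk(chunk);
    errorStats.total += success + failures;
    errorStats.count += failures;

    // Fail fast once a statistically meaningful sample is in hand.
    if (errorStats.total > 100 && errorStats.count / errorStats.total > 0.10) {
      const rate = (errorStats.count / errorStats.total * 100).toFixed(1);
      throw new Error(`[Circuit Breaker] failure rate ${rate}%`);
    }

    sinceLastFlush += chunk.length;
    const { used_heap_size, heap_size_limit } = v8.getHeapStatistics();
    if (sinceLastFlush >= COUNT_THRESHOLD || used_heap_size / heap_size_limit > MEMORY_THRESHOLD) {
      await flush();      // persist buffered results, then reset the counter
      sinceLastFlush = 0;
    }
  }
}
```

Comparing `used_heap_size` against `heap_size_limit` rather than a fixed byte budget keeps the trigger meaningful whether the process runs with the default heap or an explicit `--max-old-space-size`.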
package/functions/computation-system/helpers/computation_dispatcher.js (CHANGED, +26 -7)

@@ -3,9 +3,10 @@
  * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
  * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
  * UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
+ * FIXED: Implemented "Catch-Up" logic to scan full history (Start -> Target Date) instead of just Target Date.
  */
 
-const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
+const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
 const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.js');
 const { PubSubUtils } = require('../../core/utils/pubsub_utils');
 const { fetchComputationStatus, updateComputationStatus } = require('../persistence/StatusRepository');

@@ -67,6 +68,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
 
   // Extract Date and Callback from request body (pushed by Workflow)
+  // NOTE: 'dateStr' acts as the "Target Date" (Ceiling), usually T-1.
   const dateStr = reqBody.date || config.date;
   const callbackUrl = reqBody.callbackUrl || null;
 

@@ -82,14 +84,30 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
   if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
 
-  logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun}
+  logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} (Target: ${dateStr})`);
 
-  // -- DATE ANALYSIS LOGIC --
-
+  // -- DATE ANALYSIS LOGIC (FIXED: RANGE SCAN) --
+
+  // 1. Determine the absolute start of data history
+  const earliestDates = await getEarliestDataDates(config, dependencies);
+  const startDate = earliestDates.absoluteEarliest;
+  const endDate = new Date(dateStr + 'T00:00:00Z');
+
+  // 2. Generate the full range of dates to check
+  let allExpectedDates = getExpectedDateStrings(startDate, endDate);
+
+  // Safety fallback: if range is invalid or empty, default to target date only
+  if (!allExpectedDates || allExpectedDates.length === 0) {
+    logger.log('WARN', `[Dispatcher] Date range calculation returned empty (Start: ${startDate.toISOString()} -> End: ${endDate.toISOString()}). Defaulting to single target date.`);
+    allExpectedDates = [dateStr];
+  } else {
+    logger.log('INFO', `[Dispatcher] 📅 Analysis Range: ${allExpectedDates.length} days (${allExpectedDates[0]} to ${allExpectedDates[allExpectedDates.length-1]})`);
+  }
+
   const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
   const tasksToDispatch = [];
 
-  // Concurrency limit for analysis & forensics
+  // Concurrency limit for analysis & forensics (Parallelize the historical scan)
   const limit = pLimit(20);
 
   const analysisPromises = allExpectedDates.map(d => limit(async () => {

@@ -105,6 +123,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
       prevDate.setUTCDate(prevDate.getUTCDate() - 1);
       prevDateStr = prevDate.toISOString().slice(0, 10);
 
+      // Only fetch previous status if it's within valid range
       if (prevDate >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
         fetchPromises.push(fetchComputationStatus(prevDateStr, config, dependencies));
       }

@@ -183,7 +202,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   if (callbackUrl) {
     await db.doc(metaStatePath).set({
       createdAt: new Date(),
-      date: dateStr,
+      date: dateStr, // Acts as the "Job Label" (target date)
       pass: passToRun,
       totalTasks: tasksToDispatch.length,
       remainingTasks: tasksToDispatch.length,

@@ -281,4 +300,4 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   }
 }
 
-module.exports = { dispatchComputationPass };
+module.exports = { dispatchComputationPass };
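The "Catch-Up" fix replaces a single-date check with a scan from the earliest known data date up to the target ceiling. The diff does not show `getExpectedDateStrings` itself; a plausible sketch of such a generator, assuming inclusive UTC day steps and `YYYY-MM-DD` output:

```javascript
// Hypothetical sketch of a date-range generator like getExpectedDateStrings
// (the real implementation lives in utils/utils.js and may differ).
function expectedDateStrings(startDate, endDate) {
  const out = [];
  const d = new Date(startDate); // copy, so the caller's Date is not mutated
  while (d <= endDate) {
    out.push(d.toISOString().slice(0, 10)); // 'YYYY-MM-DD'
    d.setUTCDate(d.getUTCDate() + 1);
  }
  return out; // [] when startDate > endDate, which is why the dispatcher keeps a fallback
}

// expectedDateStrings(new Date('2024-01-01T00:00:00Z'), new Date('2024-01-03T00:00:00Z'))
// -> ['2024-01-01', '2024-01-02', '2024-01-03']
```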
package/functions/computation-system/paper.md (ADDED, +93 -0)

@@ -0,0 +1,93 @@
+# The BullTrackers Computation System: An Advanced DAG-Based Architecture for High-Fidelity Financial Simulation
+
+## Abstract
+
+This paper details the design, implementation, and theoretical underpinnings of the BullTrackers Computation System, a proprietary high-performance execution engine designed for complex financial modeling and user behavior analysis. The system leverages a Directed Acyclic Graph (DAG) architecture to orchestrate interdependent calculations, employing Kahn’s Algorithm for topological sorting and Tarjan’s Algorithm for cycle detection. Key innovations include "Content-Based Dependency Short-Circuiting" for massive optimization, a "System Epoch" and "Infrastructure Hash" based auditing system for absolute reproducibility, and a batch-flushing execution model designed to mitigate Out-Of-Memory (OOM) errors during high-volume processing. We further explore the application of this system in running advanced psychometric and risk-geometry models ("Smart Money" scoring) and how the architecture supports self-healing workflows through granular state management.
+
+## 1. Introduction
+
+In modern financial analytics, derived data often depends on a complex web of varying input frequencies—real-time price ticks, daily portfolio snapshots, and historical trade logs. Traditional linear batch processing protocols fail to capture the nuances of these interdependencies, often leading to race conditions or redundant computations.
+
+The BullTrackers Computation System was devised to solve this by treating the entire domain logic as a **Directed Acyclic Graph (DAG)**. Every calculation is a node, and every data requirement is an edge. By resolving the topology of this graph dynamically at runtime, the system ensures that:
+1. Data is always available before it is consumed (referential integrity).
+2. Only necessary computations are executed (efficiency).
+3. Changes in code or infrastructure propagate deterministically through the graph (auditability).
+
+## 2. Theoretical Foundations
+
+The core utility of the system is its ability to turn a collection of loosely coupled JavaScript classes into a strictly ordered execution plan.
+
+### 2.1 Directed Acyclic Graphs (DAGs)
+We model the computation space as a DAG where $G = (V, E)$.
+* **Vertices ($V$)**: Individual Calculation Units (e.g., `NetProfit`, [SmartMoneyScore](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/layers/profiling.js#24-236)).
+* **Edges ($E$)**: Data dependencies, where an edge $(u, v)$ implies $v$ requires the output of $u$.
+
+### 2.2 Topological Sorting (Kahn’s Algorithm)
+To execute the graph, we must linearize it such that for every dependency $u \rightarrow v$, $u$ precedes $v$ in the execution order. We implement **Kahn’s Algorithm** within [ManifestBuilder.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/context/ManifestBuilder.js) to achieve this:
+1. Calculate the **in-degree** (number of incoming edges) for all nodes.
+2. Initialize a queue with all nodes having an in-degree of 0 (independent nodes).
+3. While the queue is not empty:
+    * Dequeue node $N$ and add it to the `SortedManifest`.
+    * For each neighbor $M$ dependent on $N$, decrement $M$'s in-degree.
+    * If $M$'s in-degree becomes 0, enqueue $M$.
+4. This generates a series of "Passes" or "Waves" of execution, allowing parallel processing of independent nodes within the same pass.
+
+### 2.3 Cycle Detection (Tarjan’s Algorithm)
+A critical failure mode in DAGs is the introduction of a cycle (e.g., A needs B, B needs A), effectively turning the DAG into a DCG (Directed Cyclic Graph), which is unresolvable.
+If Kahn’s algorithm fails to visit all nodes (indicating a cycle exists), the system falls back to **Tarjan’s Strongly Connected Components (SCC) Algorithm**. This uses depth-first search to identify the exact cycle chain (e.g., `Calc A -> Calc B -> Calc C -> Calc A`), reporting the "First Cycle Found" to the developer for immediate remediation.
+
+## 3. System Architecture & "Source of Truth"
+
+The architecture is centered around the **Manifest**, a dynamic, immutable registry of all capabilities within the system.
+
+### 3.1 The Dynamic Manifest
+Unlike static build tools, the Manifest is built at runtime by [ManifestLoader.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/topology/ManifestLoader.js) and [ManifestBuilder.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/context/ManifestBuilder.js). It employs an **Auto-Discovery** mechanism that scans directories for calculation classes.
+* **Static Metadata**: Each class exposes `getMetadata()` and `getDependencies()`.
+* **Product Line Filtering**: The builder can slice the graph, generating a subgraph relevant only to specific product lines (e.g., "Crypto", "Stocks"), reducing overhead.
+
+### 3.2 Granular Hashing & The Audit Chain
+To ensure that "if the code hasn't changed, the result shouldn't change," the system implements a multi-layered hashing strategy ([HashManager.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/topology/HashManager.js)):
+1. **Code Hash**: The raw string content of the calculation class.
+2. **Layer Hash**: Hashes of shared utility layers (`mathematics`, `profiling`) used by the class.
+3. **Dependency Hash**: A composite hash of all upstream dependencies.
+4. **Infrastructure Hash**: A hash representing the underlying system environment.
+5. **System Epoch**: A manual versioning flag to force global re-computation.
+
+This results in a `Composite Hash`. If this hash matches the `storedHash` in the database, execution can be skipped entirely.
+
+## 4. Execution Engine: Flow, Resilience & Optimization
+
+The `WorkflowOrchestrator` acts as the runtime kernel, utilizing [StandardExecutor](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/executors/StandardExecutor.js#16-257) and [MetaExecutor](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/executors/MetaExecutor.js#12-83) for the heavy lifting.
+
+### 4.1 Content-Based Dependency Short-Circuiting
+A major optimization ($O(n)$ gain) is the **Content-Based Short-Circuiting** logic found in [WorkflowOrchestrator.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/WorkflowOrchestrator.js):
+Even if an upstream dependency *re-runs* (e.g., its timestamp changed), its *output* might be identical to the previous run.
+1. The system tracks `ResultHash` (hash of the actual output data).
+2. When checking dependencies for Node B (which depends on A), if A has re-run but its `ResultHash` is unchanged from what B used last time, B **does not need to re-run**.
+3. This effectively stops "change propagation" dead in its tracks if the data change is semantically null.
+
+### 4.2 Batch Flushing & OOM Prevention
+Financial datasets (processing 100k+ users with daily portfolios) often exceed Node.js heap limits. The [StandardExecutor](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/executors/StandardExecutor.js#16-257) implements a **Streaming & Flushing** architecture:
+* **Streams** inputs (Portfolio/History) using generators (`yield`), so the full user set is never loaded into memory at once.
+* **Buffers** results in a `state` object.
+* **Flushes** to the database (Firestore/Storage) every $N$ users (e.g., 5000), clearing the internal buffer to avoid Out-Of-Memory crashes.
+* **Incremental Sharding**: It manages shard indices dynamically to split massive result sets into retrievable chunks.
+
+### 4.3 Handling "Impossible" States
+If a dependency fails or is missing critical data, the Orchestrator marks dependent nodes as `IMPOSSIBLE` rather than failing them. This allows the rest of the graph (independent branches) to continue execution, maximizing system throughput even in a partially degraded state.
+
+## 5. Advanced Application: Psychometrics & Risk Geometry
+
+The capabilities of this computation engine are best demonstrated by the [profiling.js](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/layers/profiling.js) layer it powers. Because the DAG ensures all historical and portfolio data is perfectly aligned, we can run sophisticated $O(n^2)$ or $O(n \log n)$ algorithms on user data reliably.
+
+### 5.1 "Smart Money" & Cognitive Profiling
+The system executes a [UserClassifier](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/layers/profiling.js#382-399) that computes:
+* **Risk Geometry**: Using the **Monotone Chain** algorithm to compute the Convex Hull of a user's risk/reward performance (Efficient Frontier analysis).
+* **Psychometrics**: Detecting "Revenge Trading" (increasing risk after losses) and "Disposition Skew" (holding losers too long).
+* **Attribution**: Separating "Luck" (market beta) from "Skill" (Alpha) by comparing performance against sector benchmarks.
+
+These complex models depend on the *guarantee* provided by the DAG that all necessary history and price data is pre-computed and available in the [Context](file:///C:/Users/aiden/Desktop/code_projects/Bulltrackers2025/Backend/Entrypoints/BullTrackers/Backend/Core/bulltrackers-module/functions/computation-system/simulation/Fabricator.js#20-69).
+
+## 6. Conclusion
+
+The BullTrackers Computation System represents a shift from "Action-Based" to "State-Based" architecture. By encoding the domain logic into a Directed Acyclic Graph, we achieve a system that is self-healing, massively scalable via short-circuiting and batching, and capable of supporting deep analytical models. It provides the robustness required for high-stakes financial simulation, ensuring that every decimal point is traceable, reproducible, and verifiable.
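Section 2.2 of the new paper describes textbook Kahn's algorithm grouped into waves. A compact sketch of that description (node names illustrative, not taken from ManifestBuilder.js), including the not-all-nodes-visited check that triggers the Tarjan fallback of §2.3:

```javascript
// Kahn's algorithm grouped into "Passes"/"Waves", per §2.2 of paper.md.
// `deps` maps every calculation to the calculations it depends on.
function buildPasses(deps) {
  const inDegree = new Map();
  const dependents = new Map();
  for (const [node, ups] of Object.entries(deps)) {
    inDegree.set(node, ups.length);
    for (const up of ups) {
      if (!dependents.has(up)) dependents.set(up, []);
      dependents.get(up).push(node);
    }
  }
  const passes = [];
  let wave = [...inDegree.keys()].filter(n => inDegree.get(n) === 0);
  let visited = 0;
  while (wave.length) {
    passes.push(wave);               // every node in a wave can run in parallel
    visited += wave.length;
    const next = [];
    for (const n of wave) {
      for (const m of dependents.get(n) ?? []) {
        inDegree.set(m, inDegree.get(m) - 1);
        if (inDegree.get(m) === 0) next.push(m);
      }
    }
    wave = next;
  }
  // Unvisited nodes mean a cycle exists; §2.3 falls back to Tarjan's SCC search here.
  if (visited !== inDegree.size) throw new Error('Cycle detected in calculation graph');
  return passes;
}

// buildPasses({ Price: [], NetProfit: ['Price'], RSI: ['Price'], SmartMoneyScore: ['NetProfit', 'RSI'] })
// -> [['Price'], ['NetProfit', 'RSI'], ['SmartMoneyScore']]
```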
package/functions/computation-system/persistence/RunRecorder.js (CHANGED, +7 -7)

@@ -43,14 +43,14 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetrics
   const timings = rawExecStats.timings || {};
 
   const runEntry = {
-    runId:
+    runId: runId,
     computationName: computation,
-    pass:
-    workerId:
-    targetDate:
-    triggerTime:
-    durationMs:
-    status:
+    pass: String(pass),
+    workerId: workerId,
+    targetDate: targetDate,
+    triggerTime: now.toISOString(),
+    durationMs: detailedMetrics.durationMs || 0,
+    status: status,
 
     // [NEW] Trigger Context
     trigger: {
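For orientation, a fully populated run entry under the completed assignments might look like the object below; every value is invented for illustration, and the `trigger` sub-object continues past the end of the hunk:

```javascript
// Hypothetical example of a completed runEntry document (illustrative values only).
const exampleRunEntry = {
  runId: 'run-001',
  computationName: 'NetProfit',
  pass: '2',                               // String(pass): the pass is stored as a string
  workerId: 'worker-eu-1',
  targetDate: '2025-01-01',
  triggerTime: '2025-01-02T03:00:00.000Z', // now.toISOString()
  durationMs: 1234,                        // detailedMetrics.durationMs, defaulting to 0
  status: 'SUCCESS',
  trigger: { /* [NEW] trigger context, defined after this hunk */ },
};
```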
package/package.json (CHANGED, +1 -1: the version bump 1.0.293 → 1.0.294)
package/functions/computation-system/onboarding.md (REMOVED, +0 -210)

@@ -1,210 +0,0 @@
-# BullTrackers Computation System: Architecture & Operational Manual
-
-This document provides a comprehensive overview of the BullTrackers Computation System, a distributed, deterministic, and self-optimizing data pipeline. Unlike traditional task schedulers, this system operates on "Build System" principles, treating data calculations as compiled artifacts with strict versioning and dependency guarantees.
-
----
-
-## 1. System Philosophy & Core Concepts
-
-### The "Build System" Paradigm
-We treat the computation pipeline like a large-scale software build system (e.g., Bazel or Make). Every data point is an "artifact" produced by a specific version of code (Code Hash) acting on specific versions of dependencies (Dependency Hashes).
-* **Determinism**: If the input data and code haven't changed, the output *must* be identical. We verify this to skip unnecessary work.
-* **Merkle Tree Structure**: The state of the system is a DAG (Directed Acyclic Graph) of hashes. A change in a root node propagates potential invalidation down the tree, but invalidation stops as soon as a node produces the same output as before (Short-Circuiting).
-
-### Source-of-Truth Architecture
-The **Root Data Index** is the absolute source of truth. No computation can start until the underlying raw data (prices, signals) is indexed and verified "Available" for the target date. This prevents partial runs and "garbage-in-garbage-out".
-
-### The Three-Layer Hash Model
-To optimize execution, we track three distinct hashes for every calculation:
-1. **Code Hash (Static)**: A SHA-256 hash of the cleaned source code (comments and whitespace stripped). This tells us if the logic *might* have changed.
-2. **SimHash (Behavioral)**: Generated by running the code against a deterministic "Fabricated" context. This tells us if the logic *actually* changed behavior (e.g., a refactor that changes variable names but not logic will have a different Code Hash but the same SimHash).
-3. **ResultHash (Output)**: A hash of the actual production output from a run. This tells us if the data changed. Used for downstream short-circuiting.
-
----
-
-## 2. Core Components Overview
-
-### Root Data Indexer
-A scheduled crawler that verifies the availability of raw external data (e.g., asset prices, global signals) for a given date. It produces an "Availability Manifest" that the Dispatcher consults before scheduling anything.
-
-### Manifest Builder
-* **Role**: Topology Discovery.
-* **Mechanism**: It scans the `calculations/` directory, loads every module, and builds the global Dependency Graph (DAG) in memory.
-* **Output**: A topological sort of all calculations assigned to "Passes" (Pass 0, Pass 1, etc.).
-
-### The Dispatcher (`WorkflowOrchestrator.js`)
-The "Brain" of the system. It runs largely stateless, analyzing the `StatusRepository` against the `Manifest`.
-* **Responsibility**: For a given Grid (Date x Calculation), it determines if the state is `RUNNABLE`, `BLOCKED`, `SKIPPED`, or `IMPOSSIBLE`.
-* **Key Logic**: It implements the "Short-Circuiting" and "Historical Continuity" checks.
-
-### The Build Optimizer
-A pre-flight tool that attempts to avoid running tasks by proving they are identical to previous versions.
-* **Mechanism**: If a calculation's Code Hash changes, the Optimizer runs a **Simulation** (using `SimRunner`) to generate a SimHash. If the SimHash matches the registry, the system acts as if the code never changed, skipping the production re-run.
-
-### The Worker (`StandardExecutor` / `MetaExecutor`)
-The execution unit. It is unaware of the broader topology.
-* **Input**: A target Calculation and Date.
-* **Action**: Fetches inputs, runs `process()`, validates results, and writes to Firestore.
-* **Output**: The computed data + the **ResultHash**.
-
----
-
-## 3. The Daily Lifecycle (Chronological Process)
-
-### Phase 1: Indexing
-The system waits for the `SystemEpoch` to advance. The Root Data Indexer checks for "Canary Blocks" (indicators that external data providers have finished for the day). Once confirmed, the date is marked `OPEN`.
-
-### Phase 2: Pre-Flight Optimization
-Before dispatching workers:
-1. The system identifies all calculations with new **Code Hashes**.
-2. It runs `SimRunner` for these calculations to generate fresh **SimHashes**.
-3. If `SimHash(New) == SimHash(Old)`, the system updates the Status Ledger to enable the new Code Hash without flagging it as "Changed".
-
-### Phase 3: Dispatch Analysis
-The Dispatcher iterates through the Topological Passes (0 -> N). For each calculation, it queries `calculateExecutionStatus`:
-* Are dependencies done?
-* Did dependencies change their output (`ResultHash`)?
-* Is historical context available?
-
-### Phase 4: Execution Waves
-Workers are triggered via Pub/Sub or direct method invocation.
-* **Pass 1**: Primitive conversions (e.g., Price Extractor).
-* **Pass 2**: Technical Indicators that depend on Pass 1.
-* **Pass 3**: Aggregations and Complex Metrics.
-
-### Phase 5: Reconciliation
-After all queues drain, the system performs a final sweep. Any tasks marked `FAILED` are retried (up to a limit). Impossible tasks are finalized as `IMPOSSIBLE`.
-
----
-
-## 4. Deep Dive: Hashing & Dependency Logic
-
-### Intrinsic Code Hashing
-Located in `topology/HashManager.js`.
-We generate a unique fingerprint for every calculation file:
-```javascript
-clean = codeString.replace(comments).replace(whitespace);
-hash = sha256(clean);
-```
-This ensures that changes to comments or formatting do *not* trigger re-runs.
-
-### Behavioral Hashing (SimHash)
-Located in `simulation/SimRunner.js`.
-When code changes, we can't be 100% sure it's safe just by looking at the source.
-1. **The Fabricator**: Generates a deterministic mock `Context` (prices, previous results) based on the input schema.
-2. **Simulation Run**: The calculation `process()` method is executed against this mock data.
-3. **The Registry**: The hash of the *output* of this simulation is stored.
-If a refactor results in the exact same Mock Output, the system considers the change "Cosmetic".
-
-### Dependency Short-Circuiting
-Implemented in `WorkflowOrchestrator.js` (`analyzeDateExecution`).
-Even if an upstream calculation re-runs, downstream dependents might not need to.
-* **Logic**:
-    * Calc A (Upstream) re-runs. Old Output Hash: `HashX`. New Output Hash: `HashX`.
-    * Calc B (Downstream) sees that Calc A "changed" (new timestamp), BUT the content hash `HashX` is identical to what Calc B used last time.
-* **Result**: Calc B is `SKIPPED`.
-
----
-
-## 5. Decision Logic & Edge Case Scenarios
-
-### Scenario A: Standard Code Change (Logic)
-* **Trigger**: You change the formula for `RSI`. Code Hash changes. SimHash changes.
-* **Dispatcher**: Sees `storedHash !== currentHash`.
-* **Result**: Marks as `RUNNABLE`. Worker runs.
-
-### Scenario B: Cosmetic Code Change (Refactor)
-* **Trigger**: You rename a variable in `RSI`. Code Hash changes. SimHash remains identical.
-* **Optimizer**: Updates the centralized Status Ledger: "Version `Desc_v2` is equivalent to `Desc_v1`".
-* **Dispatcher**: Sees the new hash in the ledger as "Verified".
-* **Result**: Task is `SKIPPED`.
-
-### Scenario C: Upstream Invalidation (The Cascade)
-* **Condition**: `PriceExtractor` fixes a bug. `ResultHash` changes from `HashA` to `HashB`.
-* **Downstream**: `RSI` checks detailed dependency report.
-* **Check**: `LastRunDeps['PriceExtractor'] (HashA) !== CurrentDeps['PriceExtractor'] (HashB)`.
-* **Result**: `RSI` is forced to re-run.
-
-### Scenario D: Upstream Stability (The Firewall)
-* **Condition**: `PriceExtractor` runs an optimization. Output is exact same data. `ResultHash` remains `HashA`.
-* **Downstream**: `RSI` checks dependency report.
-* **Check**: `LastRunDeps['PriceExtractor'] (HashA) === CurrentDeps['PriceExtractor'] (HashA)`.
-* **Result**: `RSI` is `SKIPPED`. This firewall prevents massive re-calculation storms for non-functional upstream changes.
-
-### Scenario E: The "Impossible" State
-* **Condition**: Core market data is missing for `1990-01-01`.
-* **Root Indexer**: Marks date as providing `[]` (empty) for critical inputs.
-* **Dispatcher**: Marks `PriceExtractor` as `IMPOSSIBLE: NO_DATA`.
-* **Propagation**: Any calculation depending on `PriceExtractor` sees the `IMPOSSIBLE` status and marks *itself* as `IMPOSSIBLE: UPSTREAM`.
-* **Benefit**: The system doesn't waste cycles retrying calculations that can never succeed.
-
-### Scenario F: Category Migration
-* **Condition**: You change `getMetadata()` for a calculation, moving it from `signals` to `risk`.
-* **Dispatcher**: Detects `storedCategory !== newCategory`.
-* **Worker**:
-    1. Runs `process()` and writes to the *new* path (`risk/CalculateX`).
-    2. Detects the `previousCategory` flag.
-    3. Deletes the data at the *old* path (`signals/CalculateX`) to prevent orphan data.
-
----
-
-## 6. Data Management & Storage
-
-### Input Streaming
-To handle large datasets without OOM (Out Of Memory) errors:
-* `StandardExecutor` does not load all users/tickers at once.
-* It utilizes wait-and-stream logic (e.g., batches of 50 ids) to process the `Context`.
-
-### Transparent Auto-Sharding
-Firestore has a 1MB document limit.
-* **Write Path**: If a calculation result > 900KB, it is split into `DocID`, `DocID_shard1`, `DocID_shard2`.
-* **Read Path**: The `DependencyFetcher` automatically detects sharding pointers and re-assembles (hydrates) the full object before passing it to `process()`.
-
-### Compression Strategy
-* Payloads are inspected before write.
-* If efficient (high entropy text/JSON), Zlib compression is applied.
-* Metadata is tagged `encoding: 'zlib'` so readers know to inflate.
-
----
-
-## 7. Quality Assurance & Self-Healing
-
-### The Heuristic Validator
-Before saving *any* result, the Executor runs heuristics:
-* **NaN Check**: Are there `NaN` or `Infinity` values in key fields?
-* **Flatline Check**: Is the data variance 0.00 across a large timespan?
-* **Null Density**: Is >50% of the dataset null?
-* **Circuit Breaker**: If heuristics fail, the task throws an error. It is better to fail and alert than to persist corrupted data that pollutes the cache.
-
-### Zombie Task Recovery
-* **Lease Mechanism**: When a task starts, it sets a `startedAt` timestamp.
-* **Detection**: The Dispatcher checks for tasks marked `RUNNING` where `startedAt` > 15 minutes ago.
-* **Resolution**: These are assumed crashed (OOM/Timeout). They are reset to `PENDING` (or `FAILED` if retry count exceeded).
-
-### Dead Letter Queue (DLQ)
-Tasks that deterministically fail (crash every time) after N retries are moved to a special DLQ status. This prevents the system from getting stuck in an infinite retry loop.
-
----
-
-## 8. Developer Workflows
-
-### How to Add a New Calculation
-1. Create `calculations/category/MyNewCalc.js`.
-2. Implement `getMetadata()` to define dependencies.
-3. Implement `process(context)`.
-4. Run `npm run build-manifest` to register it in the topology.
-
-### How to Force a Global Re-Run
-* Change the `SYSTEM_EPOCH` constant in `system_epoch.js`.
-* This changes the "Global Salt" for all hashes, so every calculation is treated as "New".
-
-### How to Backfill History
-* **Standard Dispatcher**: Good for recent history (last 30 days).
-* **BatchPriceExecutor**: Specialized for massive historical backfills (e.g., 20 years of price data). It bypasses some topology checks for raw speed.
-
-### Local Debugging
-Run the orchestrator in "Dry Run" mode:
-```bash
-node scripts/run_orchestrator.js --date=2024-01-01 --dry-run
-```
-This prints the `Analysis Report` (Runnable/Blocked lists) without actually triggering workers.