bulltrackers-module 1.0.293 → 1.0.295
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/PriceBatchExecutor.js +0 -1
- package/functions/computation-system/executors/StandardExecutor.js +47 -7
- package/functions/computation-system/features.md +395 -0
- package/functions/computation-system/helpers/computation_dispatcher.js +35 -17
- package/functions/computation-system/layers/extractors.js +9 -9
- package/functions/computation-system/paper.md +93 -0
- package/functions/computation-system/persistence/RunRecorder.js +16 -16
- package/functions/generic-api/admin-api/index.js +233 -0
- package/functions/generic-api/helpers/api_helpers.js +30 -4
- package/functions/generic-api/index.js +8 -1
- package/package.json +1 -1
- package/functions/computation-system/onboarding.md +0 -210
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Executor for "Standard" (per-user) calculations.
|
|
3
3
|
* UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
|
|
4
|
-
* UPDATED:
|
|
4
|
+
* UPDATED: Implements "Circuit Breaker" to fail fast on high error rates.
|
|
5
|
+
* UPDATED: Implements "Adaptive Flushing" based on V8 Heap usage.
|
|
5
6
|
* UPDATED: Manages incremental sharding states.
|
|
6
7
|
* UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
|
|
7
8
|
*/
|
|
@@ -12,6 +13,7 @@ const { ContextFactory } = require
|
|
|
12
13
|
const { commitResults } = require('../persistence/ResultCommitter');
|
|
13
14
|
const mathLayer = require('../layers/index');
|
|
14
15
|
const { performance } = require('perf_hooks');
|
|
16
|
+
const v8 = require('v8');
|
|
15
17
|
|
|
16
18
|
class StandardExecutor {
|
|
17
19
|
static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {
|
|
@@ -59,6 +61,9 @@ class StandardExecutor {
|
|
|
59
61
|
const aggregatedSuccess = {};
|
|
60
62
|
const aggregatedFailures = [];
|
|
61
63
|
|
|
64
|
+
// [NEW] Global Error Tracking for Circuit Breaker
|
|
65
|
+
const errorStats = { count: 0, total: 0 };
|
|
66
|
+
|
|
62
67
|
Object.keys(state).forEach(name => {
|
|
63
68
|
executionStats[name] = {
|
|
64
69
|
processedUsers: 0,
|
|
@@ -89,7 +94,7 @@ class StandardExecutor {
|
|
|
89
94
|
|
|
90
95
|
let yP_chunk = {}, tH_chunk = {};
|
|
91
96
|
|
|
92
|
-
const
|
|
97
|
+
const MIN_BATCH_SIZE = 1000; // Minimum to process before checking stats
|
|
93
98
|
let usersSinceLastFlush = 0;
|
|
94
99
|
|
|
95
100
|
try {
|
|
@@ -103,6 +108,8 @@ class StandardExecutor {
|
|
|
103
108
|
const chunkSize = Object.keys(tP_chunk).length;
|
|
104
109
|
|
|
105
110
|
const startProcessing = performance.now();
|
|
111
|
+
|
|
112
|
+
// [UPDATED] Collect execution results (success/failure counts)
|
|
106
113
|
const promises = streamingCalcs.map(calc =>
|
|
107
114
|
StandardExecutor.executePerUser(
|
|
108
115
|
calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,
|
|
@@ -110,15 +117,37 @@ class StandardExecutor {
|
|
|
110
117
|
executionStats[normalizeName(calc.manifest.name)]
|
|
111
118
|
)
|
|
112
119
|
);
|
|
113
|
-
|
|
120
|
+
|
|
121
|
+
const batchResults = await Promise.all(promises);
|
|
114
122
|
const procDuration = performance.now() - startProcessing;
|
|
115
123
|
|
|
116
124
|
Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
|
|
117
125
|
|
|
126
|
+
// [NEW] Update Error Stats
|
|
127
|
+
batchResults.forEach(r => {
|
|
128
|
+
errorStats.total += (r.success + r.failures);
|
|
129
|
+
errorStats.count += r.failures;
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
// [NEW] Circuit Breaker: Fail fast if error rate > 10% after processing 100+ items
|
|
133
|
+
// We check total > 100 to avoid failing on the very first user if they happen to be bad.
|
|
134
|
+
if (errorStats.total > 100 && (errorStats.count / errorStats.total) > 0.10) {
|
|
135
|
+
const failRate = (errorStats.count / errorStats.total * 100).toFixed(1);
|
|
136
|
+
throw new Error(`[Circuit Breaker] High failure rate detected (${failRate}%). Aborting batch to prevent silent data loss.`);
|
|
137
|
+
}
|
|
138
|
+
|
|
118
139
|
usersSinceLastFlush += chunkSize;
|
|
119
140
|
|
|
120
|
-
|
|
121
|
-
|
|
141
|
+
// [NEW] Adaptive Flushing (Memory Pressure Check)
|
|
142
|
+
const heapStats = v8.getHeapStatistics();
|
|
143
|
+
const heapUsedRatio = heapStats.used_heap_size / heapStats.heap_size_limit;
|
|
144
|
+
const MEMORY_THRESHOLD = 0.70; // 70% of available RAM
|
|
145
|
+
const COUNT_THRESHOLD = 5000;
|
|
146
|
+
|
|
147
|
+
if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
|
|
148
|
+
const reason = heapUsedRatio > MEMORY_THRESHOLD ? `MEMORY_PRESSURE (${(heapUsedRatio*100).toFixed(0)}%)` : 'BATCH_LIMIT';
|
|
149
|
+
|
|
150
|
+
logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users. Reason: ${reason}`);
|
|
122
151
|
|
|
123
152
|
// [UPDATED] Pass isInitialWrite: true only on the first flush
|
|
124
153
|
const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
|
|
@@ -171,6 +200,7 @@ class StandardExecutor {
|
|
|
171
200
|
_executionStats: executionStats[name]
|
|
172
201
|
};
|
|
173
202
|
|
|
203
|
+
// Clear the memory immediately after preparing the commit
|
|
174
204
|
inst.results = {};
|
|
175
205
|
}
|
|
176
206
|
|
|
@@ -226,6 +256,10 @@ class StandardExecutor {
|
|
|
226
256
|
const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
|
|
227
257
|
const SCHEMAS = mathLayer.SCHEMAS;
|
|
228
258
|
|
|
259
|
+
// [NEW] Track local batch success/failure
|
|
260
|
+
let chunkSuccess = 0;
|
|
261
|
+
let chunkFailures = 0;
|
|
262
|
+
|
|
229
263
|
for (const [userId, todayPortfolio] of Object.entries(portfolioData)) {
|
|
230
264
|
const yesterdayPortfolio = yesterdayPortfolioData ? yesterdayPortfolioData[userId] : null;
|
|
231
265
|
const todayHistory = historyData ? historyData[userId] : null;
|
|
@@ -249,10 +283,16 @@ class StandardExecutor {
|
|
|
249
283
|
try {
|
|
250
284
|
await calcInstance.process(context);
|
|
251
285
|
if (stats) stats.processedUsers++;
|
|
286
|
+
chunkSuccess++;
|
|
252
287
|
}
|
|
253
|
-
catch (e) {
|
|
288
|
+
catch (e) {
|
|
289
|
+
logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`);
|
|
290
|
+
chunkFailures++;
|
|
291
|
+
}
|
|
254
292
|
}
|
|
293
|
+
|
|
294
|
+
return { success: chunkSuccess, failures: chunkFailures };
|
|
255
295
|
}
|
|
256
296
|
}
|
|
257
297
|
|
|
258
|
-
module.exports = { StandardExecutor };
|
|
298
|
+
module.exports = { StandardExecutor };
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
# Complete Feature Inventory of BullTrackers Computation System
|
|
2
|
+
|
|
3
|
+
## Core DAG Engine Features
|
|
4
|
+
|
|
5
|
+
### 1. **Topological Sorting (Kahn's Algorithm)**
|
|
6
|
+
- **Files**: `ManifestBuilder.js:187-205`
|
|
7
|
+
- **Implementation**: Builds execution passes by tracking in-degrees, queuing zero-dependency nodes
|
|
8
|
+
- **Niche aspect**: Dynamic pass assignment (line 201: `neighborEntry.pass = currentEntry.pass + 1`)
|
|
9
|
+
- **Common in**: Airflow, Prefect, Dagster (all use topological sort)
|
|
10
|
+
|
|
11
|
+
### 2. **Cycle Detection (Tarjan's SCC Algorithm)**
|
|
12
|
+
- **Files**: `ManifestBuilder.js:98-141`
|
|
13
|
+
- **Implementation**: Strongly Connected Components detection with stack-based traversal
|
|
14
|
+
- **Niche aspect**: Returns human-readable cycle chain (line 137: `cycle.join(' -> ') + ' -> ' + cycle[0]`)
|
|
15
|
+
- **Common in**: Academic graph libraries, rare in production DAG systems (most use simpler DFS)
|
|
16
|
+
|
|
17
|
+
### 3. **Auto-Discovery Manifest Building**
|
|
18
|
+
- **Files**: `ManifestBuilder.js:143-179`, `ManifestLoader.js:9-42`
|
|
19
|
+
- **Implementation**: Scans directories, instantiates classes, extracts metadata via `getMetadata()` static method
|
|
20
|
+
- **Niche aspect**: Singleton caching with multi-key support (ManifestLoader.js:9)
|
|
21
|
+
- **Common in**: Plugin systems (Airflow providers), less common for computation graphs
|
|
22
|
+
|
|
23
|
+
## Dependency Management & Optimization
|
|
24
|
+
|
|
25
|
+
### 4. **Multi-Layered Hash Composition**
|
|
26
|
+
- **Files**: `ManifestBuilder.js:56-95`, `HashManager.js:25-36`
|
|
27
|
+
- **Implementation**: Composite hash from code + epoch + infrastructure + layers + dependencies
|
|
28
|
+
- **Niche aspect**: Infrastructure hash (recursive file tree hashing, HashManager.js:38-79)
|
|
29
|
+
- **Common in**: Build systems (Bazel, Buck), **very rare** in data pipelines
|
|
30
|
+
|
|
31
|
+
### 5. **Content-Based Dependency Short-Circuiting**
|
|
32
|
+
- **Files**: `WorkflowOrchestrator.js:51-73`
|
|
33
|
+
- **Implementation**: Tracks `resultHash` (output data hash), skips re-run if output unchanged despite code change
|
|
34
|
+
- **Niche aspect**: `dependencyResultHashes` tracking (line 59-67)
|
|
35
|
+
- **Common in**: **Extremely rare** - only seen in specialized incremental computation systems
|
|
36
|
+
|
|
37
|
+
### 6. **Behavioral Stability Detection (SimHash)**
|
|
38
|
+
- **Files**: `BuildReporter.js:55-89`, `SimRunner.js:12-42`, `Fabricator.js:20-244`
|
|
39
|
+
- **Implementation**: Runs code against deterministic mock data, hashes output to detect "logic changes" vs "cosmetic changes"
|
|
40
|
+
- **Niche aspect**: Seeded random data generation (SeededRandom.js:1-38) for reproducible simulations
|
|
41
|
+
- **Common in**: **Unique** - haven't seen this elsewhere. Conceptually similar to property-based testing but for optimization
|
|
42
|
+
|
|
43
|
+
### 7. **System Epoch Forcing**
|
|
44
|
+
- **Files**: `system_epoch.js:1-2`, `ManifestBuilder.js:65`
|
|
45
|
+
- **Implementation**: Manual version bump to force global re-computation
|
|
46
|
+
- **Niche aspect**: Single-line file that invalidates all cached results
|
|
47
|
+
- **Common in**: Cache invalidation patterns, but unusual to have a dedicated module
|
|
48
|
+
|
|
49
|
+
## Execution & Resource Management
|
|
50
|
+
|
|
51
|
+
### 8. **Streaming Execution with Batch Flushing**
|
|
52
|
+
- **Files**: `StandardExecutor.js:86-158`
|
|
53
|
+
- **Implementation**: Async generators yield data chunks, flush to DB every N users
|
|
54
|
+
- **Niche aspect**: Adaptive flushing based on V8 heap pressure (line 141-150)
|
|
55
|
+
- **Common in**: ETL tools (Spark, Flink use micro-batching), **heap-aware flushing is rare**
|
|
56
|
+
|
|
57
|
+
### 9. **Memory Heartbeat (Flight Recorder)**
|
|
58
|
+
- **Files**: `computation_worker.js:30-53`
|
|
59
|
+
- **Implementation**: Background timer writes memory stats to Firestore every 2 seconds
|
|
60
|
+
- **Niche aspect**: Uses `.unref()` to prevent blocking process exit (line 50)
|
|
61
|
+
- **Common in**: APM tools (DataDog, New Relic), **embedding in workers is custom**
|
|
62
|
+
|
|
63
|
+
### 10. **Forensic Crash Analysis & Intelligent Routing**
|
|
64
|
+
- **Files**: `computation_dispatcher.js:31-68`
|
|
65
|
+
- **Implementation**: Reads last memory stats from failed runs, routes to high-mem queue if OOM suspected
|
|
66
|
+
- **Niche aspect**: Parses telemetry to distinguish crash types (line 44-50)
|
|
67
|
+
- **Common in**: Kubernetes autoscaling heuristics, **application-level routing is rare**
|
|
68
|
+
|
|
69
|
+
### 11. **Circuit Breaker Pattern**
|
|
70
|
+
- **Files**: `StandardExecutor.js:126-137`
|
|
71
|
+
- **Implementation**: Tracks error rate, fails fast if >10% failures after 100 items
|
|
72
|
+
- **Niche aspect**: Runs mid-stream (not just at job start)
|
|
73
|
+
- **Common in**: Microservices (Hystrix, Resilience4j), uncommon in data pipelines
|
|
74
|
+
|
|
75
|
+
### 12. **Incremental Auto-Sharding**
|
|
76
|
+
- **Files**: `ResultCommitter.js:234-302`
|
|
77
|
+
- **Implementation**: Dynamically splits results into Firestore subcollection shards, tracks shard index across flushes
|
|
78
|
+
- **Niche aspect**: `flushMode: INTERMEDIATE` flag (line 150) to avoid pointer updates mid-stream
|
|
79
|
+
- **Common in**: Database sharding, **dynamic document sharding is custom**
|
|
80
|
+
|
|
81
|
+
### 13. **GZIP Compression Strategy**
|
|
82
|
+
- **Files**: `ResultCommitter.js:128-157`
|
|
83
|
+
- **Implementation**: Compresses results >50KB, stores as binary blob if <900KB compressed
|
|
84
|
+
- **Niche aspect**: Falls back to sharding if compression fails or exceeds limit
|
|
85
|
+
- **Common in**: Storage layers, integration at application level is custom
|
|
86
|
+
|
|
87
|
+
## Data Quality & Validation
|
|
88
|
+
|
|
89
|
+
### 14. **Heuristic Validation (Grey Box)**
|
|
90
|
+
- **Files**: `ResultsValidator.js:8-96`
|
|
91
|
+
- **Implementation**: Statistical analysis (zero%, null%, flatline detection) without knowing schema
|
|
92
|
+
- **Niche aspect**: Weekend mode (line 57-64) - relaxes thresholds on Saturdays/Sundays
|
|
93
|
+
- **Common in**: Data quality tools (Great Expectations, Soda), **weekend-aware thresholds are domain-specific**
|
|
94
|
+
|
|
95
|
+
### 15. **Contract Discovery & Enforcement**
|
|
96
|
+
- **Files**: `ContractDiscoverer.js:11-120`, `ContractValidator.js:9-64`
|
|
97
|
+
- **Implementation**: Monte Carlo simulation learns behavioral bounds, enforces at runtime
|
|
98
|
+
- **Niche aspect**: Distinguishes "physics limits" (ratios 0-1) from "statistical envelopes" (6-sigma)
|
|
99
|
+
- **Common in**: **Unique** - closest analogue is schema inference (Pandas Profiling) but this is probabilistic + enforced
|
|
100
|
+
|
|
101
|
+
### 16. **Semantic Gates**
|
|
102
|
+
- **Files**: `ResultCommitter.js:118-127`
|
|
103
|
+
- **Implementation**: Blocks results that violate contracts before writing
|
|
104
|
+
- **Niche aspect**: Differentiated error handling - `SEMANTIC_GATE` errors are non-retryable (line 210-225)
|
|
105
|
+
- **Common in**: Type systems (TypeScript, Mypy), **runtime probabilistic checks are rare**
|
|
106
|
+
|
|
107
|
+
### 17. **Root Data Availability Tracking**
|
|
108
|
+
- **Files**: `AvailabilityChecker.js:49-87`, `utils.js:11-17`
|
|
109
|
+
- **Implementation**: Centralized index (`system_root_data_index`) tracks what data exists per day
|
|
110
|
+
- **Niche aspect**: Granular user-type checks (speculator vs normal portfolio, line 23-47)
|
|
111
|
+
- **Common in**: Data catalogs (Amundsen, DataHub), **day-level granularity is custom**
|
|
112
|
+
|
|
113
|
+
### 18. **Impossible State Propagation**
|
|
114
|
+
- **Files**: `WorkflowOrchestrator.js:94-96`, `logger.js:77-93`
|
|
115
|
+
- **Implementation**: Marks calculations as `IMPOSSIBLE` instead of failing them, allows graph to continue
|
|
116
|
+
- **Niche aspect**: Separate "impossible" category in analysis reports (logger.js:86-91)
|
|
117
|
+
- **Common in**: Workflow engines handle failures, **explicit impossible state is rare**
|
|
118
|
+
|
|
119
|
+
## Orchestration & Coordination
|
|
120
|
+
|
|
121
|
+
### 19. **Event-Driven Callback Pattern (Zero Polling)**
|
|
122
|
+
- **Files**: `bulltrackers_pipeline.yaml:49-76`, `computation_worker.js:82-104`
|
|
123
|
+
- **Implementation**: Workflow creates callback endpoint, worker POSTs on completion, workflow wakes
|
|
124
|
+
- **Niche aspect**: IAM authentication for callbacks (computation_worker.js:88-91)
|
|
125
|
+
- **Common in**: Cloud Workflows, AWS Step Functions (both support callbacks), **IAM-secured callbacks are best practice but not default**
|
|
126
|
+
|
|
127
|
+
### 20. **Run State Counter Pattern**
|
|
128
|
+
- **Files**: `computation_dispatcher.js:107-115`, `computation_worker.js:106-123`
|
|
129
|
+
- **Implementation**: Shared Firestore doc tracks `remainingTasks`, workers decrement on completion
|
|
130
|
+
- **Niche aspect**: Transaction-based decrement (computation_worker.js:109-119) ensures atomicity
|
|
131
|
+
- **Common in**: Distributed systems, **Firestore-specific implementation is custom**
|
|
132
|
+
|
|
133
|
+
### 21. **Audit Ledger (Ledger-DB Pattern)**
|
|
134
|
+
- **Files**: `computation_dispatcher.js:143-163`, `RunRecorder.js:26-99`
|
|
135
|
+
- **Implementation**: Write-once ledger per task (`computation_audit_ledger/{date}/passes/{pass}/tasks/{calc}`)
|
|
136
|
+
- **Niche aspect**: Stores granular timing breakdown (RunRecorder.js:64-70)
|
|
137
|
+
- **Common in**: Event sourcing systems, **granular profiling in ledger is uncommon**
|
|
138
|
+
|
|
139
|
+
### 22. **Poison Message Handling (DLQ)**
|
|
140
|
+
- **Files**: `computation_worker.js:36-60`
|
|
141
|
+
- **Implementation**: Max retries check via Pub/Sub `deliveryAttempt`, moves to dead letter queue
|
|
142
|
+
- **Niche aspect**: Differentiates deterministic errors (line 194-222) from transient failures
|
|
143
|
+
- **Common in**: Message queues (RabbitMQ, SQS), **logic-aware routing is custom**
|
|
144
|
+
|
|
145
|
+
### 23. **Catch-Up Logic (Historical Scan)**
|
|
146
|
+
- **Files**: `computation_dispatcher.js:65-81`
|
|
147
|
+
- **Implementation**: Scans full date range (earliest data → target date) instead of just target date
|
|
148
|
+
- **Niche aspect**: Parallel analysis with concurrency limit (line 85)
|
|
149
|
+
- **Common in**: Data pipelines (backfill mode), **integrating it into the dispatcher is convenient**
|
|
150
|
+
|
|
151
|
+
## Observability & Debugging
|
|
152
|
+
|
|
153
|
+
### 24. **Structured Logging System**
|
|
154
|
+
- **Files**: `logger.js:27-118`
|
|
155
|
+
- **Implementation**: Dual output (human-readable + JSON), process tracking, context inheritance
|
|
156
|
+
- **Niche aspect**: `ProcessLogger` class (line 120-148) for scoped logging with auto-stats
|
|
157
|
+
- **Common in**: Production apps (Winston, Bunyan), **process-scoped loggers are a nice touch**
|
|
158
|
+
|
|
159
|
+
### 25. **Date Analysis Reports**
|
|
160
|
+
- **Files**: `logger.js:77-132`
|
|
161
|
+
- **Implementation**: Per-date breakdown of runnable/blocked/impossible/skipped calculations
|
|
162
|
+
- **Niche aspect**: Unicode symbols for visual parsing (line 103)
|
|
163
|
+
- **Common in**: DAG visualization tools, **inline CLI reports are developer-friendly**
|
|
164
|
+
|
|
165
|
+
### 26. **Build Report Generator**
|
|
166
|
+
- **Files**: `BuildReporter.js:138-248`
|
|
167
|
+
- **Implementation**: Pre-deployment impact analysis showing blast radius of code changes
|
|
168
|
+
- **Niche aspect**: Blast radius calculation (line 62-77) - finds all downstream dependents
|
|
169
|
+
- **Common in**: CI/CD tools (GitHub's "affected projects"), **calculation-level granularity is detailed**
|
|
170
|
+
|
|
171
|
+
### 27. **System Fingerprinting**
|
|
172
|
+
- **Files**: `BuildReporter.js:28-51`, `HashManager.js:80-111`
|
|
173
|
+
- **Implementation**: SHA-256 hash of entire codebase + manifest, triggers report on change
|
|
174
|
+
- **Niche aspect**: Recursive directory walk with ignore patterns (HashManager.js:44-60)
|
|
175
|
+
- **Common in**: Docker layer caching, **using it for change detection at deploy time is creative**
|
|
176
|
+
|
|
177
|
+
### 28. **Execution Statistics Tracking**
|
|
178
|
+
- **Files**: `StandardExecutor.js:64-71`, `RunRecorder.js:57-70`
|
|
179
|
+
- **Implementation**: Tracks processed/skipped users, setup/stream/processing time breakdowns
|
|
180
|
+
- **Niche aspect**: Profiler-ready structure (RunRecorder.js:64-70) for BigQuery analysis
|
|
181
|
+
- **Common in**: Profilers (cProfile, pyflame), **baking it into business logic is pragmatic**
|
|
182
|
+
|
|
183
|
+
## Data Access Patterns
|
|
184
|
+
|
|
185
|
+
### 29. **Smart Shard Indexing**
|
|
186
|
+
- **Files**: `data_loader.js:152-213`
|
|
187
|
+
- **Implementation**: Maintains `instrumentId → shardId` index to avoid scanning all shards
|
|
188
|
+
- **Niche aspect**: 24-hour TTL with rebuild logic (line 167-172)
|
|
189
|
+
- **Common in**: Database indexes, **application-level shard routing is custom**
|
|
190
|
+
|
|
191
|
+
### 30. **Async Generator Streaming**
|
|
192
|
+
- **Files**: `data_loader.js:130-150`
|
|
193
|
+
- **Implementation**: `async function*` yields data chunks, caller consumes with `for await`
|
|
194
|
+
- **Niche aspect**: Supports pre-provided refs (line 132) for dependency injection
|
|
195
|
+
- **Common in**: Node.js streams, **generator-based approach is modern/clean**
|
|
196
|
+
|
|
197
|
+
### 31. **Cached Data Loader**
|
|
198
|
+
- **Files**: `CachedDataLoader.js:14-73`
|
|
199
|
+
- **Implementation**: Execution-scoped cache for mappings/insights/social data
|
|
200
|
+
- **Niche aspect**: Decompression helper (line 24-32) for transparent GZIP handling
|
|
201
|
+
- **Common in**: Data layers (Apollo Client, React Query), **per-execution scope is appropriate**
|
|
202
|
+
|
|
203
|
+
### 32. **Deferred Hydration**
|
|
204
|
+
- **Files**: `DependencyFetcher.js:23-66`
|
|
205
|
+
- **Implementation**: Fetches metadata documents, hydrates sharded data on-demand
|
|
206
|
+
- **Niche aspect**: Parallel hydration promises (line 44-47)
|
|
207
|
+
- **Common in**: ORMs (lazy loading), **manual shard hydration is low-level**
|
|
208
|
+
|
|
209
|
+
## Domain-Specific Intelligence
|
|
210
|
+
|
|
211
|
+
### 33. **User Classification Engine**
|
|
212
|
+
- **Files**: `profiling.js:24-236`
|
|
213
|
+
- **Implementation**: "Smart Money" scoring with 18+ behavioral signals
|
|
214
|
+
- **Niche aspect**: Multi-factor scoring (portfolio allocation + trade history + execution timing)
|
|
215
|
+
- **Common in**: Fintech risk models, **granularity is impressive**
|
|
216
|
+
|
|
217
|
+
### 34. **Convex Hull Risk Geometry**
|
|
218
|
+
- **Files**: `profiling.js:338-365`
|
|
219
|
+
- **Implementation**: Monotone Chain algorithm for efficient frontier analysis
|
|
220
|
+
- **Niche aspect**: O(n log n) algorithm choice (profiling.js:345-363)
|
|
221
|
+
- **Common in**: Computational geometry libraries, **integration into user profiling is domain-specific**
|
|
222
|
+
|
|
223
|
+
### 35. **Kadane's Maximum Drawdown**
|
|
224
|
+
- **Files**: `extractors.js:27-52`
|
|
225
|
+
- **Implementation**: O(n) single-pass algorithm for peak-to-trough decline
|
|
226
|
+
- **Niche aspect**: Returns indices for visualization (line 47)
|
|
227
|
+
- **Common in**: Finance libraries (QuantLib), **clean implementation**
|
|
228
|
+
|
|
229
|
+
### 36. **Fast Fourier Transform (Cooley-Tukey)**
|
|
230
|
+
- **Files**: `mathematics.js:148-184`
|
|
231
|
+
- **Implementation**: O(n log n) frequency domain analysis with zero-padding
|
|
232
|
+
- **Niche aspect**: Recursive implementation (line 163-183)
|
|
233
|
+
- **Common in**: Signal processing (NumPy, SciPy), **JavaScript implementation is rare**
|
|
234
|
+
|
|
235
|
+
### 37. **Sliding Window Extrema (Monotonic Queue)**
|
|
236
|
+
- **Files**: `mathematics.js:227-259`
|
|
237
|
+
- **Implementation**: O(n) min/max calculation using deque
|
|
238
|
+
- **Niche aspect**: Dual deques (one for min, one for max, line 236-237)
|
|
239
|
+
- **Common in**: Competitive programming, **production usage is uncommon**
|
|
240
|
+
|
|
241
|
+
### 38. **Geometric Brownian Motion Simulator**
|
|
242
|
+
- **Files**: `mathematics.js:99-118`
|
|
243
|
+
- **Implementation**: Box-Muller transform for normal random variates, Monte Carlo simulation
|
|
244
|
+
- **Niche aspect**: Returns `Float32Array` for memory efficiency (line 106)
|
|
245
|
+
- **Common in**: Quant finance (Black-Scholes), **typed arrays are performance-conscious**
|
|
246
|
+
|
|
247
|
+
### 39. **Hit Probability Calculator**
|
|
248
|
+
- **Files**: `mathematics.js:75-97`
|
|
249
|
+
- **Implementation**: Closed-form barrier option pricing formula
|
|
250
|
+
- **Niche aspect**: Custom `normCDF` implementation (line 85-89) avoids external deps
|
|
251
|
+
- **Common in**: Options pricing libraries, **standalone implementation is self-contained**
|
|
252
|
+
|
|
253
|
+
### 40. **Kernel Density Estimation**
|
|
254
|
+
- **Files**: `mathematics.js:263-288`
|
|
255
|
+
- **Implementation**: Gaussian kernel with weighted samples
|
|
256
|
+
- **Niche aspect**: 3-bandwidth cutoff for performance (line 276)
|
|
257
|
+
- **Common in**: Stats packages (SciPy, R), **production KDE is uncommon**
|
|
258
|
+
|
|
259
|
+
## Schema & Type Management
|
|
260
|
+
|
|
261
|
+
### 41. **Schema Capture System**
|
|
262
|
+
- **Files**: `schema_capture.js:28-68`
|
|
263
|
+
- **Implementation**: Batch stores class-defined schemas to Firestore
|
|
264
|
+
- **Niche aspect**: Pre-commit validation (line 32-34) prevents batch failures
|
|
265
|
+
- **Common in**: Schema registries (Confluent), **lightweight alternative**
|
|
266
|
+
|
|
267
|
+
### 42. **Production Schema Validators**
|
|
268
|
+
- **Files**: `validators.js:14-137`
|
|
269
|
+
- **Implementation**: Structural validation matching schema.md definitions
|
|
270
|
+
- **Niche aspect**: Separate validators per data type (portfolio/history/social/insights/prices)
|
|
271
|
+
- **Common in**: Data quality frameworks, **keeping validators aligned with schema.md shows discipline**
|
|
272
|
+
|
|
273
|
+
### 43. **Legacy Mapping System**
|
|
274
|
+
- **Files**: `HashManager.js:8-23`, `ContextFactory.js:12-17`
|
|
275
|
+
- **Implementation**: Alias mapping for backward compatibility (e.g., `extract` → `DataExtractor`)
|
|
276
|
+
- **Niche aspect**: Dual injection into context (line 14-16)
|
|
277
|
+
- **Common in**: API versioning, **maintaining aliases during a refactor is good practice**
|
|
278
|
+
|
|
279
|
+
## Infrastructure & Operations
|
|
280
|
+
|
|
281
|
+
### 44. **Self-Healing Sharding Strategy**
|
|
282
|
+
- **Files**: `ResultCommitter.js:234-302`
|
|
283
|
+
- **Implementation**: Progressively stricter sharding on failure (900KB → 450KB → 200KB → 100KB)
|
|
284
|
+
- **Niche aspect**: Strategy array iteration (line 241-246)
|
|
285
|
+
- **Common in**: Resilience patterns, **adaptive sharding is creative**
|
|
286
|
+
|
|
287
|
+
### 45. **Initial Write Cleanup Logic**
|
|
288
|
+
- **Files**: `ResultCommitter.js:111-127`, `StandardExecutor.js:152-153`
|
|
289
|
+
- **Implementation**: `isInitialWrite` flag triggers shard deletion before first write
|
|
290
|
+
- **Niche aspect**: Transition detection (line 115-121) from sharded → compressed
|
|
291
|
+
- **Common in**: Migration scripts, **baking it into the write path is convenient**
|
|
292
|
+
|
|
293
|
+
### 46. **Firestore Byte Calculator**
|
|
294
|
+
- **Files**: `ResultCommitter.js:319-324`
|
|
295
|
+
- **Implementation**: Estimates document size for batch limits
|
|
296
|
+
- **Niche aspect**: Handles `DocumentReference` paths (line 322)
|
|
297
|
+
- **Common in**: Firestore SDKs (internal), **custom implementation for control**
|
|
298
|
+
|
|
299
|
+
### 47. **Retry with Exponential Backoff**
|
|
300
|
+
- **Files**: `utils.js:65-79`
|
|
301
|
+
- **Implementation**: Async retry wrapper with configurable attempts and backoff
|
|
302
|
+
- **Niche aspect**: 1s → 2s → 4s progression (line 75)
|
|
303
|
+
- **Common in**: HTTP clients (axios, got), **standalone utility is reusable**
|
|
304
|
+
|
|
305
|
+
### 48. **Batch Commit Chunker**
|
|
306
|
+
- **Files**: `utils.js:86-128`
|
|
307
|
+
- **Implementation**: Splits writes into Firestore 500-op/10MB batches
|
|
308
|
+
- **Niche aspect**: Supports DELETE operations (line 103-108)
|
|
309
|
+
- **Common in**: ORMs (SQLAlchemy bulk), **DELETE support is complete**
|
|
310
|
+
|
|
311
|
+
### 49. **Date Range Generator**
|
|
312
|
+
- **Files**: `utils.js:131-139`
|
|
313
|
+
- **Implementation**: UTC-aware date string generation
|
|
314
|
+
- **Niche aspect**: Forces UTC via `Date.UTC()` constructor (line 133-134)
|
|
315
|
+
- **Common in**: Date libraries (date-fns, Luxon), **UTC enforcement is critical for finance**
|
|
316
|
+
|
|
317
|
+
### 50. **Earliest Date Discovery**
|
|
318
|
+
- **Files**: `utils.js:158-207`
|
|
319
|
+
- **Implementation**: Scans multiple collections to find first available data
|
|
320
|
+
- **Niche aspect**: Handles both flat and sharded collections (line 142-157, 160-174)
|
|
321
|
+
- **Common in**: Data discovery tools, **multi-source aggregation is thorough**
|
|
322
|
+
|
|
323
|
+
## Advanced Patterns
|
|
324
|
+
|
|
325
|
+
### 51. **Tarjan's Stack Management**
|
|
326
|
+
- **Files**: `ManifestBuilder.js:98-141`
|
|
327
|
+
- **Implementation**: Manual stack tracking for SCC detection
|
|
328
|
+
- **Niche aspect**: `onStack` Set for O(1) membership checks (line 106)
|
|
329
|
+
- **Common in**: Graph algorithm implementations, **production usage is advanced**
|
|
330
|
+
|
|
331
|
+
### 52. **Dependency-Injection Context Factory**
|
|
332
|
+
- **Files**: `ContextFactory.js:17-61`
|
|
333
|
+
- **Implementation**: Separate builders for per-user vs meta contexts
|
|
334
|
+
- **Niche aspect**: Math layer injection with legacy aliases (line 12-17)
|
|
335
|
+
- **Common in**: DI frameworks (Spring, Guice), **manual factory is lightweight**
|
|
336
|
+
|
|
337
|
+
### 53. **Price Batch Executor**
|
|
338
|
+
- **Files**: `PriceBatchExecutor.js:12-104`
|
|
339
|
+
- **Implementation**: Specialized executor for price-only calculations (optimization pass)
|
|
340
|
+
- **Niche aspect**: Outer concurrency (2) + shard batching (20) + write batching (50) nested limits
|
|
341
|
+
- **Common in**: MapReduce systems, **three-level batching is complex**
|
|
342
|
+
|
|
343
|
+
### 54. **Deterministic Mock Data Fabrication**
|
|
344
|
+
- **Files**: `Fabricator.js:20-244`, `SeededRandom.js:8-38`
|
|
345
|
+
- **Implementation**: LCG PRNG seeded by calculation name for reproducible fakes
|
|
346
|
+
- **Niche aspect**: Iteration-based seed rotation (Fabricator.js:29)
|
|
347
|
+
- **Common in**: Property-based testing (Hypothesis, QuickCheck), **using it for optimization is novel**
|
|
348
|
+
|
|
349
|
+
### 55. **Schema-Driven Fake Generation**
|
|
350
|
+
- **Files**: `Fabricator.js:48-71`
|
|
351
|
+
- **Implementation**: Recursively generates data matching JSON schema
|
|
352
|
+
- **Niche aspect**: Volume scaling flag (line 49) for aggregate vs per-item data
|
|
353
|
+
- **Common in**: Schema-based generators (JSF, json-schema-faker), **this one is custom to the domain**
|
|
354
|
+
|
|
355
|
+
### 56. **Migration Cleanup Hook**
|
|
356
|
+
- **Files**: `ResultCommitter.js:81-83`, `ResultCommitter.js:305-317`
|
|
357
|
+
- **Implementation**: Deletes old category data when calculation moves
|
|
358
|
+
- **Niche aspect**: `previousCategory` tracking in manifest (WorkflowOrchestrator.js:50-54)
|
|
359
|
+
- **Common in**: Schema migration tools (Alembic, Flyway), **inline cleanup is pragmatic**
|
|
360
|
+
|
|
361
|
+
### 57. **Non-Retryable Error Classification**
|
|
362
|
+
- **Files**: `ResultCommitter.js:18-21`, `computation_worker.js:194-225`
|
|
363
|
+
- **Implementation**: Distinguishes deterministic failures from transient errors
|
|
364
|
+
- **Niche aspect**: `error.stage` property for categorization (computation_worker.js:205-209)
|
|
365
|
+
- **Common in**: Error handling libraries (Sentry), **semantic error types are good practice**
|
|
366
|
+
|
|
367
|
+
### 58. **Reverse Adjacency Graph**
|
|
368
|
+
- **Files**: `BuildReporter.js:62-77`
|
|
369
|
+
- **Implementation**: Maintains child → parent edges for impact analysis
|
|
370
|
+
- **Niche aspect**: Used for blast radius calculation (line 66-74)
|
|
371
|
+
- **Common in**: Dependency analyzers (npm-why), **runtime maintenance is useful**
|
|
372
|
+
|
|
373
|
+
### 59. **Multi-Key Manifest Cache**
|
|
374
|
+
- **Files**: `ManifestLoader.js:9-14`
|
|
375
|
+
- **Implementation**: Cache key is JSON-stringified sorted product lines
|
|
376
|
+
- **Niche aspect**: Handles `['ALL']` vs `['crypto', 'stocks']` as different keys
|
|
377
|
+
- **Common in**: Memoization libraries (lodash.memoize), **cache key design is thoughtful**
|
|
378
|
+
|
|
379
|
+
### 60. **Workflow Variable Restoration**
|
|
380
|
+
- **Files**: `bulltrackers_pipeline.yaml:11-17`
|
|
381
|
+
- **Implementation**: Comment notes a bug fix restoring `passes` and `max_retries` variables
|
|
382
|
+
- **Niche aspect**: T-1 date logic (line 13-15) for "process yesterday" pattern
|
|
383
|
+
- **Common in**: Production YAML configs, **inline documentation is helpful**
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
## Summary Statistics
|
|
388
|
+
|
|
389
|
+
- **Total Features Identified**: 60
|
|
390
|
+
- **Unique/Rare Features**: ~15 (SimHash, content-based short-circuit, forensic routing, contract discovery, weekend validation, behavioral stability, heap-aware flushing, monotonic queue extrema, FFT, KDE, smart shard indexing, recursive infra hash, semantic gates, impossible propagation, blast radius)
|
|
391
|
+
- **Advanced CS Algorithms**: 8 (Kahn's, Tarjan's, Convex Hull, Kadane's, FFT, Box-Muller, Monotonic Queue, LCG)
|
|
392
|
+
- **Common Patterns (Elevated)**: ~25 (executed exceptionally well or with domain-specific twist)
|
|
393
|
+
- **Standard Infrastructure**: ~22 (logging, retries, batching, streaming, caching, validation, etc.)
|
|
394
|
+
|
|
395
|
+
**Verdict**: About 25% truly novel, 40% common patterns elevated to production-grade, 35% standard infrastructure executed well.
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
* PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
|
|
4
4
|
* UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
|
|
5
5
|
* UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
|
|
6
|
+
* FIXED: Implemented "Catch-Up" logic to scan full history (Start -> Target Date) instead of just Target Date.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
|
-
const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
|
|
9
|
+
const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
|
|
9
10
|
const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.js');
|
|
10
11
|
const { PubSubUtils } = require('../../core/utils/pubsub_utils');
|
|
11
12
|
const { fetchComputationStatus, updateComputationStatus } = require('../persistence/StatusRepository');
|
|
@@ -28,6 +29,7 @@ async function checkCrashForensics(db, date, pass, computationName) {
|
|
|
28
29
|
const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computationName}`;
|
|
29
30
|
const doc = await db.doc(ledgerPath).get();
|
|
30
31
|
|
|
32
|
+
// Default to standard
|
|
31
33
|
if (!doc.exists) return 'standard';
|
|
32
34
|
|
|
33
35
|
const data = doc.data();
|
|
@@ -63,33 +65,48 @@ async function checkCrashForensics(db, date, pass, computationName) {
|
|
|
63
65
|
*/
|
|
64
66
|
async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
|
|
65
67
|
const { logger, db } = dependencies;
|
|
66
|
-
const pubsubUtils
|
|
67
|
-
const passToRun
|
|
68
|
+
const pubsubUtils = new PubSubUtils(dependencies);
|
|
69
|
+
const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
|
|
68
70
|
|
|
69
71
|
// Extract Date and Callback from request body (pushed by Workflow)
|
|
72
|
+
// NOTE: 'dateStr' acts as the "Target Date" (Ceiling), usually T-1.
|
|
70
73
|
const dateStr = reqBody.date || config.date;
|
|
71
74
|
const callbackUrl = reqBody.callbackUrl || null;
|
|
72
75
|
|
|
73
76
|
if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
|
|
74
77
|
if (!dateStr) { return logger.log('ERROR', '[Dispatcher] No date defined. Aborting.'); }
|
|
75
78
|
|
|
76
|
-
const currentManifestHash = generateCodeHash(
|
|
77
|
-
computationManifest.map(c => c.hash).sort().join('|')
|
|
78
|
-
);
|
|
79
|
+
const currentManifestHash = generateCodeHash( computationManifest.map(c => c.hash).sort().join('|') );
|
|
79
80
|
|
|
80
81
|
const passes = groupByPass(computationManifest);
|
|
81
82
|
const calcsInThisPass = passes[passToRun] || [];
|
|
82
83
|
|
|
83
84
|
if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
|
|
84
85
|
|
|
85
|
-
logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun}
|
|
86
|
+
logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} (Target: ${dateStr})`);
|
|
86
87
|
|
|
87
|
-
// -- DATE ANALYSIS LOGIC --
|
|
88
|
-
|
|
88
|
+
// -- DATE ANALYSIS LOGIC (FIXED: RANGE SCAN) --
|
|
89
|
+
|
|
90
|
+
// 1. Determine the absolute start of data history
|
|
91
|
+
const earliestDates = await getEarliestDataDates(config, dependencies);
|
|
92
|
+
const startDate = earliestDates.absoluteEarliest;
|
|
93
|
+
const endDate = new Date(dateStr + 'T00:00:00Z');
|
|
94
|
+
|
|
95
|
+
// 2. Generate the full range of dates to check
|
|
96
|
+
let allExpectedDates = getExpectedDateStrings(startDate, endDate);
|
|
97
|
+
|
|
98
|
+
// Safety fallback: if range is invalid or empty, default to target date only
|
|
99
|
+
if (!allExpectedDates || allExpectedDates.length === 0) {
|
|
100
|
+
logger.log('WARN', `[Dispatcher] Date range calculation returned empty (Start: ${startDate.toISOString()} -> End: ${endDate.toISOString()}). Defaulting to single target date.`);
|
|
101
|
+
allExpectedDates = [dateStr];
|
|
102
|
+
} else {
|
|
103
|
+
logger.log('INFO', `[Dispatcher] 📅 Analysis Range: ${allExpectedDates.length} days (${allExpectedDates[0]} to ${allExpectedDates[allExpectedDates.length-1]})`);
|
|
104
|
+
}
|
|
105
|
+
|
|
89
106
|
const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
|
|
90
107
|
const tasksToDispatch = [];
|
|
91
108
|
|
|
92
|
-
// Concurrency limit for analysis & forensics
|
|
109
|
+
// Concurrency limit for analysis & forensics (Parallelize the historical scan)
|
|
93
110
|
const limit = pLimit(20);
|
|
94
111
|
|
|
95
112
|
const analysisPromises = allExpectedDates.map(d => limit(async () => {
|
|
@@ -105,14 +122,15 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
105
122
|
prevDate.setUTCDate(prevDate.getUTCDate() - 1);
|
|
106
123
|
prevDateStr = prevDate.toISOString().slice(0, 10);
|
|
107
124
|
|
|
125
|
+
// Only fetch previous status if it's within valid range
|
|
108
126
|
if (prevDate >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
|
|
109
127
|
fetchPromises.push(fetchComputationStatus(prevDateStr, config, dependencies));
|
|
110
128
|
}
|
|
111
129
|
}
|
|
112
130
|
|
|
113
|
-
const results
|
|
114
|
-
const dailyStatus
|
|
115
|
-
const availability
|
|
131
|
+
const results = await Promise.all(fetchPromises);
|
|
132
|
+
const dailyStatus = results[0];
|
|
133
|
+
const availability = results[1];
|
|
116
134
|
const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
|
|
117
135
|
|
|
118
136
|
const rootDataStatus = availability ? availability.status : {
|
|
@@ -183,7 +201,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
183
201
|
if (callbackUrl) {
|
|
184
202
|
await db.doc(metaStatePath).set({
|
|
185
203
|
createdAt: new Date(),
|
|
186
|
-
date: dateStr,
|
|
204
|
+
date: dateStr, // Acts as the "Job Label" (target date)
|
|
187
205
|
pass: passToRun,
|
|
188
206
|
totalTasks: tasksToDispatch.length,
|
|
189
207
|
remainingTasks: tasksToDispatch.length,
|
|
@@ -201,9 +219,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
201
219
|
|
|
202
220
|
// 3. Create Audit Ledger Entries
|
|
203
221
|
const finalDispatched = [];
|
|
204
|
-
const txnLimit
|
|
222
|
+
const txnLimit = pLimit(20);
|
|
205
223
|
|
|
206
|
-
const txnPromises
|
|
224
|
+
const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
|
|
207
225
|
const ledgerRef = db.collection(`computation_audit_ledger/${task.date}/passes/${task.pass}/tasks`).doc(task.computation);
|
|
208
226
|
|
|
209
227
|
try {
|
|
@@ -281,4 +299,4 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
281
299
|
}
|
|
282
300
|
}
|
|
283
301
|
|
|
284
|
-
module.exports = { dispatchComputationPass };
|
|
302
|
+
module.exports = { dispatchComputationPass };
|