bulltrackers-module 1.0.293 → 1.0.295

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,7 +70,6 @@ async function runBatchPriceComputation(config, deps, dateStrings, calcs, target
70
70
  .collection(config.computationsSubcollection)
71
71
  .doc(normalizeName(calcManifest.name));
72
72
 
73
- // [UPDATE] Add _lastUpdated timestamp
74
73
  writes.push({
75
74
  ref: docRef,
76
75
  data: {
@@ -1,7 +1,8 @@
1
1
  /**
2
2
  * @fileoverview Executor for "Standard" (per-user) calculations.
3
3
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
4
- * UPDATED: Removes manual global.gc() calls.
4
+ * UPDATED: Implements "Circuit Breaker" to fail fast on high error rates.
5
+ * UPDATED: Implements "Adaptive Flushing" based on V8 Heap usage.
5
6
  * UPDATED: Manages incremental sharding states.
6
7
  * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
7
8
  */
@@ -12,6 +13,7 @@ const { ContextFactory } = require
12
13
  const { commitResults } = require('../persistence/ResultCommitter');
13
14
  const mathLayer = require('../layers/index');
14
15
  const { performance } = require('perf_hooks');
16
+ const v8 = require('v8');
15
17
 
16
18
  class StandardExecutor {
17
19
  static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {
@@ -59,6 +61,9 @@ class StandardExecutor {
59
61
  const aggregatedSuccess = {};
60
62
  const aggregatedFailures = [];
61
63
 
64
+ // [NEW] Global Error Tracking for Circuit Breaker
65
+ const errorStats = { count: 0, total: 0 };
66
+
62
67
  Object.keys(state).forEach(name => {
63
68
  executionStats[name] = {
64
69
  processedUsers: 0,
@@ -89,7 +94,7 @@ class StandardExecutor {
89
94
 
90
95
  let yP_chunk = {}, tH_chunk = {};
91
96
 
92
- const BATCH_SIZE = 5000;
97
+ const MIN_BATCH_SIZE = 1000; // Minimum to process before checking stats
93
98
  let usersSinceLastFlush = 0;
94
99
 
95
100
  try {
@@ -103,6 +108,8 @@ class StandardExecutor {
103
108
  const chunkSize = Object.keys(tP_chunk).length;
104
109
 
105
110
  const startProcessing = performance.now();
111
+
112
+ // [UPDATED] Collect execution results (success/failure counts)
106
113
  const promises = streamingCalcs.map(calc =>
107
114
  StandardExecutor.executePerUser(
108
115
  calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,
@@ -110,15 +117,37 @@ class StandardExecutor {
110
117
  executionStats[normalizeName(calc.manifest.name)]
111
118
  )
112
119
  );
113
- await Promise.all(promises);
120
+
121
+ const batchResults = await Promise.all(promises);
114
122
  const procDuration = performance.now() - startProcessing;
115
123
 
116
124
  Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
117
125
 
126
+ // [NEW] Update Error Stats
127
+ batchResults.forEach(r => {
128
+ errorStats.total += (r.success + r.failures);
129
+ errorStats.count += r.failures;
130
+ });
131
+
132
+ // [NEW] Circuit Breaker: Fail fast if error rate > 10% after processing 100+ items
133
+ // We check total > 100 to avoid failing on the very first user if they happen to be bad.
134
+ if (errorStats.total > 100 && (errorStats.count / errorStats.total) > 0.10) {
135
+ const failRate = (errorStats.count / errorStats.total * 100).toFixed(1);
136
+ throw new Error(`[Circuit Breaker] High failure rate detected (${failRate}%). Aborting batch to prevent silent data loss.`);
137
+ }
138
+
118
139
  usersSinceLastFlush += chunkSize;
119
140
 
120
- if (usersSinceLastFlush >= BATCH_SIZE) {
121
- logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
141
+ // [NEW] Adaptive Flushing (Memory Pressure Check)
142
+ const heapStats = v8.getHeapStatistics();
143
+ const heapUsedRatio = heapStats.used_heap_size / heapStats.heap_size_limit;
144
+ const MEMORY_THRESHOLD = 0.70; // 70% of available RAM
145
+ const COUNT_THRESHOLD = 5000;
146
+
147
+ if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
148
+ const reason = heapUsedRatio > MEMORY_THRESHOLD ? `MEMORY_PRESSURE (${(heapUsedRatio*100).toFixed(0)}%)` : 'BATCH_LIMIT';
149
+
150
+ logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users. Reason: ${reason}`);
122
151
 
123
152
  // [UPDATED] Pass isInitialWrite: true only on the first flush
124
153
  const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
@@ -171,6 +200,7 @@ class StandardExecutor {
171
200
  _executionStats: executionStats[name]
172
201
  };
173
202
 
203
+ // Clear the memory immediately after preparing the commit
174
204
  inst.results = {};
175
205
  }
176
206
 
@@ -226,6 +256,10 @@ class StandardExecutor {
226
256
  const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
227
257
  const SCHEMAS = mathLayer.SCHEMAS;
228
258
 
259
+ // [NEW] Track local batch success/failure
260
+ let chunkSuccess = 0;
261
+ let chunkFailures = 0;
262
+
229
263
  for (const [userId, todayPortfolio] of Object.entries(portfolioData)) {
230
264
  const yesterdayPortfolio = yesterdayPortfolioData ? yesterdayPortfolioData[userId] : null;
231
265
  const todayHistory = historyData ? historyData[userId] : null;
@@ -249,10 +283,16 @@ class StandardExecutor {
249
283
  try {
250
284
  await calcInstance.process(context);
251
285
  if (stats) stats.processedUsers++;
286
+ chunkSuccess++;
252
287
  }
253
- catch (e) { logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`); }
288
+ catch (e) {
289
+ logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`);
290
+ chunkFailures++;
291
+ }
254
292
  }
293
+
294
+ return { success: chunkSuccess, failures: chunkFailures };
255
295
  }
256
296
  }
257
297
 
258
- module.exports = { StandardExecutor };
298
+ module.exports = { StandardExecutor };
@@ -0,0 +1,395 @@
1
+ # Complete Feature Inventory of BullTrackers Computation System
2
+
3
+ ## Core DAG Engine Features
4
+
5
+ ### 1. **Topological Sorting (Kahn's Algorithm)**
6
+ - **Files**: `ManifestBuilder.js:187-205`
7
+ - **Implementation**: Builds execution passes by tracking in-degrees, queuing zero-dependency nodes
8
+ - **Niche aspect**: Dynamic pass assignment (line 201: `neighborEntry.pass = currentEntry.pass + 1`)
9
+ - **Common in**: Airflow, Prefect, Dagster (all use topological sort)
10
+
11
+ ### 2. **Cycle Detection (Tarjan's SCC Algorithm)**
12
+ - **Files**: `ManifestBuilder.js:98-141`
13
+ - **Implementation**: Strongly Connected Components detection with stack-based traversal
14
+ - **Niche aspect**: Returns human-readable cycle chain (line 137: `cycle.join(' -> ') + ' -> ' + cycle[0]`)
15
+ - **Common in**: Academic graph libraries, rare in production DAG systems (most use simpler DFS)
16
+
17
+ ### 3. **Auto-Discovery Manifest Building**
18
+ - **Files**: `ManifestBuilder.js:143-179`, `ManifestLoader.js:9-42`
19
+ - **Implementation**: Scans directories, instantiates classes, extracts metadata via `getMetadata()` static method
20
+ - **Niche aspect**: Singleton caching with multi-key support (ManifestLoader.js:9)
21
+ - **Common in**: Plugin systems (Airflow providers), less common for computation graphs
22
+
23
+ ## Dependency Management & Optimization
24
+
25
+ ### 4. **Multi-Layered Hash Composition**
26
+ - **Files**: `ManifestBuilder.js:56-95`, `HashManager.js:25-36`
27
+ - **Implementation**: Composite hash from code + epoch + infrastructure + layers + dependencies
28
+ - **Niche aspect**: Infrastructure hash (recursive file tree hashing, HashManager.js:38-79)
29
+ - **Common in**: Build systems (Bazel, Buck), **very rare** in data pipelines
30
+
31
+ ### 5. **Content-Based Dependency Short-Circuiting**
32
+ - **Files**: `WorkflowOrchestrator.js:51-73`
33
+ - **Implementation**: Tracks `resultHash` (output data hash), skips re-run if output unchanged despite code change
34
+ - **Niche aspect**: `dependencyResultHashes` tracking (line 59-67)
35
+ - **Common in**: **Extremely rare** - only seen in specialized incremental computation systems
36
+
37
+ ### 6. **Behavioral Stability Detection (SimHash)**
38
+ - **Files**: `BuildReporter.js:55-89`, `SimRunner.js:12-42`, `Fabricator.js:20-244`
39
+ - **Implementation**: Runs code against deterministic mock data, hashes output to detect "logic changes" vs "cosmetic changes"
40
+ - **Niche aspect**: Seeded random data generation (SeededRandom.js:1-38) for reproducible simulations
41
+ - **Common in**: **Unique** - haven't seen this elsewhere. Conceptually similar to property-based testing but for optimization
42
+
43
+ ### 7. **System Epoch Forcing**
44
+ - **Files**: `system_epoch.js:1-2`, `ManifestBuilder.js:65`
45
+ - **Implementation**: Manual version bump to force global re-computation
46
+ - **Niche aspect**: Single-line file that invalidates all cached results
47
+ - **Common in**: Cache invalidation patterns, but unusual to have a dedicated module
48
+
49
+ ## Execution & Resource Management
50
+
51
+ ### 8. **Streaming Execution with Batch Flushing**
52
+ - **Files**: `StandardExecutor.js:86-158`
53
+ - **Implementation**: Async generators yield data chunks, flush to DB every N users
54
+ - **Niche aspect**: Adaptive flushing based on V8 heap pressure (line 141-150)
55
+ - **Common in**: ETL tools (Spark, Flink use micro-batching), **heap-aware flushing is rare**
56
+
57
+ ### 9. **Memory Heartbeat (Flight Recorder)**
58
+ - **Files**: `computation_worker.js:30-53`
59
+ - **Implementation**: Background timer writes memory stats to Firestore every 2 seconds
60
+ - **Niche aspect**: Uses `.unref()` to prevent blocking process exit (line 50)
61
+ - **Common in**: APM tools (DataDog, New Relic), **embedding in workers is custom**
62
+
63
+ ### 10. **Forensic Crash Analysis & Intelligent Routing**
64
+ - **Files**: `computation_dispatcher.js:31-68`
65
+ - **Implementation**: Reads last memory stats from failed runs, routes to high-mem queue if OOM suspected
66
+ - **Niche aspect**: Parses telemetry to distinguish crash types (line 44-50)
67
+ - **Common in**: Kubernetes autoscaling heuristics, **application-level routing is rare**
68
+
69
+ ### 11. **Circuit Breaker Pattern**
70
+ - **Files**: `StandardExecutor.js:126-137`
71
+ - **Implementation**: Tracks error rate, fails fast if >10% failures after 100 items
72
+ - **Niche aspect**: Runs mid-stream (not just at job start)
73
+ - **Common in**: Microservices (Hystrix, Resilience4j), uncommon in data pipelines
74
+
75
+ ### 12. **Incremental Auto-Sharding**
76
+ - **Files**: `ResultCommitter.js:234-302`
77
+ - **Implementation**: Dynamically splits results into Firestore subcollection shards, tracks shard index across flushes
78
+ - **Niche aspect**: `flushMode: INTERMEDIATE` flag (line 150) to avoid pointer updates mid-stream
79
+ - **Common in**: Database sharding, **dynamic document sharding is custom**
80
+
81
+ ### 13. **GZIP Compression Strategy**
82
+ - **Files**: `ResultCommitter.js:128-157`
83
+ - **Implementation**: Compresses results >50KB, stores as binary blob if <900KB compressed
84
+ - **Niche aspect**: Falls back to sharding if compression fails or exceeds limit
85
+ - **Common in**: Storage layers, integration at application level is custom
86
+
87
+ ## Data Quality & Validation
88
+
89
+ ### 14. **Heuristic Validation (Grey Box)**
90
+ - **Files**: `ResultsValidator.js:8-96`
91
+ - **Implementation**: Statistical analysis (zero%, null%, flatline detection) without knowing schema
92
+ - **Niche aspect**: Weekend mode (line 57-64) - relaxes thresholds on Saturdays/Sundays
93
+ - **Common in**: Data quality tools (Great Expectations, Soda), **weekend-aware thresholds are domain-specific**
94
+
95
+ ### 15. **Contract Discovery & Enforcement**
96
+ - **Files**: `ContractDiscoverer.js:11-120`, `ContractValidator.js:9-64`
97
+ - **Implementation**: Monte Carlo simulation learns behavioral bounds, enforces at runtime
98
+ - **Niche aspect**: Distinguishes "physics limits" (ratios 0-1) from "statistical envelopes" (6-sigma)
99
+ - **Common in**: **Unique** - closest analogue is schema inference (Pandas Profiling) but this is probabilistic + enforced
100
+
101
+ ### 16. **Semantic Gates**
102
+ - **Files**: `ResultCommitter.js:118-127`
103
+ - **Implementation**: Blocks results that violate contracts before writing
104
+ - **Niche aspect**: Differentiated error handling - `SEMANTIC_GATE` errors are non-retryable (line 210-225)
105
+ - **Common in**: Type systems (TypeScript, Mypy), **runtime probabilistic checks are rare**
106
+
107
+ ### 17. **Root Data Availability Tracking**
108
+ - **Files**: `AvailabilityChecker.js:49-87`, `utils.js:11-17`
109
+ - **Implementation**: Centralized index (`system_root_data_index`) tracks what data exists per day
110
+ - **Niche aspect**: Granular user-type checks (speculator vs normal portfolio, line 23-47)
111
+ - **Common in**: Data catalogs (Amundsen, DataHub), **day-level granularity is custom**
112
+
113
+ ### 18. **Impossible State Propagation**
114
+ - **Files**: `WorkflowOrchestrator.js:94-96`, `logger.js:77-93`
115
+ - **Implementation**: Marks calculations as `IMPOSSIBLE` instead of failing them, allows graph to continue
116
+ - **Niche aspect**: Separate "impossible" category in analysis reports (logger.js:86-91)
117
+ - **Common in**: Workflow engines handle failures, **explicit impossible state is rare**
118
+
119
+ ## Orchestration & Coordination
120
+
121
+ ### 19. **Event-Driven Callback Pattern (Zero Polling)**
122
+ - **Files**: `bulltrackers_pipeline.yaml:49-76`, `computation_worker.js:82-104`
123
+ - **Implementation**: Workflow creates callback endpoint, worker POSTs on completion, workflow wakes
124
+ - **Niche aspect**: IAM authentication for callbacks (computation_worker.js:88-91)
125
+ - **Common in**: Cloud Workflows, AWS Step Functions (both support callbacks), **IAM-secured callbacks are best practice but not default**
126
+
127
+ ### 20. **Run State Counter Pattern**
128
+ - **Files**: `computation_dispatcher.js:107-115`, `computation_worker.js:106-123`
129
+ - **Implementation**: Shared Firestore doc tracks `remainingTasks`, workers decrement on completion
130
+ - **Niche aspect**: Transaction-based decrement (computation_worker.js:109-119) ensures atomicity
131
+ - **Common in**: Distributed systems, **Firestore-specific implementation is custom**
132
+
133
+ ### 21. **Audit Ledger (Ledger-DB Pattern)**
134
+ - **Files**: `computation_dispatcher.js:143-163`, `RunRecorder.js:26-99`
135
+ - **Implementation**: Write-once ledger per task (`computation_audit_ledger/{date}/passes/{pass}/tasks/{calc}`)
136
+ - **Niche aspect**: Stores granular timing breakdown (RunRecorder.js:64-70)
137
+ - **Common in**: Event sourcing systems, **granular profiling in ledger is uncommon**
138
+
139
+ ### 22. **Poison Message Handling (DLQ)**
140
+ - **Files**: `computation_worker.js:36-60`
141
+ - **Implementation**: Max retries check via Pub/Sub `deliveryAttempt`, moves to dead letter queue
142
+ - **Niche aspect**: Differentiates deterministic errors (line 194-222) from transient failures
143
+ - **Common in**: Message queues (RabbitMQ, SQS), **logic-aware routing is custom**
144
+
145
+ ### 23. **Catch-Up Logic (Historical Scan)**
146
+ - **Files**: `computation_dispatcher.js:65-81`
147
+ - **Implementation**: Scans full date range (earliest data → target date) instead of just target date
148
+ - **Niche aspect**: Parallel analysis with concurrency limit (line 85)
149
+ - **Common in**: Data pipelines (backfill mode), **integrating it into the dispatcher is convenient**
150
+
151
+ ## Observability & Debugging
152
+
153
+ ### 24. **Structured Logging System**
154
+ - **Files**: `logger.js:27-118`
155
+ - **Implementation**: Dual output (human-readable + JSON), process tracking, context inheritance
156
+ - **Niche aspect**: `ProcessLogger` class (line 120-148) for scoped logging with auto-stats
157
+ - **Common in**: Production apps (Winston, Bunyan), **process-scoped loggers are a nice touch**
158
+
159
+ ### 25. **Date Analysis Reports**
160
+ - **Files**: `logger.js:77-132`
161
+ - **Implementation**: Per-date breakdown of runnable/blocked/impossible/skipped calculations
162
+ - **Niche aspect**: Unicode symbols for visual parsing (line 103)
163
+ - **Common in**: DAG visualization tools, **inline CLI reports are developer-friendly**
164
+
165
+ ### 26. **Build Report Generator**
166
+ - **Files**: `BuildReporter.js:138-248`
167
+ - **Implementation**: Pre-deployment impact analysis showing blast radius of code changes
168
+ - **Niche aspect**: Blast radius calculation (line 62-77) - finds all downstream dependents
169
+ - **Common in**: CI/CD tools (GitHub's "affected projects"), **calculation-level granularity is detailed**
170
+
171
+ ### 27. **System Fingerprinting**
172
+ - **Files**: `BuildReporter.js:28-51`, `HashManager.js:80-111`
173
+ - **Implementation**: SHA-256 hash of entire codebase + manifest, triggers report on change
174
+ - **Niche aspect**: Recursive directory walk with ignore patterns (HashManager.js:44-60)
175
+ - **Common in**: Docker layer caching, **using it for change detection at deploy time is creative**
176
+
177
+ ### 28. **Execution Statistics Tracking**
178
+ - **Files**: `StandardExecutor.js:64-71`, `RunRecorder.js:57-70`
179
+ - **Implementation**: Tracks processed/skipped users, setup/stream/processing time breakdowns
180
+ - **Niche aspect**: Profiler-ready structure (RunRecorder.js:64-70) for BigQuery analysis
181
+ - **Common in**: Profilers (cProfile, pyflame), **baking it into business logic is pragmatic**
182
+
183
+ ## Data Access Patterns
184
+
185
+ ### 29. **Smart Shard Indexing**
186
+ - **Files**: `data_loader.js:152-213`
187
+ - **Implementation**: Maintains `instrumentId → shardId` index to avoid scanning all shards
188
+ - **Niche aspect**: 24-hour TTL with rebuild logic (line 167-172)
189
+ - **Common in**: Database indexes, **application-level shard routing is custom**
190
+
191
+ ### 30. **Async Generator Streaming**
192
+ - **Files**: `data_loader.js:130-150`
193
+ - **Implementation**: `async function*` yields data chunks, caller consumes with `for await`
194
+ - **Niche aspect**: Supports pre-provided refs (line 132) for dependency injection
195
+ - **Common in**: Node.js streams, **generator-based approach is modern/clean**
196
+
197
+ ### 31. **Cached Data Loader**
198
+ - **Files**: `CachedDataLoader.js:14-73`
199
+ - **Implementation**: Execution-scoped cache for mappings/insights/social data
200
+ - **Niche aspect**: Decompression helper (line 24-32) for transparent GZIP handling
201
+ - **Common in**: Data layers (Apollo Client, React Query), **per-execution scope is appropriate**
202
+
203
+ ### 32. **Deferred Hydration**
204
+ - **Files**: `DependencyFetcher.js:23-66`
205
+ - **Implementation**: Fetches metadata documents, hydrates sharded data on-demand
206
+ - **Niche aspect**: Parallel hydration promises (line 44-47)
207
+ - **Common in**: ORMs (lazy loading), **manual shard hydration is low-level**
208
+
209
+ ## Domain-Specific Intelligence
210
+
211
+ ### 33. **User Classification Engine**
212
+ - **Files**: `profiling.js:24-236`
213
+ - **Implementation**: "Smart Money" scoring with 18+ behavioral signals
214
+ - **Niche aspect**: Multi-factor scoring (portfolio allocation + trade history + execution timing)
215
+ - **Common in**: Fintech risk models, **granularity is impressive**
216
+
217
+ ### 34. **Convex Hull Risk Geometry**
218
+ - **Files**: `profiling.js:338-365`
219
+ - **Implementation**: Monotone Chain algorithm for efficient frontier analysis
220
+ - **Niche aspect**: O(n log n) algorithm choice (profiling.js:345-363)
221
+ - **Common in**: Computational geometry libraries, **integration into user profiling is domain-specific**
222
+
223
+ ### 35. **Kadane's Maximum Drawdown**
224
+ - **Files**: `extractors.js:27-52`
225
+ - **Implementation**: O(n) single-pass algorithm for peak-to-trough decline
226
+ - **Niche aspect**: Returns indices for visualization (line 47)
227
+ - **Common in**: Finance libraries (QuantLib), **clean implementation**
228
+
229
+ ### 36. **Fast Fourier Transform (Cooley-Tukey)**
230
+ - **Files**: `mathematics.js:148-184`
231
+ - **Implementation**: O(n log n) frequency domain analysis with zero-padding
232
+ - **Niche aspect**: Recursive implementation (line 163-183)
233
+ - **Common in**: Signal processing (NumPy, SciPy), **JavaScript implementation is rare**
234
+
235
+ ### 37. **Sliding Window Extrema (Monotonic Queue)**
236
+ - **Files**: `mathematics.js:227-259`
237
+ - **Implementation**: O(n) min/max calculation using deque
238
+ - **Niche aspect**: Dual deques (one for min, one for max, line 236-237)
239
+ - **Common in**: Competitive programming, **production usage is uncommon**
240
+
241
+ ### 38. **Geometric Brownian Motion Simulator**
242
+ - **Files**: `mathematics.js:99-118`
243
+ - **Implementation**: Box-Muller transform for normal random variates, Monte Carlo simulation
244
+ - **Niche aspect**: Returns `Float32Array` for memory efficiency (line 106)
245
+ - **Common in**: Quant finance (Black-Scholes), **typed arrays are performance-conscious**
246
+
247
+ ### 39. **Hit Probability Calculator**
248
+ - **Files**: `mathematics.js:75-97`
249
+ - **Implementation**: Closed-form barrier option pricing formula
250
+ - **Niche aspect**: Custom `normCDF` implementation (line 85-89) avoids external deps
251
+ - **Common in**: Options pricing libraries, **standalone implementation is self-contained**
252
+
253
+ ### 40. **Kernel Density Estimation**
254
+ - **Files**: `mathematics.js:263-288`
255
+ - **Implementation**: Gaussian kernel with weighted samples
256
+ - **Niche aspect**: 3-bandwidth cutoff for performance (line 276)
257
+ - **Common in**: Stats packages (SciPy, R), **production KDE is uncommon**
258
+
259
+ ## Schema & Type Management
260
+
261
+ ### 41. **Schema Capture System**
262
+ - **Files**: `schema_capture.js:28-68`
263
+ - **Implementation**: Batch stores class-defined schemas to Firestore
264
+ - **Niche aspect**: Pre-commit validation (line 32-34) prevents batch failures
265
+ - **Common in**: Schema registries (Confluent), **lightweight alternative**
266
+
267
+ ### 42. **Production Schema Validators**
268
+ - **Files**: `validators.js:14-137`
269
+ - **Implementation**: Structural validation matching schema.md definitions
270
+ - **Niche aspect**: Separate validators per data type (portfolio/history/social/insights/prices)
271
+ - **Common in**: Data quality frameworks, **alignment with schema.md shows discipline**
272
+
273
+ ### 43. **Legacy Mapping System**
274
+ - **Files**: `HashManager.js:8-23`, `ContextFactory.js:12-17`
275
+ - **Implementation**: Alias mapping for backward compatibility (e.g., `extract` → `DataExtractor`)
276
+ - **Niche aspect**: Dual injection into context (line 14-16)
277
+ - **Common in**: API versioning, **maintaining it during a refactor is good practice**
278
+
279
+ ## Infrastructure & Operations
280
+
281
+ ### 44. **Self-Healing Sharding Strategy**
282
+ - **Files**: `ResultCommitter.js:234-302`
283
+ - **Implementation**: Progressively stricter sharding on failure (900KB → 450KB → 200KB → 100KB)
284
+ - **Niche aspect**: Strategy array iteration (line 241-246)
285
+ - **Common in**: Resilience patterns, **adaptive sharding is creative**
286
+
287
+ ### 45. **Initial Write Cleanup Logic**
288
+ - **Files**: `ResultCommitter.js:111-127`, `StandardExecutor.js:152-153`
289
+ - **Implementation**: `isInitialWrite` flag triggers shard deletion before first write
290
+ - **Niche aspect**: Transition detection (line 115-121) from sharded → compressed
291
+ - **Common in**: Migration scripts, **baking it into the write path is convenient**
292
+
293
+ ### 46. **Firestore Byte Calculator**
294
+ - **Files**: `ResultCommitter.js:319-324`
295
+ - **Implementation**: Estimates document size for batch limits
296
+ - **Niche aspect**: Handles `DocumentReference` paths (line 322)
297
+ - **Common in**: Firestore SDKs (internal), **custom implementation for control**
298
+
299
+ ### 47. **Retry with Exponential Backoff**
300
+ - **Files**: `utils.js:65-79`
301
+ - **Implementation**: Async retry wrapper with configurable attempts and backoff
302
+ - **Niche aspect**: 1s → 2s → 4s progression (line 75)
303
+ - **Common in**: HTTP clients (axios, got), **standalone utility is reusable**
304
+
305
+ ### 48. **Batch Commit Chunker**
306
+ - **Files**: `utils.js:86-128`
307
+ - **Implementation**: Splits writes into Firestore 500-op/10MB batches
308
+ - **Niche aspect**: Supports DELETE operations (line 103-108)
309
+ - **Common in**: ORMs (SQLAlchemy bulk), **DELETE support is complete**
310
+
311
+ ### 49. **Date Range Generator**
312
+ - **Files**: `utils.js:131-139`
313
+ - **Implementation**: UTC-aware date string generation
314
+ - **Niche aspect**: Forces UTC via `Date.UTC()` constructor (line 133-134)
315
+ - **Common in**: Date libraries (date-fns, Luxon), **UTC enforcement is critical for finance**
316
+
317
+ ### 50. **Earliest Date Discovery**
318
+ - **Files**: `utils.js:158-207`
319
+ - **Implementation**: Scans multiple collections to find first available data
320
+ - **Niche aspect**: Handles both flat and sharded collections (line 142-157, 160-174)
321
+ - **Common in**: Data discovery tools, **multi-source aggregation is thorough**
322
+
323
+ ## Advanced Patterns
324
+
325
+ ### 51. **Tarjan's Stack Management**
326
+ - **Files**: `ManifestBuilder.js:98-141`
327
+ - **Implementation**: Manual stack tracking for SCC detection
328
+ - **Niche aspect**: `onStack` Set for O(1) membership checks (line 106)
329
+ - **Common in**: Graph algorithm implementations, **production usage is advanced**
330
+
331
+ ### 52. **Dependency-Injection Context Factory**
332
+ - **Files**: `ContextFactory.js:17-61`
333
+ - **Implementation**: Separate builders for per-user vs meta contexts
334
+ - **Niche aspect**: Math layer injection with legacy aliases (line 12-17)
335
+ - **Common in**: DI frameworks (Spring, Guice), **manual factory is lightweight**
336
+
337
+ ### 53. **Price Batch Executor**
338
+ - **Files**: `PriceBatchExecutor.js:12-104`
339
+ - **Implementation**: Specialized executor for price-only calculations (optimization pass)
340
+ - **Niche aspect**: Outer concurrency (2) + shard batching (20) + write batching (50) nested limits
341
+ - **Common in**: MapReduce systems, **three-level batching is complex**
342
+
343
+ ### 54. **Deterministic Mock Data Fabrication**
344
+ - **Files**: `Fabricator.js:20-244`, `SeededRandom.js:8-38`
345
+ - **Implementation**: LCG PRNG seeded by calculation name for reproducible fakes
346
+ - **Niche aspect**: Iteration-based seed rotation (Fabricator.js:29)
347
+ - **Common in**: Property-based testing (Hypothesis, QuickCheck), **using it for optimization is novel**
348
+
349
+ ### 55. **Schema-Driven Fake Generation**
350
+ - **Files**: `Fabricator.js:48-71`
351
+ - **Implementation**: Recursively generates data matching JSON schema
352
+ - **Niche aspect**: Volume scaling flag (line 49) for aggregate vs per-item data
353
+ - **Common in**: Schema-based generators (JSF, json-schema-faker), **custom to domain**
354
+
355
+ ### 56. **Migration Cleanup Hook**
356
+ - **Files**: `ResultCommitter.js:81-83`, `ResultCommitter.js:305-317`
357
+ - **Implementation**: Deletes old category data when calculation moves
358
+ - **Niche aspect**: `previousCategory` tracking in manifest (WorkflowOrchestrator.js:50-54)
359
+ - **Common in**: Schema migration tools (Alembic, Flyway), **inline cleanup is pragmatic**
360
+
361
+ ### 57. **Non-Retryable Error Classification**
362
+ - **Files**: `ResultCommitter.js:18-21`, `computation_worker.js:194-225`
363
+ - **Implementation**: Distinguishes deterministic failures from transient errors
364
+ - **Niche aspect**: `error.stage` property for categorization (computation_worker.js:205-209)
365
+ - **Common in**: Error handling libraries (Sentry), **semantic error types are good practice**
366
+
367
+ ### 58. **Reverse Adjacency Graph**
368
+ - **Files**: `BuildReporter.js:62-77`
369
+ - **Implementation**: Maintains child → parent edges for impact analysis
370
+ - **Niche aspect**: Used for blast radius calculation (line 66-74)
371
+ - **Common in**: Dependency analyzers (npm-why), **runtime maintenance is useful**
372
+
373
+ ### 59. **Multi-Key Manifest Cache**
374
+ - **Files**: `ManifestLoader.js:9-14`
375
+ - **Implementation**: Cache key is JSON-stringified sorted product lines
376
+ - **Niche aspect**: Handles `['ALL']` vs `['crypto', 'stocks']` as different keys
377
+ - **Common in**: Memoization libraries (lodash.memoize), **cache key design is thoughtful**
378
+
379
+ ### 60. **Workflow Variable Restoration**
380
+ - **Files**: `bulltrackers_pipeline.yaml:11-17`
381
+ - **Implementation**: Comment notes a bug fix restoring `passes` and `max_retries` variables
382
+ - **Niche aspect**: T-1 date logic (line 13-15) for "process yesterday" pattern
383
+ - **Common in**: Production YAML configs, **inline documentation is helpful**
384
+
385
+ ---
386
+
387
+ ## Summary Statistics
388
+
389
+ - **Total Features Identified**: 60
390
+ - **Unique/Rare Features**: ~15 (SimHash, content-based short-circuit, forensic routing, contract discovery, weekend validation, behavioral stability, heap-aware flushing, monotonic queue extrema, FFT, KDE, smart shard indexing, recursive infra hash, semantic gates, impossible propagation, blast radius)
391
+ - **Advanced CS Algorithms**: 8 (Kahn's, Tarjan's, Convex Hull, Kadane's, FFT, Box-Muller, Monotonic Queue, LCG)
392
+ - **Common Patterns (Elevated)**: ~25 (executed exceptionally well or with domain-specific twist)
393
+ - **Standard Infrastructure**: ~22 (logging, retries, batching, streaming, caching, validation, etc.)
394
+
395
+ **Verdict**: About 25% truly novel, 40% common patterns elevated to production-grade, 35% standard infrastructure executed well.
@@ -3,9 +3,10 @@
3
3
  * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
4
4
  * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
5
5
  * UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
6
+ * FIXED: Implemented "Catch-Up" logic to scan full history (Start -> Target Date) instead of just Target Date.
6
7
  */
7
8
 
8
- const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
9
+ const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
9
10
  const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.js');
10
11
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
11
12
  const { fetchComputationStatus, updateComputationStatus } = require('../persistence/StatusRepository');
@@ -28,6 +29,7 @@ async function checkCrashForensics(db, date, pass, computationName) {
28
29
  const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computationName}`;
29
30
  const doc = await db.doc(ledgerPath).get();
30
31
 
32
+ // Default to standard
31
33
  if (!doc.exists) return 'standard';
32
34
 
33
35
  const data = doc.data();
@@ -63,33 +65,48 @@ async function checkCrashForensics(db, date, pass, computationName) {
63
65
  */
64
66
  async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
65
67
  const { logger, db } = dependencies;
66
- const pubsubUtils = new PubSubUtils(dependencies);
67
- const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
68
+ const pubsubUtils = new PubSubUtils(dependencies);
69
+ const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
68
70
 
69
71
  // Extract Date and Callback from request body (pushed by Workflow)
72
+ // NOTE: 'dateStr' acts as the "Target Date" (Ceiling), usually T-1.
70
73
  const dateStr = reqBody.date || config.date;
71
74
  const callbackUrl = reqBody.callbackUrl || null;
72
75
 
73
76
  if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
74
77
  if (!dateStr) { return logger.log('ERROR', '[Dispatcher] No date defined. Aborting.'); }
75
78
 
76
- const currentManifestHash = generateCodeHash(
77
- computationManifest.map(c => c.hash).sort().join('|')
78
- );
79
+ const currentManifestHash = generateCodeHash( computationManifest.map(c => c.hash).sort().join('|') );
79
80
 
80
81
  const passes = groupByPass(computationManifest);
81
82
  const calcsInThisPass = passes[passToRun] || [];
82
83
 
83
84
  if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
84
85
 
85
- logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} for ${dateStr}`);
86
+ logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} (Target: ${dateStr})`);
86
87
 
87
- // -- DATE ANALYSIS LOGIC --
88
- const allExpectedDates = [dateStr];
88
+ // -- DATE ANALYSIS LOGIC (FIXED: RANGE SCAN) --
89
+
90
+ // 1. Determine the absolute start of data history
91
+ const earliestDates = await getEarliestDataDates(config, dependencies);
92
+ const startDate = earliestDates.absoluteEarliest;
93
+ const endDate = new Date(dateStr + 'T00:00:00Z');
94
+
95
+ // 2. Generate the full range of dates to check
96
+ let allExpectedDates = getExpectedDateStrings(startDate, endDate);
97
+
98
+ // Safety fallback: if range is invalid or empty, default to target date only
99
+ if (!allExpectedDates || allExpectedDates.length === 0) {
100
+ logger.log('WARN', `[Dispatcher] Date range calculation returned empty (Start: ${startDate.toISOString()} -> End: ${endDate.toISOString()}). Defaulting to single target date.`);
101
+ allExpectedDates = [dateStr];
102
+ } else {
103
+ logger.log('INFO', `[Dispatcher] 📅 Analysis Range: ${allExpectedDates.length} days (${allExpectedDates[0]} to ${allExpectedDates[allExpectedDates.length-1]})`);
104
+ }
105
+
89
106
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
90
107
  const tasksToDispatch = [];
91
108
 
92
- // Concurrency limit for analysis & forensics
109
+ // Concurrency limit for analysis & forensics: the historical scan runs in parallel, at most 20 dates at a time
93
110
  const limit = pLimit(20);
94
111
 
95
112
  const analysisPromises = allExpectedDates.map(d => limit(async () => {
@@ -105,14 +122,15 @@ async function dispatchComputationPass(config, dependencies, computationManifest
105
122
  prevDate.setUTCDate(prevDate.getUTCDate() - 1);
106
123
  prevDateStr = prevDate.toISOString().slice(0, 10);
107
124
 
125
+ // Only fetch the previous day's status if that day is on or after DEFINITIVE_EARLIEST_DATES.absoluteEarliest
108
126
  if (prevDate >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
109
127
  fetchPromises.push(fetchComputationStatus(prevDateStr, config, dependencies));
110
128
  }
111
129
  }
112
130
 
113
- const results = await Promise.all(fetchPromises);
114
- const dailyStatus = results[0];
115
- const availability = results[1];
131
+ const results = await Promise.all(fetchPromises);
132
+ const dailyStatus = results[0];
133
+ const availability = results[1];
116
134
  const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
117
135
 
118
136
  const rootDataStatus = availability ? availability.status : {
@@ -183,7 +201,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
183
201
  if (callbackUrl) {
184
202
  await db.doc(metaStatePath).set({
185
203
  createdAt: new Date(),
186
- date: dateStr,
204
+ date: dateStr, // Acts as the "Job Label" (target date)
187
205
  pass: passToRun,
188
206
  totalTasks: tasksToDispatch.length,
189
207
  remainingTasks: tasksToDispatch.length,
@@ -201,9 +219,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
201
219
 
202
220
  // 3. Create Audit Ledger Entries
203
221
  const finalDispatched = [];
204
- const txnLimit = pLimit(20);
222
+ const txnLimit = pLimit(20);
205
223
 
206
- const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
224
+ const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
207
225
  const ledgerRef = db.collection(`computation_audit_ledger/${task.date}/passes/${task.pass}/tasks`).doc(task.computation);
208
226
 
209
227
  try {
@@ -281,4 +299,4 @@ async function dispatchComputationPass(config, dependencies, computationManifest
281
299
  }
282
300
  }
283
301
 
284
- module.exports = { dispatchComputationPass };
302
+ module.exports = { dispatchComputationPass };