npm - bulltrackers-module - Versions diffs - 1.0.293 → 1.0.295 - Mend

bulltrackers-module 1.0.293 → 1.0.295

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/functions/computation-system/executors/PriceBatchExecutor.js CHANGED Viewed

@@ -70,7 +70,6 @@ async function runBatchPriceComputation(config, deps, dateStrings, calcs, target
                                         .collection(config.computationsSubcollection)
                                         .doc(normalizeName(calcManifest.name));
-                                    // [UPDATE] Add _lastUpdated timestamp
                                     writes.push({
                                         ref: docRef,
                                         data: {

package/functions/computation-system/executors/StandardExecutor.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /**
  * @fileoverview Executor for "Standard" (per-user) calculations.
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
- * UPDATED: Removes manual global.gc() calls.
+ * UPDATED: Implements "Circuit Breaker" to fail fast on high error rates.
+ * UPDATED: Implements "Adaptive Flushing" based on V8 Heap usage.
  * UPDATED: Manages incremental sharding states.
  * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */
@@ -12,6 +13,7 @@ const { ContextFactory }                                               = require
 const { commitResults }                                                = require('../persistence/ResultCommitter');
 const mathLayer                                                        = require('../layers/index');
 const { performance }                                                  = require('perf_hooks');
+const v8                                                               = require('v8');
 class StandardExecutor {
     static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {
@@ -59,6 +61,9 @@ class StandardExecutor {
         const aggregatedSuccess = {};
         const aggregatedFailures = [];
+        // [NEW] Global Error Tracking for Circuit Breaker
+        const errorStats = { count: 0, total: 0 };
         Object.keys(state).forEach(name => {
             executionStats[name] = {
                 processedUsers: 0,
@@ -89,7 +94,7 @@ class StandardExecutor {
         let yP_chunk = {}, tH_chunk = {};
-        const BATCH_SIZE = 5000;
+        const MIN_BATCH_SIZE = 1000; // Minimum to process before checking stats
         let usersSinceLastFlush = 0;
         try {
@@ -103,6 +108,8 @@ class StandardExecutor {
                 const chunkSize = Object.keys(tP_chunk).length;
                 const startProcessing = performance.now();
+                // [UPDATED] Collect execution results (success/failure counts)
                 const promises = streamingCalcs.map(calc =>
                     StandardExecutor.executePerUser(
                         calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,
@@ -110,15 +117,37 @@ class StandardExecutor {
                         executionStats[normalizeName(calc.manifest.name)]
                     )
                 );
-                await Promise.all(promises);
+                const batchResults = await Promise.all(promises);
                 const procDuration = performance.now() - startProcessing;
                 Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
+                // [NEW] Update Error Stats
+                batchResults.forEach(r => {
+                    errorStats.total += (r.success + r.failures);
+                    errorStats.count += r.failures;
+                });
+                // [NEW] Circuit Breaker: Fail fast if error rate > 10% after processing 100+ items
+                // We check total > 100 to avoid failing on the very first user if they happen to be bad.
+                if (errorStats.total > 100 && (errorStats.count / errorStats.total) > 0.10) {
+                    const failRate = (errorStats.count / errorStats.total * 100).toFixed(1);
+                    throw new Error(`[Circuit Breaker] High failure rate detected (${failRate}%). Aborting batch to prevent silent data loss.`);
+                }
                 usersSinceLastFlush += chunkSize;
-                if (usersSinceLastFlush >= BATCH_SIZE) {
-                    logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
+                // [NEW] Adaptive Flushing (Memory Pressure Check)
+                const heapStats = v8.getHeapStatistics();
+                const heapUsedRatio = heapStats.used_heap_size / heapStats.heap_size_limit;
+                const MEMORY_THRESHOLD = 0.70; // 70% of the V8 heap size limit (heap_size_limit), not of system RAM
+                const COUNT_THRESHOLD = 5000;
+                if (usersSinceLastFlush >= COUNT_THRESHOLD || heapUsedRatio > MEMORY_THRESHOLD) {
+                    const reason = heapUsedRatio > MEMORY_THRESHOLD ? `MEMORY_PRESSURE (${(heapUsedRatio*100).toFixed(0)}%)` : 'BATCH_LIMIT';
+                    logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users. Reason: ${reason}`);
                     // [UPDATED] Pass isInitialWrite: true only on the first flush
                     const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
@@ -171,6 +200,7 @@ class StandardExecutor {
                 _executionStats: executionStats[name]
             };
+            // Clear the memory immediately after preparing the commit
             inst.results = {};
         }
@@ -226,6 +256,10 @@ class StandardExecutor {
         const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
         const SCHEMAS  = mathLayer.SCHEMAS;
+        // [NEW] Track local batch success/failure
+        let chunkSuccess = 0;
+        let chunkFailures = 0;
         for (const [userId, todayPortfolio] of Object.entries(portfolioData)) {
             const yesterdayPortfolio = yesterdayPortfolioData ? yesterdayPortfolioData[userId] : null;
             const todayHistory       = historyData ? historyData[userId] : null;
@@ -249,10 +283,16 @@ class StandardExecutor {
             try {
                 await calcInstance.process(context);
                 if (stats) stats.processedUsers++;
+                chunkSuccess++;
             }
-            catch (e) { logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`); }
+            catch (e) {
+                logger.log('WARN', `Calc ${metadata.name} failed for user ${userId}: ${e.message}`);
+                chunkFailures++;
+            }
         }
+        return { success: chunkSuccess, failures: chunkFailures };
     }
 }
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };

package/functions/computation-system/features.md ADDED Viewed

@@ -0,0 +1,395 @@
+# Complete Feature Inventory of BullTrackers Computation System
+## Core DAG Engine Features
+### 1. **Topological Sorting (Kahn's Algorithm)**
+- **Files**: `ManifestBuilder.js:187-205`
+- **Implementation**: Builds execution passes by tracking in-degrees, queuing zero-dependency nodes
+- **Niche aspect**: Dynamic pass assignment (line 201: `neighborEntry.pass = currentEntry.pass + 1`)
+- **Common in**: Airflow, Prefect, Dagster (all use topological sort)
+### 2. **Cycle Detection (Tarjan's SCC Algorithm)**
+- **Files**: `ManifestBuilder.js:98-141`
+- **Implementation**: Strongly Connected Components detection with stack-based traversal
+- **Niche aspect**: Returns human-readable cycle chain (line 137: `cycle.join(' -> ') + ' -> ' + cycle[0]`)
+- **Common in**: Academic graph libraries, rare in production DAG systems (most use simpler DFS)
+### 3. **Auto-Discovery Manifest Building**
+- **Files**: `ManifestBuilder.js:143-179`, `ManifestLoader.js:9-42`
+- **Implementation**: Scans directories, instantiates classes, extracts metadata via `getMetadata()` static method
+- **Niche aspect**: Singleton caching with multi-key support (ManifestLoader.js:9)
+- **Common in**: Plugin systems (Airflow providers), less common for computation graphs
+## Dependency Management & Optimization
+### 4. **Multi-Layered Hash Composition**
+- **Files**: `ManifestBuilder.js:56-95`, `HashManager.js:25-36`
+- **Implementation**: Composite hash from code + epoch + infrastructure + layers + dependencies
+- **Niche aspect**: Infrastructure hash (recursive file tree hashing, HashManager.js:38-79)
+- **Common in**: Build systems (Bazel, Buck), **very rare** in data pipelines
+### 5. **Content-Based Dependency Short-Circuiting**
+- **Files**: `WorkflowOrchestrator.js:51-73`
+- **Implementation**: Tracks `resultHash` (output data hash), skips re-run if output unchanged despite code change
+- **Niche aspect**: `dependencyResultHashes` tracking (line 59-67)
+- **Common in**: **Extremely rare** - only seen in specialized incremental computation systems
+### 6. **Behavioral Stability Detection (SimHash)**
+- **Files**: `BuildReporter.js:55-89`, `SimRunner.js:12-42`, `Fabricator.js:20-244`
+- **Implementation**: Runs code against deterministic mock data, hashes output to detect "logic changes" vs "cosmetic changes"
+- **Niche aspect**: Seeded random data generation (SeededRandom.js:1-38) for reproducible simulations
+- **Common in**: **Unique** - haven't seen this elsewhere. Conceptually similar to property-based testing but for optimization
+### 7. **System Epoch Forcing**
+- **Files**: `system_epoch.js:1-2`, `ManifestBuilder.js:65`
+- **Implementation**: Manual version bump to force global re-computation
+- **Niche aspect**: Single-line file that invalidates all cached results
+- **Common in**: Cache invalidation patterns, but unusual to have a dedicated module
+## Execution & Resource Management
+### 8. **Streaming Execution with Batch Flushing**
+- **Files**: `StandardExecutor.js:86-158`
+- **Implementation**: Async generators yield data chunks, flush to DB every N users
+- **Niche aspect**: Adaptive flushing based on V8 heap pressure (line 128-145)
+- **Common in**: ETL tools (Spark, Flink use micro-batching), **heap-aware flushing is rare**
+### 9. **Memory Heartbeat (Flight Recorder)**
+- **Files**: `computation_worker.js:30-53`
+- **Implementation**: Background timer writes memory stats to Firestore every 2 seconds
+- **Niche aspect**: Uses `.unref()` to prevent blocking process exit (line 50)
+- **Common in**: APM tools (DataDog, New Relic), **embedding in workers is custom**
+### 10. **Forensic Crash Analysis & Intelligent Routing**
+- **Files**: `computation_dispatcher.js:31-68`
+- **Implementation**: Reads last memory stats from failed runs, routes to high-mem queue if OOM suspected
+- **Niche aspect**: Parses telemetry to distinguish crash types (line 44-50)
+- **Common in**: Kubernetes autoscaling heuristics, **application-level routing is rare**
+### 11. **Circuit Breaker Pattern**
+- **Files**: `StandardExecutor.js:164-173`
+- **Implementation**: Tracks error rate, fails fast if >10% failures after 100 items
+- **Niche aspect**: Runs mid-stream (not just at job start)
+- **Common in**: Microservices (Hystrix, Resilience4j), uncommon in data pipelines
+### 12. **Incremental Auto-Sharding**
+- **Files**: `ResultCommitter.js:234-302`
+- **Implementation**: Dynamically splits results into Firestore subcollection shards, tracks shard index across flushes
+- **Niche aspect**: `flushMode: INTERMEDIATE` flag (line 150) to avoid pointer updates mid-stream
+- **Common in**: Database sharding, **dynamic document sharding is custom**
+### 13. **GZIP Compression Strategy**
+- **Files**: `ResultCommitter.js:128-157`
+- **Implementation**: Compresses results >50KB, stores as binary blob if <900KB compressed
+- **Niche aspect**: Falls back to sharding if compression fails or exceeds limit
+- **Common in**: Storage layers, integration at application level is custom
+## Data Quality & Validation
+### 14. **Heuristic Validation (Grey Box)**
+- **Files**: `ResultsValidator.js:8-96`
+- **Implementation**: Statistical analysis (zero%, null%, flatline detection) without knowing schema
+- **Niche aspect**: Weekend mode (line 57-64) - relaxes thresholds on Saturdays/Sundays
+- **Common in**: Data quality tools (Great Expectations, Soda), **weekend-aware thresholds are domain-specific**
+### 15. **Contract Discovery & Enforcement**
+- **Files**: `ContractDiscoverer.js:11-120`, `ContractValidator.js:9-64`
+- **Implementation**: Monte Carlo simulation learns behavioral bounds, enforces at runtime
+- **Niche aspect**: Distinguishes "physics limits" (ratios 0-1) from "statistical envelopes" (6-sigma)
+- **Common in**: **Unique** - closest analogue is schema inference (Pandas Profiling) but this is probabilistic + enforced
+### 16. **Semantic Gates**
+- **Files**: `ResultCommitter.js:118-127`
+- **Implementation**: Blocks results that violate contracts before writing
+- **Niche aspect**: Differentiated error handling - `SEMANTIC_GATE` errors are non-retryable (line 210-225)
+- **Common in**: Type systems (TypeScript, Mypy), **runtime probabilistic checks are rare**
+### 17. **Root Data Availability Tracking**
+- **Files**: `AvailabilityChecker.js:49-87`, `utils.js:11-17`
+- **Implementation**: Centralized index (`system_root_data_index`) tracks what data exists per day
+- **Niche aspect**: Granular user-type checks (speculator vs normal portfolio, line 23-47)
+- **Common in**: Data catalogs (Amundsen, DataHub), **day-level granularity is custom**
+### 18. **Impossible State Propagation**
+- **Files**: `WorkflowOrchestrator.js:94-96`, `logger.js:77-93`
+- **Implementation**: Marks calculations as `IMPOSSIBLE` instead of failing them, allows graph to continue
+- **Niche aspect**: Separate "impossible" category in analysis reports (logger.js:86-91)
+- **Common in**: Workflow engines handle failures, **explicit impossible state is rare**
+## Orchestration & Coordination
+### 19. **Event-Driven Callback Pattern (Zero Polling)**
+- **Files**: `bulltrackers_pipeline.yaml:49-76`, `computation_worker.js:82-104`
+- **Implementation**: Workflow creates callback endpoint, worker POSTs on completion, workflow wakes
+- **Niche aspect**: IAM authentication for callbacks (computation_worker.js:88-91)
+- **Common in**: Cloud Workflows, AWS Step Functions (both support callbacks), **IAM-secured callbacks are best practice but not default**
+### 20. **Run State Counter Pattern**
+- **Files**: `computation_dispatcher.js:107-115`, `computation_worker.js:106-123`
+- **Implementation**: Shared Firestore doc tracks `remainingTasks`, workers decrement on completion
+- **Niche aspect**: Transaction-based decrement (computation_worker.js:109-119) ensures atomicity
+- **Common in**: Distributed systems, **Firestore-specific implementation is custom**
+### 21. **Audit Ledger (Ledger-DB Pattern)**
+- **Files**: `computation_dispatcher.js:143-163`, `RunRecorder.js:26-99`
+- **Implementation**: Write-once ledger per task (`computation_audit_ledger/{date}/passes/{pass}/tasks/{calc}`)
+- **Niche aspect**: Stores granular timing breakdown (RunRecorder.js:64-70)
+- **Common in**: Event sourcing systems, **granular profiling in ledger is uncommon**
+### 22. **Poison Message Handling (DLQ)**
+- **Files**: `computation_worker.js:36-60`
+- **Implementation**: Max retries check via Pub/Sub `deliveryAttempt`, moves to dead letter queue
+- **Niche aspect**: Differentiates deterministic errors (line 194-222) from transient failures
+- **Common in**: Message queues (RabbitMQ, SQS), **logic-aware routing is custom**
+### 23. **Catch-Up Logic (Historical Scan)**
+- **Files**: `computation_dispatcher.js:65-81`
+- **Implementation**: Scans full date range (earliest data → target date) instead of just target date
+- **Niche aspect**: Parallel analysis with concurrency limit (line 85)
+- **Common in**: Data pipelines (backfill mode), **integrating it into the dispatcher is convenient**
+## Observability & Debugging
+### 24. **Structured Logging System**
+- **Files**: `logger.js:27-118`
+- **Implementation**: Dual output (human-readable + JSON), process tracking, context inheritance
+- **Niche aspect**: `ProcessLogger` class (line 120-148) for scoped logging with auto-stats
+- **Common in**: Production apps (Winston, Bunyan), **process-scoped loggers are a nice touch**
+### 25. **Date Analysis Reports**
+- **Files**: `logger.js:77-132`
+- **Implementation**: Per-date breakdown of runnable/blocked/impossible/skipped calculations
+- **Niche aspect**: Unicode symbols for visual parsing (line 103)
+- **Common in**: DAG visualization tools, **inline CLI reports are developer-friendly**
+### 26. **Build Report Generator**
+- **Files**: `BuildReporter.js:138-248`
+- **Implementation**: Pre-deployment impact analysis showing blast radius of code changes
+- **Niche aspect**: Blast radius calculation (line 62-77) - finds all downstream dependents
+- **Common in**: CI/CD tools (GitHub's "affected projects"), **calculation-level granularity is detailed**
+### 27. **System Fingerprinting**
+- **Files**: `BuildReporter.js:28-51`, `HashManager.js:80-111`
+- **Implementation**: SHA-256 hash of entire codebase + manifest, triggers report on change
+- **Niche aspect**: Recursive directory walk with ignore patterns (HashManager.js:44-60)
+- **Common in**: Docker layer caching, **using it for change detection at deploy time is creative**
+### 28. **Execution Statistics Tracking**
+- **Files**: `StandardExecutor.js:64-71`, `RunRecorder.js:57-70`
+- **Implementation**: Tracks processed/skipped users, setup/stream/processing time breakdowns
+- **Niche aspect**: Profiler-ready structure (RunRecorder.js:64-70) for BigQuery analysis
+- **Common in**: Profilers (cProfile, pyflame), **baked into business logic is pragmatic**
+## Data Access Patterns
+### 29. **Smart Shard Indexing**
+- **Files**: `data_loader.js:152-213`
+- **Implementation**: Maintains `instrumentId → shardId` index to avoid scanning all shards
+- **Niche aspect**: 24-hour TTL with rebuild logic (line 167-172)
+- **Common in**: Database indexes, **application-level shard routing is custom**
+### 30. **Async Generator Streaming**
+- **Files**: `data_loader.js:130-150`
+- **Implementation**: `async function*` yields data chunks, caller consumes with `for await`
+- **Niche aspect**: Supports pre-provided refs (line 132) for dependency injection
+- **Common in**: Node.js streams, **generator-based approach is modern/clean**
+### 31. **Cached Data Loader**
+- **Files**: `CachedDataLoader.js:14-73`
+- **Implementation**: Execution-scoped cache for mappings/insights/social data
+- **Niche aspect**: Decompression helper (line 24-32) for transparent GZIP handling
+- **Common in**: Data layers (Apollo Client, React Query), **per-execution scope is appropriate**
+### 32. **Deferred Hydration**
+- **Files**: `DependencyFetcher.js:23-66`
+- **Implementation**: Fetches metadata documents, hydrates sharded data on-demand
+- **Niche aspect**: Parallel hydration promises (line 44-47)
+- **Common in**: ORMs (lazy loading), **manual shard hydration is low-level**
+## Domain-Specific Intelligence
+### 33. **User Classification Engine**
+- **Files**: `profiling.js:24-236`
+- **Implementation**: "Smart Money" scoring with 18+ behavioral signals
+- **Niche aspect**: Multi-factor scoring (portfolio allocation + trade history + execution timing)
+- **Common in**: Fintech risk models, **granularity is impressive**
+### 34. **Convex Hull Risk Geometry**
+- **Files**: `profiling.js:338-365`
+- **Implementation**: Monotone Chain algorithm for efficient frontier analysis
+- **Niche aspect**: O(n log n) algorithm choice (profiling.js:345-363)
+- **Common in**: Computational geometry libraries, **integration into user profiling is domain-specific**
+### 35. **Kadane's Maximum Drawdown**
+- **Files**: `extractors.js:27-52`
+- **Implementation**: O(n) single-pass algorithm for peak-to-trough decline
+- **Niche aspect**: Returns indices for visualization (line 47)
+- **Common in**: Finance libraries (QuantLib), **clean implementation**
+### 36. **Fast Fourier Transform (Cooley-Tukey)**
+- **Files**: `mathematics.js:148-184`
+- **Implementation**: O(n log n) frequency domain analysis with zero-padding
+- **Niche aspect**: Recursive implementation (line 163-183)
+- **Common in**: Signal processing (NumPy, SciPy), **JavaScript implementation is rare**
+### 37. **Sliding Window Extrema (Monotonic Queue)**
+- **Files**: `mathematics.js:227-259`
+- **Implementation**: O(n) min/max calculation using deque
+- **Niche aspect**: Dual deques (one for min, one for max, line 236-237)
+- **Common in**: Competitive programming, **production usage is uncommon**
+### 38. **Geometric Brownian Motion Simulator**
+- **Files**: `mathematics.js:99-118`
+- **Implementation**: Box-Muller transform for normal random variates, Monte Carlo simulation
+- **Niche aspect**: Returns `Float32Array` for memory efficiency (line 106)
+- **Common in**: Quant finance (Black-Scholes), **typed arrays are performance-conscious**
+### 39. **Hit Probability Calculator**
+- **Files**: `mathematics.js:75-97`
+- **Implementation**: Closed-form barrier option pricing formula
+- **Niche aspect**: Custom `normCDF` implementation (line 85-89) avoids external deps
+- **Common in**: Options pricing libraries, **standalone implementation is self-contained**
+### 40. **Kernel Density Estimation**
+- **Files**: `mathematics.js:263-288`
+- **Implementation**: Gaussian kernel with weighted samples
+- **Niche aspect**: 3-bandwidth cutoff for performance (line 276)
+- **Common in**: Stats packages (SciPy, R), **production KDE is uncommon**
+## Schema & Type Management
+### 41. **Schema Capture System**
+- **Files**: `schema_capture.js:28-68`
+- **Implementation**: Batch stores class-defined schemas to Firestore
+- **Niche aspect**: Pre-commit validation (line 32-34) prevents batch failures
+- **Common in**: Schema registries (Confluent), **lightweight alternative**
+### 42. **Production Schema Validators**
+- **Files**: `validators.js:14-137`
+- **Implementation**: Structural validation matching schema.md definitions
+- **Niche aspect**: Separate validators per data type (portfolio/history/social/insights/prices)
+- **Common in**: Data quality frameworks, **keeping validators aligned with schema.md shows discipline**
+### 43. **Legacy Mapping System**
+- **Files**: `HashManager.js:8-23`, `ContextFactory.js:12-17`
+- **Implementation**: Alias mapping for backward compatibility (e.g., `extract` → `DataExtractor`)
+- **Niche aspect**: Dual injection into context (line 14-16)
+- **Common in**: API versioning, **maintaining it during a refactor is good practice**
+## Infrastructure & Operations
+### 44. **Self-Healing Sharding Strategy**
+- **Files**: `ResultCommitter.js:234-302`
+- **Implementation**: Progressively stricter sharding on failure (900KB → 450KB → 200KB → 100KB)
+- **Niche aspect**: Strategy array iteration (line 241-246)
+- **Common in**: Resilience patterns, **adaptive sharding is creative**
+### 45. **Initial Write Cleanup Logic**
+- **Files**: `ResultCommitter.js:111-127`, `StandardExecutor.js:122-124`
+- **Implementation**: `isInitialWrite` flag triggers shard deletion before first write
+- **Niche aspect**: Transition detection (line 115-121) from sharded → compressed
+- **Common in**: Migration scripts, **baking it into the write path is convenient**
+### 46. **Firestore Byte Calculator**
+- **Files**: `ResultCommitter.js:319-324`
+- **Implementation**: Estimates document size for batch limits
+- **Niche aspect**: Handles `DocumentReference` paths (line 322)
+- **Common in**: Firestore SDKs (internal), **custom implementation for control**
+### 47. **Retry with Exponential Backoff**
+- **Files**: `utils.js:65-79`
+- **Implementation**: Async retry wrapper with configurable attempts and backoff
+- **Niche aspect**: 1s → 2s → 4s progression (line 75)
+- **Common in**: HTTP clients (axios, got), **standalone utility is reusable**
+### 48. **Batch Commit Chunker**
+- **Files**: `utils.js:86-128`
+- **Implementation**: Splits writes into Firestore 500-op/10MB batches
+- **Niche aspect**: Supports DELETE operations (line 103-108)
+- **Common in**: ORMs (SQLAlchemy bulk), **DELETE support is complete**
+### 49. **Date Range Generator**
+- **Files**: `utils.js:131-139`
+- **Implementation**: UTC-aware date string generation
+- **Niche aspect**: Forces UTC via `Date.UTC()` constructor (line 133-134)
+- **Common in**: Date libraries (date-fns, Luxon), **UTC enforcement is critical for finance**
+### 50. **Earliest Date Discovery**
+- **Files**: `utils.js:158-207`
+- **Implementation**: Scans multiple collections to find first available data
+- **Niche aspect**: Handles both flat and sharded collections (line 142-157, 160-174)
+- **Common in**: Data discovery tools, **multi-source aggregation is thorough**
+## Advanced Patterns
+### 51. **Tarjan's Stack Management**
+- **Files**: `ManifestBuilder.js:98-141`
+- **Implementation**: Manual stack tracking for SCC detection
+- **Niche aspect**: `onStack` Set for O(1) membership checks (line 106)
+- **Common in**: Graph algorithm implementations, **production usage is advanced**
+### 52. **Dependency-Injection Context Factory**
+- **Files**: `ContextFactory.js:17-61`
+- **Implementation**: Separate builders for per-user vs meta contexts
+- **Niche aspect**: Math layer injection with legacy aliases (line 12-17)
+- **Common in**: DI frameworks (Spring, Guice), **manual factory is lightweight**
+### 53. **Price Batch Executor**
+- **Files**: `PriceBatchExecutor.js:12-104`
+- **Implementation**: Specialized executor for price-only calculations (optimization pass)
+- **Niche aspect**: Outer concurrency (2) + shard batching (20) + write batching (50) nested limits
+- **Common in**: MapReduce systems, **three-level batching is complex**
+### 54. **Deterministic Mock Data Fabrication**
+- **Files**: `Fabricator.js:20-244`, `SeededRandom.js:8-38`
+- **Implementation**: LCG PRNG seeded by calculation name for reproducible fakes
+- **Niche aspect**: Iteration-based seed rotation (Fabricator.js:29)
+- **Common in**: Property-based testing (Hypothesis, QuickCheck), **for optimization is novel**
+### 55. **Schema-Driven Fake Generation**
+- **Files**: `Fabricator.js:48-71`
+- **Implementation**: Recursively generates data matching JSON schema
+- **Niche aspect**: Volume scaling flag (line 49) for aggregate vs per-item data
+- **Common in**: Schema-based generators (JSF, json-schema-faker), **custom to domain**
+### 56. **Migration Cleanup Hook**
+- **Files**: `ResultCommitter.js:81-83`, `ResultCommitter.js:305-317`
+- **Implementation**: Deletes old category data when calculation moves
+- **Niche aspect**: `previousCategory` tracking in manifest (WorkflowOrchestrator.js:50-54)
+- **Common in**: Schema migration tools (Alembic, Flyway), **inline cleanup is pragmatic**
+### 57. **Non-Retryable Error Classification**
+- **Files**: `ResultCommitter.js:18-21`, `computation_worker.js:194-225`
+- **Implementation**: Distinguishes deterministic failures from transient errors
+- **Niche aspect**: `error.stage` property for categorization (computation_worker.js:205-209)
+- **Common in**: Error handling libraries (Sentry), **semantic error types are good practice**
+### 58. **Reverse Adjacency Graph**
+- **Files**: `BuildReporter.js:62-77`
+- **Implementation**: Maintains child → parent edges for impact analysis
+- **Niche aspect**: Used for blast radius calculation (line 66-74)
+- **Common in**: Dependency analyzers (npm-why), **runtime maintenance is useful**
+### 59. **Multi-Key Manifest Cache**
+- **Files**: `ManifestLoader.js:9-14`
+- **Implementation**: Cache key is JSON-stringified sorted product lines
+- **Niche aspect**: Handles `['ALL']` vs `['crypto', 'stocks']` as different keys
+- **Common in**: Memoization libraries (lodash.memoize), **cache key design is thoughtful**
+### 60. **Workflow Variable Restoration**
+- **Files**: `bulltrackers_pipeline.yaml:11-17`
+- **Implementation**: Comment notes a bug fix restoring `passes` and `max_retries` variables
+- **Niche aspect**: T-1 date logic (line 13-15) for "process yesterday" pattern
+- **Common in**: Production YAML configs, **inline documentation is helpful**
+---
+## Summary Statistics
+- **Total Features Identified**: 60
+- **Unique/Rare Features**: ~15 (SimHash, content-based short-circuit, forensic routing, contract discovery, weekend validation, behavioral stability, heap-aware flushing, monotonic queue extrema, FFT, KDE, smart shard indexing, recursive infra hash, semantic gates, impossible propagation, blast radius)
+- **Advanced CS Algorithms**: 8 (Kahn's, Tarjan's, Convex Hull, Kadane's, FFT, Box-Muller, Monotonic Queue, LCG)
+- **Common Patterns (Elevated)**: ~25 (executed exceptionally well or with domain-specific twist)
+- **Standard Infrastructure**: ~22 (logging, retries, batching, streaming, caching, validation, etc.)
+**Verdict**: About 25% truly novel, 40% common patterns elevated to production-grade, 35% standard infrastructure executed well.

package/functions/computation-system/helpers/computation_dispatcher.js CHANGED Viewed

@@ -3,9 +3,10 @@
  * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
  * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
  * UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
+ * FIXED: Implemented "Catch-Up" logic to scan full history (Start -> Target Date) instead of just Target Date.
  */
-const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
+const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
 const { groupByPass, analyzeDateExecution }     = require('../WorkflowOrchestrator.js');
 const { PubSubUtils }                           = require('../../core/utils/pubsub_utils');
 const { fetchComputationStatus, updateComputationStatus } = require('../persistence/StatusRepository');
@@ -28,6 +29,7 @@ async function checkCrashForensics(db, date, pass, computationName) {
         const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computationName}`;
         const doc = await db.doc(ledgerPath).get();
+        // Default to standard
         if (!doc.exists) return 'standard';
         const data = doc.data();
@@ -63,33 +65,48 @@ async function checkCrashForensics(db, date, pass, computationName) {
  */
 async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
     const { logger, db } = dependencies;
-    const pubsubUtils = new PubSubUtils(dependencies);
-    const passToRun   = String(config.COMPUTATION_PASS_TO_RUN);
+    const pubsubUtils    = new PubSubUtils(dependencies);
+    const passToRun      = String(config.COMPUTATION_PASS_TO_RUN);
     // Extract Date and Callback from request body (pushed by Workflow)
+    // NOTE: 'dateStr' acts as the "Target Date" (Ceiling), usually T-1.
     const dateStr     = reqBody.date || config.date;
     const callbackUrl = reqBody.callbackUrl || null;
     if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
     if (!dateStr)   { return logger.log('ERROR', '[Dispatcher] No date defined. Aborting.'); }
-    const currentManifestHash = generateCodeHash(
-        computationManifest.map(c => c.hash).sort().join('|')
-    );
+    const currentManifestHash = generateCodeHash( computationManifest.map(c => c.hash).sort().join('|') );
     const passes          = groupByPass(computationManifest);
     const calcsInThisPass = passes[passToRun] || [];
     if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
-    logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} for ${dateStr}`);
+    logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} (Target: ${dateStr})`);
-    // -- DATE ANALYSIS LOGIC --
-    const allExpectedDates = [dateStr];
+    // -- DATE ANALYSIS LOGIC (FIXED: RANGE SCAN) --
+    // 1. Determine the absolute start of data history
+    const earliestDates = await getEarliestDataDates(config, dependencies);
+    const startDate     = earliestDates.absoluteEarliest;
+    const endDate       = new Date(dateStr + 'T00:00:00Z');
+    // 2. Generate the full range of dates to check
+    let allExpectedDates = getExpectedDateStrings(startDate, endDate);
+    // Safety fallback: if range is invalid or empty, default to target date only
+    if (!allExpectedDates || allExpectedDates.length === 0) {
+        logger.log('WARN', `[Dispatcher] Date range calculation returned empty (Start: ${startDate.toISOString()} -> End: ${endDate.toISOString()}). Defaulting to single target date.`);
+        allExpectedDates = [dateStr];
+    } else {
+        logger.log('INFO', `[Dispatcher] 📅 Analysis Range: ${allExpectedDates.length} days (${allExpectedDates[0]} to ${allExpectedDates[allExpectedDates.length-1]})`);
+    }
     const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
     const tasksToDispatch = [];
-    // Concurrency limit for analysis & forensics
+    // Concurrency limit for analysis & forensics (Parallelize the historical scan)
     const limit = pLimit(20);
     const analysisPromises = allExpectedDates.map(d => limit(async () => {
@@ -105,14 +122,15 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                 prevDate.setUTCDate(prevDate.getUTCDate() - 1);
                 prevDateStr = prevDate.toISOString().slice(0, 10);
+                // Only fetch previous status if it's within valid range
                 if (prevDate >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
                     fetchPromises.push(fetchComputationStatus(prevDateStr, config, dependencies));
                 }
             }
-            const results = await Promise.all(fetchPromises);
-            const dailyStatus = results[0];
-            const availability = results[1];
+            const results         = await Promise.all(fetchPromises);
+            const dailyStatus     = results[0];
+            const availability    = results[1];
             const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
             const rootDataStatus = availability ? availability.status : {
@@ -183,7 +201,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
         if (callbackUrl) {
             await db.doc(metaStatePath).set({
                 createdAt: new Date(),
-                date: dateStr,
+                date: dateStr, // Acts as the "Job Label" (target date)
                 pass: passToRun,
                 totalTasks: tasksToDispatch.length,
                 remainingTasks: tasksToDispatch.length,
@@ -201,9 +219,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
         // 3. Create Audit Ledger Entries
         const finalDispatched = [];
-        const txnLimit = pLimit(20);
+        const txnLimit        = pLimit(20);
-        const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
+        const txnPromises   = tasksToDispatch.map(task => txnLimit(async () => {
             const ledgerRef = db.collection(`computation_audit_ledger/${task.date}/passes/${task.pass}/tasks`).doc(task.computation);
             try {
@@ -281,4 +299,4 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     }
 }
-module.exports = { dispatchComputationPass };
+module.exports = { dispatchComputationPass };