npm - bulltrackers-module - Versions diffs - 1.0.733 → 1.0.734 - Mend

bulltrackers-module 1.0.733 → 1.0.734

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

package/functions/computation-system-v2/framework/execution/Orchestrator.js ADDED Viewed

@@ -0,0 +1,498 @@
+/**
+ * @fileoverview Orchestrator (Refactored Executor)
+ * Responsibilities:
+ * 1. Graph & Schedule Management (Manifest, Passes, DAG)
+ * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
+ * 3. Execution Strategy (Streaming vs. In-Memory)
+ * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
+ */
+const crypto = require('crypto');
+const pLimit = require('p-limit');
+// Core Components
+const { ManifestBuilder } = require('../core/Manifest');
+const { RulesRegistry } = require('../core/Rules');
+const { RuleInjector } = require('../core/RuleInjector');
+const { RunAnalyzer } = require('../core/RunAnalyzer');
+// Data & Storage
+const { SchemaRegistry } = require('../data/SchemaRegistry');
+const { QueryBuilder } = require('../data/QueryBuilder');
+const { DataFetcher } = require('../data/DataFetcher');
+const { StorageManager } = require('../storage/StorageManager');
+const { StateRepository } = require('../storage/StateRepository');
+const { Checkpointer } = require('../resilience/Checkpointer');
+// Execution Components
+const { TaskRunner } = require('./TaskRunner');
+const { ProfilerMiddleware } = require('./middleware/ProfilerMiddleware');
+const { CostTrackerMiddleware } = require('./middleware/CostTrackerMiddleware');
+const { LineageMiddleware } = require('./middleware/LineageMiddleware');
+const DEFAULT_CONCURRENCY = 20; // Cap on concurrently running per-entity tasks; streaming falls back to it when config.execution.entityConcurrency is unset.
+const BATCH_SIZE = 1000; // Batch size passed to the batched fetch when config.execution.batchSize is unset.
+/**
+ * Orchestrator: builds the computation manifest, decides what has to run for a
+ * date, provisions input data and dependency results, and hands the actual
+ * work to a TaskRunner wrapped in cost, lineage and profiler middleware.
+ */
+class Orchestrator {
+    /**
+     * Wire up data, storage, rules and the middleware stack.
+     * @param {Object} config - Framework configuration. This class itself reads `bigquery`,
+     *   `tables`, `computations`, `referenceData` and `execution`; the whole object is also
+     *   handed to the services constructed here.
+     * @param {Object|null} [logger=null] - Object exposing `log(level, message)`; defaults to `console`.
+     */
+    constructor(config, logger = null) {
+        this.config = config;
+        this.logger = logger || console;
+        // 1. Initialize Base Services
+        this.schemaRegistry = new SchemaRegistry(config.bigquery, this.logger);
+        this.queryBuilder = new QueryBuilder(config.bigquery, this.schemaRegistry, this.logger);
+        this.dataFetcher = new DataFetcher({ ...config.bigquery, tables: config.tables }, this.queryBuilder, this.logger);
+        this.storageManager = new StorageManager(config, this.logger);
+        this.stateRepository = new StateRepository(config, this.logger);
+        // 2. Initialize Logic & Rules
+        this.manifestBuilder = new ManifestBuilder(config, this.logger);
+        const rulesRegistry = new RulesRegistry(config, this.logger);
+        this.ruleInjector = new RuleInjector(rulesRegistry);
+        // 3. Initialize Execution Stack (Middleware)
+        const profiler = new ProfilerMiddleware(config);
+        profiler.setStorage(this.storageManager);
+        this.lineageMiddleware = new LineageMiddleware(config);
+        const costTracker = new CostTrackerMiddleware(config);
+        // The "Onion": Cost( Lineage( Profiler( Task ) ) )
+        this.runner = new TaskRunner([
+            costTracker,
+            this.lineageMiddleware,
+            profiler
+        ]);
+        // State (populated by initialize())
+        this.manifest = null;
+        this.runAnalyzer = null;
+        this.referenceDataCache = {};
+    }
+    /**
+     * Build the manifest, create the run analyzer, warm the schema cache and
+     * load reference data. Called lazily by analyze/execute/runSingle when no
+     * manifest exists yet.
+     * @returns {Promise<void>}
+     */
+    async initialize() {
+        this._log('INFO', 'Initializing Orchestrator...');
+        // Build Manifest
+        this.manifest = this.manifestBuilder.build(this.config.computations || []);
+        // Initialize Analyzer
+        this.runAnalyzer = new RunAnalyzer(this.manifest, this.dataFetcher, this.logger);
+        // Warm Schema Cache
+        await this.schemaRegistry.warmCache(this._getAllTables());
+        // Load Reference Data (e.g. sectors, holidays)
+        await this._loadReferenceData();
+        this._log('INFO', `Initialized with ${this.manifest.length} computations`);
+    }
+    /**
+     * Analyze what needs to run for a given date.
+     * @param {Object} options
+     * @param {string} options.date - Target date (YYYY-MM-DD).
+     * @returns {Promise<Object>} The analyzer report, with `reRuns` appended to `runnable`.
+     */
+    async analyze(options) {
+        const { date } = options;
+        if (!this.manifest) await this.initialize();
+        const dailyStatus = await this.stateRepository.getDailyStatus(date);
+        const prevStatus = await this.stateRepository.getDailyStatus(this._subtractDay(date));
+        const report = await this.runAnalyzer.analyze(date, dailyStatus, prevStatus);
+        // Compatibility: Merge reRuns into runnable
+        report.runnable = [...report.runnable, ...report.reRuns];
+        return report;
+    }
+    /**
+     * Main Execution Loop: runs the selected computations pass by pass.
+     * @param {Object} options
+     * @param {string} options.date - Target date (YYYY-MM-DD).
+     * @param {number|string|null} [options.pass=null] - Run only this pass number; all passes when falsy.
+     * @param {string|null} [options.computation=null] - Run only this computation (matched after
+     *   lower-casing and stripping non-alphanumerics).
+     * @param {boolean} [options.dryRun=false] - Compute without committing results or state.
+     * @param {Array|null} [options.entities=null] - Forced entity ids; skips the "should it run" analysis.
+     * @returns {Promise<Object>} Summary with per-status counters and per-status result lists.
+     * @throws {Error} When `options.computation` matches no manifest entry.
+     */
+    async execute(options) {
+        const { date, pass = null, computation = null, dryRun = false, entities = null } = options;
+        if (!this.manifest) await this.initialize();
+        this._log('INFO', `Starting execution for ${date}...`);
+        // 1. Filter Manifest
+        let toRun = this.manifest;
+        if (computation) {
+            const norm = computation.toLowerCase().replace(/[^a-z0-9]/g, '');
+            toRun = this.manifest.filter(e => e.name === norm);
+            if (!toRun.length) throw new Error(`Computation not found: ${computation}`);
+        }
+        // 2. Group by Pass
+        const passes = this.manifestBuilder.groupByPass(toRun);
+        const passNumbers = Object.keys(passes).map(Number).sort((a,b) => a-b);
+        const passesToRun = pass ? [parseInt(pass, 10)] : passNumbers;
+        const summary = {
+            date,
+            summary: { completed: 0, skipped: 0, blocked: 0, impossible: 0, errors: 0 },
+            completed: [], skipped: [], blocked: [], impossible: [], errors: []
+        };
+        // 3. Execute Passes (sequentially; computations inside a pass run concurrently)
+        for (const passNum of passesToRun) {
+            const passComputations = passes[passNum] || [];
+            this._log('INFO', `Executing Pass ${passNum}: ${passComputations.length} computations`);
+            // Note: In a strict DAG, items in the same pass are parallelizable.
+            // We use Promise.all to run them concurrently.
+            await Promise.all(passComputations.map(async (entry) => {
+                try {
+                    // Pass specific options like "entities" (force entities) down
+                    const res = await this._executeComputation(entry, date, { dryRun, entities });
+                    summary[res.status].push(res);
+                    summary.summary[res.status]++;
+                } catch (e) {
+                    // A failing computation is recorded; it does not abort its siblings.
+                    summary.errors.push({ name: entry.name, error: e.message });
+                    summary.summary.errors++;
+                    this._log('ERROR', `${entry.name} failed: ${e.message}`);
+                }
+            }));
+        }
+        return summary;
+    }
+    /**
+     * Run one manifest entry directly.
+     * @param {Object} entry - Manifest entry.
+     * @param {string} dateStr - Target date (YYYY-MM-DD).
+     * @param {Object} [options]
+     * @param {boolean} [options.dryRun] - Compute without committing.
+     * @param {Array} [options.entityIds] - Forced entity ids.
+     * @returns {Promise<Object>} Result of `_executeComputation`.
+     */
+    async runSingle(entry, dateStr, options = {}) {
+        if (!this.manifest) await this.initialize();
+        return this._executeComputation(entry, dateStr, {
+            dryRun: options.dryRun || false,
+            entities: options.entityIds
+        });
+    }
+    // =========================================================================
+    // INTERNAL EXECUTION LOGIC
+    // =========================================================================
+    /**
+     * Decide whether an entry must run, load its dependencies, pick an
+     * execution strategy and record the resulting state.
+     * @param {Object} entry - Manifest entry.
+     * @param {string} dateStr - Target date (YYYY-MM-DD).
+     * @param {Object} options - `{ dryRun, entities }`.
+     * @returns {Promise<Object>} `{ name, status, ... }`; status is 'completed', 'skipped',
+     *   or the analyzer decision type when the entry is not runnable.
+     */
+    async _executeComputation(entry, dateStr, options) {
+        const { name } = entry;
+        const forceEntities = options.entities;
+        // 1. Logic Check (Skip if unnecessary); bypassed when entities are forced
+        if (!forceEntities) {
+            const decision = await this._analyzeEntry(entry, dateStr);
+            if (decision.type !== 'runnable' && decision.type !== 'reRuns') {
+                return { name, status: decision.type, reason: decision.payload.reason };
+            }
+        }
+        this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
+        const startTime = Date.now();
+        // 2. Load Dependencies & Previous Results
+        const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
+        let previousResult = null;
+        if (entry.isHistorical) {
+            previousResult = await this.stateRepository.getResult(this._subtractDay(dateStr), name);
+        }
+        // 3. Select Execution Strategy
+        let stats = { count: 0, hash: null, skipped: false };
+        if (entry.type === 'per-entity' && !forceEntities) {
+            // STRATEGY A: Streaming (Low Memory, Checkpointing)
+            stats = await this._executeStreaming(entry, dateStr, depResults, previousResult, options);
+        } else {
+            // STRATEGY B: In-Memory (Global, Aggregates, or Forced Entities)
+            stats = await this._executeGlobal(entry, dateStr, depResults, previousResult, options, forceEntities);
+        }
+        if (stats.skipped) {
+            return { name, status: 'skipped', reason: 'Results unchanged', duration: Date.now() - startTime };
+        }
+        // 4. Update State (If real run)
+        if (!options.dryRun) {
+            await this.stateRepository.updateStatusCache(dateStr, name, {
+                hash: entry.hash,
+                resultHash: stats.hash,
+                dependencyResultHashes: depResultHashes,
+                entityCount: stats.count
+            });
+            // Flush any buffered lineage logs
+            await this.lineageMiddleware.flush();
+        }
+        return { name, status: 'completed', duration: Date.now() - startTime, resultCount: stats.count };
+    }
+    // --- STRATEGY A: STREAMING ---
+    /**
+     * Process a per-entity computation batch by batch, committing and
+     * checkpointing each batch unless this is a dry run.
+     *
+     * The rolling result hash is fed in batch order (the order of `entityIds`),
+     * not in task completion order, so identical results always produce the
+     * same hash regardless of how the concurrent tasks interleave.
+     *
+     * NOTE(review): batches skipped on resume contribute neither to the
+     * returned count nor to the rolling hash.
+     * @param {Object} entry - Manifest entry.
+     * @param {string} dateStr - Target date (YYYY-MM-DD).
+     * @param {Object} depResults - Preloaded dependency results (null marks "load per batch").
+     * @param {*} previousResult - Previous day's result for historical entries, else null.
+     * @param {Object} options - `{ dryRun, ... }`.
+     * @returns {Promise<{count: number, hash: string, skipped?: boolean}>}
+     */
+    async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
+        // 1. Setup Checkpoint
+        const checkpointer = new Checkpointer(this.config, this.storageManager);
+        let cp = null;
+        if (!options.dryRun) {
+            cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0); // 0 = unknown total
+            if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
+            if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
+        }
+        // 2. Initialize Stream
+        const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
+        const batchStream = this.dataFetcher.fetchComputationBatched(entry.requires, dateStr, batchSize);
+        const rollingHash = crypto.createHash('sha256');
+        let totalCount = 0;
+        let batchIndex = 0;
+        const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
+        const limit = pLimit(concurrency);
+        // 3. Iterate Batches
+        for await (const batch of batchStream) {
+            // Resume Logic: Skip completed batches
+            if (cp && cp.completedBatches.has(batchIndex)) {
+                batchIndex++;
+                continue;
+            }
+            const { data: batchData, entityIds } = batch;
+            // 4. PREFETCH DEPENDENCIES
+            const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
+            // 5. Dynamic Context Injection
+            const { rules } = this.ruleInjector.createContext(); // Used is implicit via Proxy
+            // 6. Execute Batch Concurrently; Promise.all keeps results aligned with entityIds
+            const orderedResults = await Promise.all(entityIds.map(entityId => limit(async () => {
+                const instance = new entry.class();
+                const entityData = this._filterDataForEntity(batchData, entityId);
+                const context = {
+                    computation: entry,
+                    date: dateStr,
+                    entityId,
+                    data: entityData,
+                    // Dependency Injector
+                    getDependency: (depName, targetId) => {
+                        // Prefetched maps are built from Object.entries, so their keys are strings.
+                        const lookupKey = String(targetId || entityId);
+                        const prefetched = batchDeps[depName];
+                        if (prefetched && prefetched.has(lookupKey)) {
+                            return prefetched.get(lookupKey);
+                        }
+                        return this._lazyLoadDependency(dateStr, depName, targetId || entityId, depResults);
+                    },
+                    previousResult,
+                    rules,
+                    references: this.referenceDataCache,
+                    config: this.config,
+                    dataFetcher: this.dataFetcher // <--- ADDED: Required by CostTrackerMiddleware
+                };
+                // DELEGATE TO RUNNER
+                return this.runner.run(instance, context);
+            })));
+            // Collect and hash in batch order so the digest is deterministic.
+            const batchResults = {};
+            entityIds.forEach((entityId, position) => {
+                const result = orderedResults[position];
+                if (result !== undefined) {
+                    batchResults[entityId] = result;
+                    this._updateRollingHash(rollingHash, result);
+                }
+            });
+            // 7. Commit Batch
+            if (!options.dryRun) {
+                await this.storageManager.commitResults(dateStr, entry, batchResults, {});
+                const lastId = entityIds[entityIds.length - 1];
+                await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
+            }
+            totalCount += Object.keys(batchResults).length;
+            batchIndex++;
+        }
+        if (!options.dryRun && cp) await checkpointer.complete(dateStr, entry.name, cp.id);
+        return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16) };
+    }
+    // --- STRATEGY B: GLOBAL / IN-MEMORY ---
+    /**
+     * Fetch all required data at once and run the computation in memory,
+     * either once (global) or once per entity (per-entity with forced ids).
+     *
+     * NOTE(review): when `forceEntities` is set nothing is committed here, yet
+     * the caller still updates the status cache on non-dry runs; confirm that
+     * this is intended.
+     * NOTE(review): per-entity runs in this path share one computation
+     * instance, unlike the streaming path which creates one per entity.
+     * @param {Object} entry - Manifest entry.
+     * @param {string} dateStr - Target date (YYYY-MM-DD).
+     * @param {Object} depResults - Preloaded dependency results.
+     * @param {*} previousResult - Previous day's result for historical entries, else null.
+     * @param {Object} options - `{ dryRun, ... }`.
+     * @param {Array|null|undefined} forceEntities - Entity ids to restrict the run to.
+     * @returns {Promise<{count: number, hash: string, skipped?: boolean}>}
+     */
+    async _executeGlobal(entry, dateStr, depResults, previousResult, options, forceEntities) {
+        // 1. Fetch Full Data
+        const data = await this.dataFetcher.fetchForComputation(entry.requires, dateStr, forceEntities);
+        const { rules } = this.ruleInjector.createContext();
+        const instance = new entry.class();
+        const context = {
+            computation: entry,
+            date: dateStr,
+            data,
+            getDependency: (dep, ent) => this._lazyLoadDependency(dateStr, dep, ent, depResults),
+            previousResult,
+            rules,
+            references: this.referenceDataCache,
+            config: this.config,
+            entityId: forceEntities ? null : '_global',
+            dataFetcher: this.dataFetcher // <--- ADDED: Required by CostTrackerMiddleware
+        };
+        // 2. Delegate to Runner
+        let results = {};
+        if (entry.type === 'per-entity') {
+            const ids = forceEntities || this._extractEntityIds(data);
+            // Honour the same concurrency setting as the streaming strategy.
+            const limit = pLimit(this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY);
+            await Promise.all(ids.map(id => limit(async () => {
+                const subCtx = {
+                    ...context,
+                    entityId: id,
+                    data: this._filterDataForEntity(data, id)
+                };
+                const res = await this.runner.run(instance, subCtx);
+                if (res) results[id] = res;
+            })));
+        } else {
+            results = await this.runner.run(instance, context);
+        }
+        // 3. Smart Invalidation Check: skip the commit when the stored hash already matches
+        const finalHash = this._hashResults(results);
+        if (!options.dryRun && !forceEntities) {
+            const currentStatus = await this.stateRepository.getDailyStatus(dateStr);
+            const status = currentStatus.get(entry.name.toLowerCase());
+            if (status && status.resultHash === finalHash) {
+                return { count: Object.keys(results || {}).length, hash: finalHash, skipped: true };
+            }
+            await this.storageManager.commitResults(dateStr, entry, results, {});
+        }
+        return { count: Object.keys(results || {}).length, hash: finalHash };
+    }
+    // =========================================================================
+    // HELPER METHODS
+    // =========================================================================
+    /**
+     * Evaluate a single entry against today's and yesterday's stored status.
+     * @returns {Promise<Object>} The analyzer decision (`type`, `payload`).
+     */
+    async _analyzeEntry(entry, dateStr) {
+        const d = await this.stateRepository.getDailyStatus(dateStr);
+        const p = await this.stateRepository.getDailyStatus(this._subtractDay(dateStr));
+        return this.runAnalyzer._evaluateEntry(entry, dateStr, false, d, p);
+    }
+    /**
+     * Load results and result hashes of an entry's dependencies.
+     * Dependencies whose recorded entity count exceeds 50000 are not loaded
+     * here; they are stored as `null` so they can be fetched per batch or per
+     * entity later.
+     * @returns {Promise<{depResults: Object, depResultHashes: Object}>}
+     */
+    async _loadDependencies(entry, dateStr) {
+        const depResults = {};
+        const depResultHashes = {};
+        const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
+        for (const dep of entry.dependencies) {
+            const stat = dailyStatus.get(dep);
+            if (stat?.resultHash) depResultHashes[dep] = stat.resultHash;
+            if (stat?.entityCount > 50000) {
+                depResults[dep] = null;
+            } else {
+                depResults[dep] = await this.stateRepository.getResult(dateStr, dep);
+            }
+        }
+        if (entry.conditionalDependencies) {
+            for (const condDep of entry.conditionalDependencies) {
+                const shouldLoad = condDep.condition({ date: dateStr, config: this.config });
+                if (shouldLoad) {
+                    // Only loaded when the dependency has a status record for this date.
+                    const depStatus = dailyStatus.get(condDep.computation.toLowerCase());
+                    if (depStatus) {
+                        depResults[condDep.computation] = await this.stateRepository.getResult(dateStr, condDep.computation);
+                        depResultHashes[condDep.computation] = depStatus.resultHash;
+                    }
+                }
+            }
+        }
+        return { depResults, depResultHashes };
+    }
+    /**
+     * For dependencies deferred by `_loadDependencies` (value `null`), fetch
+     * the results of just this batch's entities.
+     * @returns {Promise<Object<string, Map<string, *>>>} Map per dependency, keyed by entity id string.
+     */
+    async _prefetchBatchDependencies(entry, dateStr, loadedDeps, batchEntityIds) {
+        const prefetched = {};
+        for (const depName of entry.dependencies) {
+            if (loadedDeps[depName] === null) {
+                const batchRes = await this.stateRepository.getBatchEntityResults(dateStr, depName, batchEntityIds);
+                prefetched[depName] = new Map(Object.entries(batchRes));
+            }
+        }
+        return prefetched;
+    }
+    /**
+     * Resolve a dependency result: from the preloaded set when present,
+     * otherwise from the state repository (per entity when an id is given).
+     */
+    async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
+        if (preloaded[depName] && !entityId) return preloaded[depName];
+        if (preloaded[depName] && entityId) return preloaded[depName][entityId];
+        if (entityId) {
+            return this.stateRepository.getEntityResult(dateStr, depName, entityId);
+        }
+        return this.stateRepository.getResult(dateStr, depName);
+    }
+    /**
+     * Load every table listed in `config.referenceData` for today's date into
+     * `referenceDataCache`. A failing table is logged and skipped.
+     */
+    async _loadReferenceData() {
+        if (!this.config.referenceData) return;
+        await Promise.all(this.config.referenceData.map(async (table) => {
+            try {
+                const data = await this.dataFetcher.fetch({
+                    table,
+                    targetDate: new Date().toISOString().slice(0, 10),
+                    mandatory: false
+                });
+                this.referenceDataCache[table] = data || {};
+            } catch (e) {
+                this._log('WARN', `Failed to load Ref Data ${table}: ${e.message}`);
+            }
+        }));
+    }
+    /**
+     * Collect the distinct keys of every fetched table that is configured with
+     * an `entityField` and whose data is a non-array object.
+     * @returns {string[]}
+     */
+    _extractEntityIds(data) {
+        const ids = new Set();
+        Object.entries(data).forEach(([tbl, d]) => {
+            const conf = this.config.tables[tbl] || {};
+            if (conf.entityField && d && !Array.isArray(d)) Object.keys(d).forEach(k => ids.add(k));
+        });
+        return Array.from(ids);
+    }
+    /**
+     * Narrow fetched data to one entity: tables with an `entityField` and
+     * object data yield that entity's slice (or null); other tables pass through.
+     */
+    _filterDataForEntity(data, id) {
+        const out = {};
+        Object.entries(data).forEach(([tbl, d]) => {
+            const conf = this.config.tables[tbl] || {};
+            if (conf.entityField && d && !Array.isArray(d)) out[tbl] = d[id] || null;
+            else out[tbl] = d;
+        });
+        return out;
+    }
+    /** Feed one truthy result into the rolling hash; falsy results are ignored. */
+    _updateRollingHash(hasher, result) {
+        if (result) hasher.update(JSON.stringify(result));
+    }
+    /**
+     * Hash a result set independently of object key order.
+     *
+     * Keys are sorted recursively before serialising. An array passed as the
+     * JSON.stringify replacer would act as an allow-list at every nesting
+     * level and silently drop nested fields, making changed results look
+     * unchanged. Hashes therefore differ from ones stored by code that hashed
+     * that way.
+     * @param {*} results - Result object; `undefined` is hashed as `null`.
+     * @returns {string} First 16 hex characters of the SHA-256 digest.
+     */
+    _hashResults(results) {
+        const canonicalize = (value) => {
+            if (Array.isArray(value)) return value.map(canonicalize);
+            // Objects with toJSON (e.g. Date) are left to JSON.stringify.
+            if (value && typeof value === 'object' && typeof value.toJSON !== 'function') {
+                const sorted = {};
+                for (const key of Object.keys(value).sort()) sorted[key] = canonicalize(value[key]);
+                return sorted;
+            }
+            return value;
+        };
+        const serialized = JSON.stringify(canonicalize(results));
+        // JSON.stringify yields undefined for undefined input, which Hash.update rejects.
+        const canonical = typeof serialized === 'string' ? serialized : 'null';
+        return crypto.createHash('sha256').update(canonical).digest('hex').substring(0, 16);
+    }
+    /**
+     * @param {string} dateStr - Date as YYYY-MM-DD.
+     * @returns {string} The previous calendar day (UTC) as YYYY-MM-DD.
+     */
+    _subtractDay(dateStr) {
+        const d = new Date(dateStr + 'T00:00:00Z');
+        d.setUTCDate(d.getUTCDate() - 1);
+        return d.toISOString().slice(0, 10);
+    }
+    /** @returns {string[]} Distinct table names required by any manifest entry. */
+    _getAllTables() {
+        const s = new Set();
+        if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
+        return Array.from(s);
+    }
+    /** Log with the `[Orchestrator]` prefix at the given level. */
+    _log(l, m) { this.logger.log(l, `[Orchestrator] ${m}`); }
+}
+module.exports = { Orchestrator };

package/functions/computation-system-v2/framework/execution/TaskRunner.js ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * @fileoverview Task Runner
+ * Executes a single unit of work (global or per-entity) through a middleware chain.
+ */
+/**
+ * Runs one unit of work (global or per-entity) through an ordered list of
+ * middlewares; the first middleware in the list is the outermost wrapper.
+ */
+class TaskRunner {
+    /**
+     * @param {Array<Object>} [middlewares=[]] - Objects exposing `execute(context, next)`.
+     */
+    constructor(middlewares = []) {
+        this.middlewares = middlewares;
+    }
+    /**
+     * Call `instance.process(context)` inside the middleware chain and return
+     * what the chain yields.
+     * @param {Object} instance - The computation class instance
+     * @param {Object} context - The execution context
+     * @returns {Promise<*>} `instance.results[entityId]` when the context carries a truthy
+     *   `entityId`, otherwise the whole `instance.results` (as passed back through the middlewares).
+     */
+    async run(instance, context) {
+        // Innermost handler: run the computation, then pick its result.
+        const invokeComputation = async (ctx) => {
+            await instance.process(ctx);
+            return ctx.entityId ? instance.results[ctx.entityId] : instance.results;
+        };
+        // Wrap from the last middleware to the first so index 0 ends up outermost.
+        let handler = invokeComputation;
+        for (let i = this.middlewares.length - 1; i >= 0; i--) {
+            const middleware = this.middlewares[i];
+            const downstream = handler;
+            handler = async (ctx) => middleware.execute(ctx, downstream);
+        }
+        return await handler(context);
+    }
+}
+module.exports = { TaskRunner };

package/functions/computation-system-v2/framework/execution/middleware/CostTrackerMiddleware.js ADDED Viewed

@@ -0,0 +1,32 @@
+const { Middleware } = require('./Middleware');
+const { CostTracker } = require('../../cost/CostTracker');
+/**
+ * Middleware that attributes the bytes processed by the data fetcher during a
+ * task to that task's computation and reports them to a CostTracker.
+ */
+class CostTrackerMiddleware extends Middleware {
+    /**
+     * @param {Object} config - Passed through to the CostTracker.
+     */
+    constructor(config) {
+        super();
+        this.tracker = new CostTracker(config);
+    }
+    /**
+     * Measure the fetcher's `bytesProcessed` counter before and after the
+     * downstream call and report a positive difference.
+     *
+     * Cost tracking is best-effort: when the context has no `dataFetcher`
+     * exposing `getStats()`, the task runs untracked and does not fail.
+     *
+     * NOTE(review): the counter is read from the fetcher in the context; if
+     * that fetcher is shared by tasks running concurrently, the difference may
+     * also include bytes processed for those other tasks. Confirm.
+     * @param {Object} context - The execution context (`computation`, `date`, `dataFetcher`).
+     * @param {Function} next - The next function in the chain.
+     * @returns {Promise<*>} Whatever `next` resolves to.
+     */
+    async execute(context, next) {
+        const { computation, date, dataFetcher } = context;
+        if (!dataFetcher || typeof dataFetcher.getStats !== 'function') {
+            return next(context);
+        }
+        // Snapshot bytes before
+        const startBytes = dataFetcher.getStats().bytesProcessed || 0;
+        const result = await next(context);
+        // Snapshot bytes after
+        const endBytes = dataFetcher.getStats().bytesProcessed || 0;
+        const delta = endBytes - startBytes;
+        if (delta > 0) {
+            // Fire and forget: a tracking failure is logged, never propagated
+            this.tracker.trackCost(computation.name, date, delta)
+                .catch(e => console.error('Cost tracking failed', e));
+        }
+        return result;
+    }
+}
+module.exports = { CostTrackerMiddleware };

package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js ADDED Viewed

@@ -0,0 +1,32 @@
+const { Middleware } = require('./Middleware');
+const { LineageTracker } = require('../../lineage/LineageTracker');
+/**
+ * Middleware that hands each task's input slice and result to a
+ * LineageTracker after the task has run.
+ */
+class LineageMiddleware extends Middleware {
+    /**
+     * @param {Object} config - Passed through to the LineageTracker.
+     */
+    constructor(config) {
+        super();
+        this.tracker = new LineageTracker(config);
+    }
+    /**
+     * Run the downstream chain, then record lineage when both the result and
+     * the context's `entityId` are truthy. Tracking is not awaited; a
+     * rejection is logged to the console.
+     * @param {Object} context - The execution context.
+     * @param {Function} next - The next function in the chain.
+     * @returns {Promise<*>} Whatever `next` resolves to.
+     */
+    async execute(context, next) {
+        const result = await next(context);
+        // Nothing to attribute without a result or an entity.
+        if (!result || !context.entityId) return result;
+        const record = {
+            computation: context.computation.name,
+            date: context.date,
+            entityId: context.entityId,
+            sourceData: context.data, // The slice of data used
+            result
+        };
+        this.tracker.track(record).catch(e => console.error('Lineage tracking failed', e));
+        return result;
+    }
+    /**
+     * Delegate to the tracker's `flush()` and wait for it.
+     * @returns {Promise<void>}
+     */
+    async flush() {
+        await this.tracker.flush();
+    }
+}
+module.exports = { LineageMiddleware };

package/functions/computation-system-v2/framework/execution/middleware/Middleware.js ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * @fileoverview Middleware Interface
+ */
+/**
+ * Base middleware: a pass-through that subclasses override to wrap the
+ * downstream call.
+ */
+class Middleware {
+    /**
+     * @param {Object} context - The execution context
+     * @param {Function} next - The next function in the chain
+     * @returns {Promise<*>} The value produced by `next(context)`.
+     */
+    async execute(context, next) {
+        const outcome = await next(context);
+        return outcome;
+    }
+}
+module.exports = { Middleware };

package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js ADDED Viewed

@@ -0,0 +1,47 @@
+const { Middleware } = require('./Middleware');
+const { ComputationProfiler } = require('../../monitoring/Profiler');
+/**
+ * Middleware that profiles each task run and, when a storage manager has been
+ * injected, persists the resulting profile as a performance report.
+ */
+class ProfilerMiddleware extends Middleware {
+    /**
+     * @param {Object} config - Accepted for signature parity with the other middlewares; not read here.
+     */
+    constructor(config) {
+        super();
+        this.profiler = new ComputationProfiler();
+        this.storageManager = null; // Injected by Orchestrator
+    }
+    /**
+     * @param {Object} storageManager - Must expose `savePerformanceReport(date, report)`.
+     */
+    setStorage(storageManager) {
+        this.storageManager = storageManager;
+    }
+    /**
+     * Profile the downstream call. The profile is closed in `finally`, so it
+     * is recorded even when the task throws.
+     *
+     * The reported `resultSize` is the JSON length of the value returned by
+     * `next` (falling back to `context.results` when that value is
+     * undefined), or 0 when there is nothing serialisable.
+     * @param {Object} context - The execution context (`computation`, `entityId`, `date`).
+     * @param {Function} next - The next function in the chain.
+     * @returns {Promise<*>} Whatever `next` resolves to.
+     */
+    async execute(context, next) {
+        const { computation, entityId, date } = context;
+        // Start Profile
+        const key = this.profiler.startProfile(computation.name, entityId || 'global');
+        let result;
+        try {
+            // Run Next
+            result = await next(context);
+            return result;
+        } finally {
+            // End Profile (runs even if error)
+            const measured = result !== undefined ? result : context.results;
+            const profile = this.profiler.endProfile(key, {
+                entityId: entityId || 'global',
+                resultSize: this._measureResultSize(measured)
+            });
+            // Persist Profile if storage available
+            if (this.storageManager && profile) {
+                // Async save (don't block)
+                this.storageManager.savePerformanceReport(date, {
+                    computations: [{
+                        name: computation.name,
+                        ...profile
+                    }]
+                }).catch(err => console.error('Failed to save profile', err));
+            }
+        }
+    }
+    /**
+     * JSON length of a value, or 0 when it is undefined or cannot be
+     * serialised. Never throws: a throw here would replace the task's own
+     * result or error, because it is called from a `finally` block.
+     * @param {*} value
+     * @returns {number}
+     */
+    _measureResultSize(value) {
+        try {
+            const json = JSON.stringify(value);
+            return typeof json === 'string' ? json.length : 0;
+        } catch (err) {
+            return 0;
+        }
+    }
+}
+module.exports = { ProfilerMiddleware };