bulltrackers-module 1.0.735 → 1.0.736
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +75 -5
- package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
- package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
- package/functions/computation-system-v2/framework/index.js +10 -3
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
- package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
- package/functions/computation-system-v2/handlers/index.js +10 -1
- package/functions/computation-system-v2/handlers/scheduler.js +85 -193
- package/functions/computation-system-v2/handlers/worker.js +242 -0
- package/functions/computation-system-v2/test/analyze-results.js +238 -0
- package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
- package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
- package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
- package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
- package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
- package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
- package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
- package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
- package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
- package/functions/computation-system-v2/test/test-worker-pool.js +494 -0
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/TestComputation.js +0 -46
- /package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
|
@@ -5,6 +5,10 @@
|
|
|
5
5
|
* 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
|
|
6
6
|
* 3. Execution Strategy (Streaming vs. In-Memory)
|
|
7
7
|
* 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
|
|
8
|
+
* * * UPDATE: Added Execution Summary logging to debug Skipped/Blocked/Impossible tasks.
|
|
9
|
+
* * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
|
|
10
|
+
* * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
|
|
11
|
+
* * * UPDATE: Aggregates performance reporting to prevent log spam.
|
|
8
12
|
*/
|
|
9
13
|
|
|
10
14
|
const crypto = require('crypto');
|
|
@@ -26,6 +30,7 @@ const { Checkpointer } = require('../resilience/Checkpointer');
|
|
|
26
30
|
|
|
27
31
|
// Execution Components
|
|
28
32
|
const { TaskRunner } = require('./TaskRunner');
|
|
33
|
+
const { RemoteTaskRunner } = require('./RemoteTaskRunner');
|
|
29
34
|
const { ProfilerMiddleware } = require('./middleware/ProfilerMiddleware');
|
|
30
35
|
const { CostTrackerMiddleware } = require('./middleware/CostTrackerMiddleware');
|
|
31
36
|
const { LineageMiddleware } = require('./middleware/LineageMiddleware');
|
|
@@ -51,8 +56,9 @@ class Orchestrator {
|
|
|
51
56
|
this.ruleInjector = new RuleInjector(rulesRegistry);
|
|
52
57
|
|
|
53
58
|
// 3. Initialize Execution Stack (Middleware)
|
|
54
|
-
|
|
55
|
-
|
|
59
|
+
// Keep reference to profiler middleware to access stats later
|
|
60
|
+
this.profilerMiddleware = new ProfilerMiddleware(config);
|
|
61
|
+
this.profilerMiddleware.setStorage(this.storageManager);
|
|
56
62
|
|
|
57
63
|
this.lineageMiddleware = new LineageMiddleware(config);
|
|
58
64
|
const costTracker = new CostTrackerMiddleware(config);
|
|
@@ -61,9 +67,15 @@ class Orchestrator {
|
|
|
61
67
|
this.runner = new TaskRunner([
|
|
62
68
|
costTracker,
|
|
63
69
|
this.lineageMiddleware,
|
|
64
|
-
|
|
70
|
+
this.profilerMiddleware
|
|
65
71
|
]);
|
|
66
72
|
|
|
73
|
+
// 4. Initialize Remote Task Runner (Worker Pool)
|
|
74
|
+
// Only create if worker pool is enabled in config
|
|
75
|
+
this.remoteRunner = config.workerPool?.enabled
|
|
76
|
+
? new RemoteTaskRunner(config, this.logger)
|
|
77
|
+
: null;
|
|
78
|
+
|
|
67
79
|
// State
|
|
68
80
|
this.manifest = null;
|
|
69
81
|
this.runAnalyzer = null;
|
|
@@ -72,49 +84,29 @@ class Orchestrator {
|
|
|
72
84
|
|
|
73
85
|
async initialize() {
|
|
74
86
|
this._log('INFO', 'Initializing Orchestrator...');
|
|
75
|
-
|
|
76
|
-
// Build Manifest
|
|
77
87
|
this.manifest = this.manifestBuilder.build(this.config.computations || []);
|
|
78
|
-
|
|
79
|
-
// Initialize Analyzer
|
|
80
88
|
this.runAnalyzer = new RunAnalyzer(this.manifest, this.dataFetcher, this.logger);
|
|
81
|
-
|
|
82
|
-
// Warm Schema Cache
|
|
83
89
|
await this.schemaRegistry.warmCache(this._getAllTables());
|
|
84
|
-
|
|
85
|
-
// Load Reference Data (e.g. sectors, holidays)
|
|
86
90
|
await this._loadReferenceData();
|
|
87
|
-
|
|
88
91
|
this._log('INFO', `Initialized with ${this.manifest.length} computations`);
|
|
89
92
|
}
|
|
90
93
|
|
|
91
|
-
/**
|
|
92
|
-
* Analyze what needs to run for a given date.
|
|
93
|
-
*/
|
|
94
94
|
async analyze(options) {
|
|
95
95
|
const { date } = options;
|
|
96
96
|
if (!this.manifest) await this.initialize();
|
|
97
|
-
|
|
98
97
|
const dailyStatus = await this.stateRepository.getDailyStatus(date);
|
|
99
98
|
const prevStatus = await this.stateRepository.getDailyStatus(this._subtractDay(date));
|
|
100
|
-
|
|
101
99
|
const report = await this.runAnalyzer.analyze(date, dailyStatus, prevStatus);
|
|
102
|
-
|
|
103
|
-
// Compatibility: Merge reRuns into runnable
|
|
104
100
|
report.runnable = [...report.runnable, ...report.reRuns];
|
|
105
101
|
return report;
|
|
106
102
|
}
|
|
107
103
|
|
|
108
|
-
/**
|
|
109
|
-
* Main Execution Loop
|
|
110
|
-
*/
|
|
111
104
|
async execute(options) {
|
|
112
105
|
const { date, pass = null, computation = null, dryRun = false, entities = null } = options;
|
|
113
106
|
if (!this.manifest) await this.initialize();
|
|
114
107
|
|
|
115
108
|
this._log('INFO', `Starting execution for ${date}...`);
|
|
116
109
|
|
|
117
|
-
// 1. Filter Manifest
|
|
118
110
|
let toRun = this.manifest;
|
|
119
111
|
if (computation) {
|
|
120
112
|
const norm = computation.toLowerCase().replace(/[^a-z0-9]/g, '');
|
|
@@ -122,7 +114,6 @@ class Orchestrator {
|
|
|
122
114
|
if (!toRun.length) throw new Error(`Computation not found: ${computation}`);
|
|
123
115
|
}
|
|
124
116
|
|
|
125
|
-
// 2. Group by Pass
|
|
126
117
|
const passes = this.manifestBuilder.groupByPass(toRun);
|
|
127
118
|
const passNumbers = Object.keys(passes).map(Number).sort((a,b) => a-b);
|
|
128
119
|
const passesToRun = pass ? [parseInt(pass, 10)] : passNumbers;
|
|
@@ -133,20 +124,27 @@ class Orchestrator {
|
|
|
133
124
|
completed: [], skipped: [], blocked: [], impossible: [], errors: []
|
|
134
125
|
};
|
|
135
126
|
|
|
136
|
-
// 3. Execute Passes
|
|
137
127
|
for (const passNum of passesToRun) {
|
|
138
128
|
const passComputations = passes[passNum] || [];
|
|
139
129
|
this._log('INFO', `Executing Pass ${passNum}: ${passComputations.length} computations`);
|
|
140
130
|
|
|
141
|
-
//
|
|
142
|
-
// We use Promise.all to run them concurrently.
|
|
131
|
+
// Computations in the same pass run in parallel here
|
|
143
132
|
await Promise.all(passComputations.map(async (entry) => {
|
|
144
133
|
try {
|
|
145
|
-
|
|
146
|
-
const res = await this._executeComputation(entry, date, { dryRun, entities });
|
|
134
|
+
const res = await this._executeComputation(entry, date, { ...options, dryRun, entities });
|
|
147
135
|
|
|
148
|
-
summary[res.status]
|
|
149
|
-
|
|
136
|
+
if (summary[res.status]) {
|
|
137
|
+
summary[res.status].push(res);
|
|
138
|
+
summary.summary[res.status]++;
|
|
139
|
+
} else {
|
|
140
|
+
summary.errors.push({ name: entry.name, error: `Unknown status: ${res.status}` });
|
|
141
|
+
summary.summary.errors++;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
if (res.status === 'skipped' || res.status === 'blocked' || res.status === 'impossible') {
|
|
145
|
+
this._log('WARN', `Skipped ${entry.name}: [${res.status.toUpperCase()}] ${res.reason}`);
|
|
146
|
+
}
|
|
147
|
+
|
|
150
148
|
} catch (e) {
|
|
151
149
|
summary.errors.push({ name: entry.name, error: e.message });
|
|
152
150
|
summary.summary.errors++;
|
|
@@ -155,12 +153,14 @@ class Orchestrator {
|
|
|
155
153
|
}));
|
|
156
154
|
}
|
|
157
155
|
|
|
156
|
+
this._printExecutionSummary(summary);
|
|
158
157
|
return summary;
|
|
159
158
|
}
|
|
160
159
|
|
|
161
160
|
async runSingle(entry, dateStr, options = {}) {
|
|
162
161
|
if (!this.manifest) await this.initialize();
|
|
163
162
|
return this._executeComputation(entry, dateStr, {
|
|
163
|
+
...options,
|
|
164
164
|
dryRun: options.dryRun || false,
|
|
165
165
|
entities: options.entityIds
|
|
166
166
|
});
|
|
@@ -174,18 +174,22 @@ class Orchestrator {
|
|
|
174
174
|
const { name } = entry;
|
|
175
175
|
const forceEntities = options.entities;
|
|
176
176
|
|
|
177
|
-
// 1. Logic Check (Skip if unnecessary)
|
|
178
177
|
if (!forceEntities) {
|
|
179
178
|
const decision = await this._analyzeEntry(entry, dateStr);
|
|
180
|
-
|
|
179
|
+
const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
|
|
180
|
+
const shouldForce = options.force && isSkippedOrCached;
|
|
181
|
+
|
|
182
|
+
if (!shouldForce && decision.type !== 'runnable' && decision.type !== 'reRuns') {
|
|
181
183
|
return { name, status: decision.type, reason: decision.payload.reason };
|
|
182
184
|
}
|
|
185
|
+
if (shouldForce) {
|
|
186
|
+
this._log('INFO', `Force Run Enabled: Ignoring '${decision.type}' status for ${name}`);
|
|
187
|
+
}
|
|
183
188
|
}
|
|
184
189
|
|
|
185
190
|
this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
|
|
186
191
|
const startTime = Date.now();
|
|
187
192
|
|
|
188
|
-
// 2. Load Dependencies & Previous Results
|
|
189
193
|
const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
|
|
190
194
|
|
|
191
195
|
let previousResult = null;
|
|
@@ -193,113 +197,151 @@ class Orchestrator {
|
|
|
193
197
|
previousResult = await this.stateRepository.getResult(this._subtractDay(dateStr), name);
|
|
194
198
|
}
|
|
195
199
|
|
|
196
|
-
// 3. Select Execution Strategy
|
|
197
200
|
let stats = { count: 0, hash: null, skipped: false };
|
|
198
201
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
}
|
|
202
|
+
try {
|
|
203
|
+
if (entry.type === 'per-entity' && !forceEntities) {
|
|
204
|
+
stats = await this._executeStreaming(entry, dateStr, depResults, previousResult, options);
|
|
205
|
+
} else {
|
|
206
|
+
stats = await this._executeGlobal(entry, dateStr, depResults, previousResult, options, forceEntities);
|
|
207
|
+
}
|
|
206
208
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
209
|
+
// REPORTING: Save Aggregated Performance Report
|
|
210
|
+
if (!options.dryRun) {
|
|
211
|
+
const wallClockDuration = Date.now() - startTime;
|
|
212
|
+
const profilerStats = this.profilerMiddleware.profiler.getAndClearStats(entry.name);
|
|
213
|
+
|
|
214
|
+
// If we have granular stats, include them. If not (e.g. skipped batches), use defaults.
|
|
215
|
+
const report = {
|
|
216
|
+
runId: this.config.testMode?.runId || 'production',
|
|
217
|
+
computationName: entry.name,
|
|
218
|
+
date: dateStr,
|
|
219
|
+
durationMs: wallClockDuration,
|
|
220
|
+
entityCount: stats.count,
|
|
221
|
+
status: stats.skipped ? 'skipped' : 'completed',
|
|
222
|
+
metrics: profilerStats ? {
|
|
223
|
+
avgEntityDuration: profilerStats.avgDuration,
|
|
224
|
+
p95EntityDuration: profilerStats.p95Duration,
|
|
225
|
+
avgMemoryDelta: profilerStats.avgMemoryDelta,
|
|
226
|
+
throughput: (stats.count / (wallClockDuration / 1000)).toFixed(2) + ' ent/sec'
|
|
227
|
+
} : {}
|
|
228
|
+
};
|
|
210
229
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
230
|
+
// Async save (don't block pipeline)
|
|
231
|
+
this.storageManager.savePerformanceReport(report).catch(e => {
|
|
232
|
+
this._log('WARN', `Failed to save perf report for ${name}: ${e.message}`);
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
if (stats.skipped) {
|
|
237
|
+
return { name, status: 'skipped', reason: 'Results unchanged or Dead Letter', duration: Date.now() - startTime };
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (!options.dryRun) {
|
|
241
|
+
await this.stateRepository.updateStatusCache(dateStr, name, {
|
|
242
|
+
hash: entry.hash,
|
|
243
|
+
resultHash: stats.hash,
|
|
244
|
+
dependencyResultHashes: depResultHashes,
|
|
245
|
+
entityCount: stats.count
|
|
246
|
+
});
|
|
247
|
+
await this.lineageMiddleware.flush();
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
return { name, status: 'completed', duration: Date.now() - startTime, resultCount: stats.count };
|
|
251
|
+
|
|
252
|
+
} catch (e) {
|
|
253
|
+
// Also try to save failure report
|
|
254
|
+
const wallClockDuration = Date.now() - startTime;
|
|
255
|
+
this.storageManager.savePerformanceReport({
|
|
256
|
+
runId: this.config.testMode?.runId || 'production',
|
|
257
|
+
computationName: entry.name,
|
|
258
|
+
date: dateStr,
|
|
259
|
+
durationMs: wallClockDuration,
|
|
260
|
+
entityCount: stats.count || 0,
|
|
261
|
+
status: 'failed',
|
|
262
|
+
metrics: { error: e.message }
|
|
263
|
+
}).catch(() => {});
|
|
219
264
|
|
|
220
|
-
|
|
221
|
-
await this.lineageMiddleware.flush();
|
|
265
|
+
throw e;
|
|
222
266
|
}
|
|
223
|
-
|
|
224
|
-
return { name, status: 'completed', duration: Date.now() - startTime, resultCount: stats.count };
|
|
225
267
|
}
|
|
226
268
|
|
|
227
|
-
// --- STRATEGY A: STREAMING ---
|
|
228
269
|
async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
|
|
229
|
-
// 1. Setup Checkpoint
|
|
230
270
|
const checkpointer = new Checkpointer(this.config, this.storageManager);
|
|
231
271
|
let cp = null;
|
|
232
272
|
if (!options.dryRun) {
|
|
233
|
-
|
|
273
|
+
const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
|
|
274
|
+
cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
|
|
275
|
+
|
|
276
|
+
if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
|
|
277
|
+
if (cp && cp.skipped) {
|
|
278
|
+
this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
|
|
279
|
+
return { count: 0, hash: 'skipped_dead_letter', skipped: true };
|
|
280
|
+
}
|
|
234
281
|
if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
|
|
235
282
|
if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
|
|
236
283
|
}
|
|
237
284
|
|
|
238
|
-
//
|
|
285
|
+
// DECISION: Use remote workers or local execution?
|
|
286
|
+
const useRemote = this._shouldUseRemoteWorkers(entry, options);
|
|
287
|
+
|
|
288
|
+
if (useRemote) {
|
|
289
|
+
this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
|
|
290
|
+
return this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
// LOCAL EXECUTION PATH (Original Logic)
|
|
294
|
+
const driverTable = this._getDriverTable(entry.requires);
|
|
295
|
+
const driverEntityField = this.config.tables[driverTable]?.entityField;
|
|
296
|
+
const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
|
|
297
|
+
|
|
298
|
+
let globalData = {};
|
|
299
|
+
if (Object.keys(globalRequires).length > 0) {
|
|
300
|
+
globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
|
|
301
|
+
}
|
|
302
|
+
|
|
239
303
|
const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
|
|
240
|
-
const batchStream = this.dataFetcher.fetchComputationBatched(
|
|
304
|
+
const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
|
|
241
305
|
|
|
242
306
|
const rollingHash = crypto.createHash('sha256');
|
|
243
307
|
let totalCount = 0;
|
|
244
308
|
let batchIndex = 0;
|
|
245
|
-
|
|
246
309
|
const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
|
|
247
310
|
const limit = pLimit(concurrency);
|
|
248
311
|
|
|
249
|
-
// 3. Iterate Batches
|
|
250
312
|
for await (const batch of batchStream) {
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
batchIndex++;
|
|
254
|
-
continue;
|
|
313
|
+
if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
|
|
314
|
+
batchIndex++; continue;
|
|
255
315
|
}
|
|
256
316
|
|
|
257
|
-
const { data:
|
|
258
|
-
|
|
259
|
-
// 4. PREFETCH DEPENDENCIES
|
|
317
|
+
const { data: batchLocalData, entityIds } = batch;
|
|
318
|
+
const combinedData = { ...batchLocalData, ...globalData };
|
|
260
319
|
const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
|
|
261
|
-
|
|
262
|
-
// 5. Dynamic Context Injection
|
|
263
|
-
const { rules } = this.ruleInjector.createContext(); // Used is implicit via Proxy
|
|
264
|
-
|
|
265
|
-
// 6. Execute Batch Concurrently
|
|
320
|
+
const { rules } = this.ruleInjector.createContext();
|
|
266
321
|
const batchResults = {};
|
|
267
322
|
|
|
268
323
|
await Promise.all(entityIds.map(entityId => limit(async () => {
|
|
269
324
|
const instance = new entry.class();
|
|
270
|
-
const entityData = this._filterDataForEntity(
|
|
271
|
-
|
|
325
|
+
const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
|
|
272
326
|
const context = {
|
|
273
|
-
computation: entry,
|
|
274
|
-
date: dateStr,
|
|
275
|
-
entityId,
|
|
276
|
-
data: entityData,
|
|
277
|
-
|
|
278
|
-
// Dependency Injector
|
|
327
|
+
computation: entry, date: dateStr, entityId, data: entityData,
|
|
279
328
|
getDependency: (depName, targetId) => {
|
|
280
329
|
if (batchDeps[depName] && batchDeps[depName].has(targetId || entityId)) {
|
|
281
330
|
return batchDeps[depName].get(targetId || entityId);
|
|
282
331
|
}
|
|
283
332
|
return this._lazyLoadDependency(dateStr, depName, targetId || entityId, depResults);
|
|
284
333
|
},
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
rules,
|
|
288
|
-
references: this.referenceDataCache,
|
|
289
|
-
config: this.config,
|
|
290
|
-
dataFetcher: this.dataFetcher // <--- ADDED: Required by CostTrackerMiddleware
|
|
334
|
+
previousResult, rules, references: this.referenceDataCache,
|
|
335
|
+
config: this.config, dataFetcher: this.dataFetcher
|
|
291
336
|
};
|
|
292
337
|
|
|
293
|
-
// DELEGATE TO RUNNER
|
|
294
338
|
const result = await this.runner.run(instance, context);
|
|
295
|
-
|
|
296
339
|
if (result !== undefined) {
|
|
297
340
|
batchResults[entityId] = result;
|
|
298
341
|
this._updateRollingHash(rollingHash, result);
|
|
299
342
|
}
|
|
300
343
|
})));
|
|
301
344
|
|
|
302
|
-
// 7. Commit Batch
|
|
303
345
|
if (!options.dryRun) {
|
|
304
346
|
await this.storageManager.commitResults(dateStr, entry, batchResults, {});
|
|
305
347
|
const lastId = entityIds[entityIds.length - 1];
|
|
@@ -310,44 +352,182 @@ class Orchestrator {
|
|
|
310
352
|
batchIndex++;
|
|
311
353
|
}
|
|
312
354
|
|
|
313
|
-
if (!options.dryRun
|
|
355
|
+
if (!options.dryRun) {
|
|
356
|
+
await this.storageManager.finalizeResults(dateStr, entry);
|
|
357
|
+
if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16) };
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Determine if a computation should use remote workers
|
|
365
|
+
*/
|
|
366
|
+
_shouldUseRemoteWorkers(entry, options) {
|
|
367
|
+
// No remote runner configured
|
|
368
|
+
if (!this.remoteRunner) return false;
|
|
369
|
+
|
|
370
|
+
// Force local execution via options
|
|
371
|
+
if (options.forceLocal) return false;
|
|
372
|
+
|
|
373
|
+
const poolConfig = this.config.workerPool || {};
|
|
374
|
+
|
|
375
|
+
// Exclusion list
|
|
376
|
+
if (poolConfig.excludeComputations?.includes(entry.name) ||
|
|
377
|
+
poolConfig.excludeComputations?.includes(entry.originalName)) {
|
|
378
|
+
return false;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// Force list (override threshold)
|
|
382
|
+
if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
|
|
383
|
+
poolConfig.forceOffloadComputations?.includes(entry.originalName)) {
|
|
384
|
+
return true;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// Only per-entity computations can be offloaded
|
|
388
|
+
if (entry.type !== 'per-entity') return false;
|
|
389
|
+
|
|
390
|
+
// Default: use remote if worker pool is enabled
|
|
391
|
+
return true;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
/**
|
|
395
|
+
* Execute using remote worker pool
|
|
396
|
+
* Workers handle individual entities, Orchestrator handles batching and storage
|
|
397
|
+
*/
|
|
398
|
+
async _executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp) {
|
|
399
|
+
const driverTable = this._getDriverTable(entry.requires);
|
|
400
|
+
const driverEntityField = this.config.tables[driverTable]?.entityField;
|
|
401
|
+
const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
|
|
402
|
+
|
|
403
|
+
// Load global data once (shared across all entities)
|
|
404
|
+
let globalData = {};
|
|
405
|
+
if (Object.keys(globalRequires).length > 0) {
|
|
406
|
+
globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Prepare base context (shared across all entities)
|
|
410
|
+
const baseContext = {
|
|
411
|
+
references: this.referenceDataCache,
|
|
412
|
+
config: {
|
|
413
|
+
// Only pass serializable config to workers
|
|
414
|
+
project: this.config.project,
|
|
415
|
+
tables: this.config.tables
|
|
416
|
+
}
|
|
417
|
+
};
|
|
418
|
+
|
|
419
|
+
const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
|
|
420
|
+
const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
|
|
421
|
+
|
|
422
|
+
const rollingHash = crypto.createHash('sha256');
|
|
423
|
+
let totalCount = 0;
|
|
424
|
+
let totalErrors = 0;
|
|
425
|
+
let batchIndex = 0;
|
|
426
|
+
|
|
427
|
+
for await (const batch of batchStream) {
|
|
428
|
+
// Skip completed batches (checkpoint resume)
|
|
429
|
+
if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
|
|
430
|
+
batchIndex++;
|
|
431
|
+
continue;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
const { data: batchLocalData, entityIds } = batch;
|
|
435
|
+
const combinedData = { ...batchLocalData, ...globalData };
|
|
436
|
+
|
|
437
|
+
// Prefetch dependencies for this batch
|
|
438
|
+
const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
|
|
439
|
+
|
|
440
|
+
// Convert Map to Object for serialization
|
|
441
|
+
const serializedDeps = {};
|
|
442
|
+
for (const [depName, depMap] of Object.entries(batchDeps)) {
|
|
443
|
+
if (depMap instanceof Map) {
|
|
444
|
+
serializedDeps[depName] = Object.fromEntries(depMap);
|
|
445
|
+
} else {
|
|
446
|
+
serializedDeps[depName] = depMap;
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Merge with preloaded deps
|
|
451
|
+
const mergedDeps = { ...depResults, ...serializedDeps };
|
|
452
|
+
|
|
453
|
+
// Build entity data map
|
|
454
|
+
const entityDataMap = new Map();
|
|
455
|
+
for (const entityId of entityIds) {
|
|
456
|
+
const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
|
|
457
|
+
entityDataMap.set(entityId, entityData);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
// INVOKE REMOTE WORKERS
|
|
461
|
+
this._log('INFO', `[Remote] Processing batch ${batchIndex}: ${entityIds.length} entities`);
|
|
462
|
+
const { results: batchResults, errors } = await this.remoteRunner.runBatch(
|
|
463
|
+
entry,
|
|
464
|
+
dateStr,
|
|
465
|
+
baseContext,
|
|
466
|
+
entityIds,
|
|
467
|
+
entityDataMap,
|
|
468
|
+
mergedDeps
|
|
469
|
+
);
|
|
470
|
+
|
|
471
|
+
if (errors.length > 0) {
|
|
472
|
+
this._log('WARN', `[Remote] Batch ${batchIndex}: ${errors.length} entities failed`);
|
|
473
|
+
totalErrors += errors.length;
|
|
474
|
+
|
|
475
|
+
// Log first few errors for debugging
|
|
476
|
+
errors.slice(0, 3).forEach(e => {
|
|
477
|
+
this._log('DEBUG', ` - ${e.entityId}: ${e.error}`);
|
|
478
|
+
});
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
// Update rolling hash with results
|
|
482
|
+
for (const result of Object.values(batchResults)) {
|
|
483
|
+
this._updateRollingHash(rollingHash, result);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
// Commit results to storage
|
|
487
|
+
if (!options.dryRun && Object.keys(batchResults).length > 0) {
|
|
488
|
+
await this.storageManager.commitResults(dateStr, entry, batchResults, {});
|
|
489
|
+
const lastId = entityIds[entityIds.length - 1];
|
|
490
|
+
await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
totalCount += Object.keys(batchResults).length;
|
|
494
|
+
batchIndex++;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// Finalize
|
|
498
|
+
if (!options.dryRun) {
|
|
499
|
+
await this.storageManager.finalizeResults(dateStr, entry);
|
|
500
|
+
if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
if (totalErrors > 0) {
|
|
504
|
+
this._log('WARN', `[Remote] Completed with ${totalErrors} total errors out of ${totalCount + totalErrors} entities`);
|
|
505
|
+
}
|
|
314
506
|
|
|
315
507
|
return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16) };
|
|
316
508
|
}
|
|
317
509
|
|
|
318
|
-
// --- STRATEGY B: GLOBAL / IN-MEMORY ---
|
|
319
510
|
async _executeGlobal(entry, dateStr, depResults, previousResult, options, forceEntities) {
|
|
320
|
-
// 1. Fetch Full Data
|
|
321
511
|
const data = await this.dataFetcher.fetchForComputation(entry.requires, dateStr, forceEntities);
|
|
322
512
|
const { rules } = this.ruleInjector.createContext();
|
|
323
|
-
|
|
513
|
+
const driverTable = entry.type === 'per-entity' ? this._getDriverTable(entry.requires) : null;
|
|
514
|
+
const driverEntityField = driverTable ? this.config.tables[driverTable]?.entityField : null;
|
|
324
515
|
const instance = new entry.class();
|
|
516
|
+
|
|
325
517
|
const context = {
|
|
326
|
-
computation: entry,
|
|
327
|
-
date: dateStr,
|
|
328
|
-
data,
|
|
518
|
+
computation: entry, date: dateStr, data,
|
|
329
519
|
getDependency: (dep, ent) => this._lazyLoadDependency(dateStr, dep, ent, depResults),
|
|
330
|
-
previousResult,
|
|
331
|
-
|
|
332
|
-
references: this.referenceDataCache,
|
|
333
|
-
config: this.config,
|
|
334
|
-
entityId: forceEntities ? null : '_global',
|
|
335
|
-
dataFetcher: this.dataFetcher // <--- ADDED: Required by CostTrackerMiddleware
|
|
520
|
+
previousResult, rules, references: this.referenceDataCache,
|
|
521
|
+
config: this.config, entityId: forceEntities ? null : '_global', dataFetcher: this.dataFetcher
|
|
336
522
|
};
|
|
337
523
|
|
|
338
|
-
// 2. Delegate to Runner
|
|
339
524
|
let results = {};
|
|
340
525
|
|
|
341
526
|
if (entry.type === 'per-entity') {
|
|
342
527
|
const ids = forceEntities || this._extractEntityIds(data);
|
|
343
528
|
const limit = pLimit(DEFAULT_CONCURRENCY);
|
|
344
|
-
|
|
345
529
|
await Promise.all(ids.map(id => limit(async () => {
|
|
346
|
-
const subCtx = {
|
|
347
|
-
...context,
|
|
348
|
-
entityId: id,
|
|
349
|
-
data: this._filterDataForEntity(data, id)
|
|
350
|
-
};
|
|
530
|
+
const subCtx = { ...context, entityId: id, data: this._filterDataForEntity(data, id, driverEntityField) };
|
|
351
531
|
const res = await this.runner.run(instance, subCtx);
|
|
352
532
|
if (res) results[id] = res;
|
|
353
533
|
})));
|
|
@@ -355,26 +535,73 @@ class Orchestrator {
|
|
|
355
535
|
results = await this.runner.run(instance, context);
|
|
356
536
|
}
|
|
357
537
|
|
|
358
|
-
// 3. Smart Invalidation Check
|
|
359
538
|
const finalHash = this._hashResults(results);
|
|
360
539
|
|
|
361
540
|
if (!options.dryRun && !forceEntities) {
|
|
362
541
|
const currentStatus = await this.stateRepository.getDailyStatus(dateStr);
|
|
363
542
|
const status = currentStatus.get(entry.name.toLowerCase());
|
|
364
543
|
|
|
365
|
-
if (status && status.resultHash === finalHash) {
|
|
544
|
+
if (!options.force && status && status.resultHash === finalHash) {
|
|
366
545
|
return { count: Object.keys(results || {}).length, hash: finalHash, skipped: true };
|
|
367
546
|
}
|
|
368
547
|
|
|
369
548
|
await this.storageManager.commitResults(dateStr, entry, results, {});
|
|
549
|
+
await this.storageManager.finalizeResults(dateStr, entry);
|
|
370
550
|
}
|
|
371
551
|
|
|
372
552
|
return { count: Object.keys(results || {}).length, hash: finalHash };
|
|
373
553
|
}
|
|
374
554
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
555
|
+
_printExecutionSummary(summary) {
|
|
556
|
+
console.log('\n┌────────────────────────────────────────────────────────┐');
|
|
557
|
+
console.log('│ EXECUTION SUMMARY │');
|
|
558
|
+
console.log('└────────────────────────────────────────────────────────┘');
|
|
559
|
+
console.log(`\n📅 Date: ${summary.date}`);
|
|
560
|
+
console.log(`✅ Completed: ${summary.summary.completed}`);
|
|
561
|
+
console.log(`❌ Errors: ${summary.summary.errors}`);
|
|
562
|
+
console.log(`⏭️ Skipped: ${summary.summary.skipped + summary.summary.blocked + summary.summary.impossible}`);
|
|
563
|
+
|
|
564
|
+
const skippedItems = [...summary.skipped, ...summary.blocked, ...summary.impossible];
|
|
565
|
+
if (skippedItems.length > 0) {
|
|
566
|
+
console.log('\n--- Details (Why did it skip?) ---');
|
|
567
|
+
skippedItems.forEach(item => {
|
|
568
|
+
let icon = '⏭️';
|
|
569
|
+
if (item.status === 'blocked') icon = '⛔';
|
|
570
|
+
if (item.status === 'impossible') icon = '🚫';
|
|
571
|
+
console.log(`${icon} ${item.name}: ${item.status.toUpperCase()} -> ${item.reason}`);
|
|
572
|
+
});
|
|
573
|
+
}
|
|
574
|
+
if (summary.errors.length > 0) {
|
|
575
|
+
console.log('\n--- Errors ---');
|
|
576
|
+
summary.errors.forEach(e => console.log(`❌ ${e.name}: ${e.error}`));
|
|
577
|
+
}
|
|
578
|
+
console.log('');
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
_getDriverTable(requires) {
|
|
582
|
+
for (const name of Object.keys(requires)) {
|
|
583
|
+
const conf = this.config.tables[name];
|
|
584
|
+
if (conf && conf.entityField) return name;
|
|
585
|
+
}
|
|
586
|
+
return null;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
_splitRequirements(requires, driverTable) {
|
|
590
|
+
const batchRequires = {};
|
|
591
|
+
const globalRequires = {};
|
|
592
|
+
if (!driverTable) return { batchRequires: {}, globalRequires: requires };
|
|
593
|
+
const driverConfig = this.config.tables[driverTable];
|
|
594
|
+
const driverEntityField = driverConfig ? driverConfig.entityField : null;
|
|
595
|
+
for (const [name, spec] of Object.entries(requires)) {
|
|
596
|
+
const conf = this.config.tables[name];
|
|
597
|
+
if (conf && conf.entityField === driverEntityField) {
|
|
598
|
+
batchRequires[name] = spec;
|
|
599
|
+
} else {
|
|
600
|
+
globalRequires[name] = spec;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
return { batchRequires, globalRequires };
|
|
604
|
+
}
|
|
378
605
|
|
|
379
606
|
async _analyzeEntry(entry, dateStr) {
|
|
380
607
|
const d = await this.stateRepository.getDailyStatus(dateStr);
|
|
@@ -386,18 +613,12 @@ class Orchestrator {
|
|
|
386
613
|
const depResults = {};
|
|
387
614
|
const depResultHashes = {};
|
|
388
615
|
const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
|
|
389
|
-
|
|
390
616
|
for (const dep of entry.dependencies) {
|
|
391
617
|
const stat = dailyStatus.get(dep);
|
|
392
618
|
if (stat?.resultHash) depResultHashes[dep] = stat.resultHash;
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
depResults[dep] = null;
|
|
396
|
-
} else {
|
|
397
|
-
depResults[dep] = await this.stateRepository.getResult(dateStr, dep);
|
|
398
|
-
}
|
|
619
|
+
if (stat?.entityCount > 50000) depResults[dep] = null;
|
|
620
|
+
else depResults[dep] = await this.stateRepository.getResult(dateStr, dep);
|
|
399
621
|
}
|
|
400
|
-
|
|
401
622
|
if (entry.conditionalDependencies) {
|
|
402
623
|
for (const condDep of entry.conditionalDependencies) {
|
|
403
624
|
const shouldLoad = condDep.condition({ date: dateStr, config: this.config });
|
|
@@ -410,13 +631,11 @@ class Orchestrator {
|
|
|
410
631
|
}
|
|
411
632
|
}
|
|
412
633
|
}
|
|
413
|
-
|
|
414
634
|
return { depResults, depResultHashes };
|
|
415
635
|
}
|
|
416
636
|
|
|
417
637
|
async _prefetchBatchDependencies(entry, dateStr, loadedDeps, batchEntityIds) {
|
|
418
638
|
const prefetched = {};
|
|
419
|
-
|
|
420
639
|
for (const depName of entry.dependencies) {
|
|
421
640
|
if (loadedDeps[depName] === null) {
|
|
422
641
|
const batchRes = await this.stateRepository.getBatchEntityResults(dateStr, depName, batchEntityIds);
|
|
@@ -429,10 +648,7 @@ class Orchestrator {
|
|
|
429
648
|
async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
|
|
430
649
|
if (preloaded[depName] && !entityId) return preloaded[depName];
|
|
431
650
|
if (preloaded[depName] && entityId) return preloaded[depName][entityId];
|
|
432
|
-
|
|
433
|
-
if (entityId) {
|
|
434
|
-
return this.stateRepository.getEntityResult(dateStr, depName, entityId);
|
|
435
|
-
}
|
|
651
|
+
if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
|
|
436
652
|
return this.stateRepository.getResult(dateStr, depName);
|
|
437
653
|
}
|
|
438
654
|
|
|
@@ -440,15 +656,9 @@ class Orchestrator {
|
|
|
440
656
|
if (!this.config.referenceData) return;
|
|
441
657
|
await Promise.all(this.config.referenceData.map(async (table) => {
|
|
442
658
|
try {
|
|
443
|
-
const data = await this.dataFetcher.fetch({
|
|
444
|
-
table,
|
|
445
|
-
targetDate: new Date().toISOString().slice(0, 10),
|
|
446
|
-
mandatory: false
|
|
447
|
-
});
|
|
659
|
+
const data = await this.dataFetcher.fetch({ table, targetDate: new Date().toISOString().slice(0, 10), mandatory: false });
|
|
448
660
|
this.referenceDataCache[table] = data || {};
|
|
449
|
-
} catch (e) {
|
|
450
|
-
this._log('WARN', `Failed to load Ref Data ${table}: ${e.message}`);
|
|
451
|
-
}
|
|
661
|
+
} catch (e) { this._log('WARN', `Failed to load Ref Data ${table}: ${e.message}`); }
|
|
452
662
|
}));
|
|
453
663
|
}
|
|
454
664
|
|
|
@@ -461,37 +671,34 @@ class Orchestrator {
|
|
|
461
671
|
return Array.from(ids);
|
|
462
672
|
}
|
|
463
673
|
|
|
464
|
-
_filterDataForEntity(data, id) {
|
|
674
|
+
_filterDataForEntity(data, id, driverEntityField) {
|
|
465
675
|
const out = {};
|
|
466
676
|
Object.entries(data).forEach(([tbl, d]) => {
|
|
467
677
|
const conf = this.config.tables[tbl] || {};
|
|
468
|
-
if (conf.entityField && d && !Array.isArray(d))
|
|
469
|
-
|
|
678
|
+
if (conf.entityField === driverEntityField && d && !Array.isArray(d)) {
|
|
679
|
+
out[tbl] = d[id] || null;
|
|
680
|
+
} else {
|
|
681
|
+
out[tbl] = d;
|
|
682
|
+
}
|
|
470
683
|
});
|
|
471
684
|
return out;
|
|
472
685
|
}
|
|
473
686
|
|
|
474
|
-
_updateRollingHash(
|
|
475
|
-
if (result) hasher.update(JSON.stringify(result));
|
|
476
|
-
}
|
|
477
|
-
|
|
687
|
+
_updateRollingHash(rollingHash, result) { if (result) rollingHash.update(JSON.stringify(result)); }
|
|
478
688
|
_hashResults(results) {
|
|
479
689
|
const canonical = JSON.stringify(results, Object.keys(results || {}).sort());
|
|
480
690
|
return crypto.createHash('sha256').update(canonical).digest('hex').substring(0, 16);
|
|
481
691
|
}
|
|
482
|
-
|
|
483
692
|
_subtractDay(dateStr) {
|
|
484
693
|
const d = new Date(dateStr + 'T00:00:00Z');
|
|
485
694
|
d.setUTCDate(d.getUTCDate() - 1);
|
|
486
695
|
return d.toISOString().slice(0, 10);
|
|
487
696
|
}
|
|
488
|
-
|
|
489
697
|
_getAllTables() {
|
|
490
698
|
const s = new Set();
|
|
491
699
|
if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
|
|
492
700
|
return Array.from(s);
|
|
493
701
|
}
|
|
494
|
-
|
|
495
702
|
_log(l, m) { this.logger.log(l, `[Orchestrator] ${m}`); }
|
|
496
703
|
}
|
|
497
704
|
|