bulltrackers-module 1.0.306 → 1.0.308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/WorkflowOrchestrator.js +100 -212
- package/functions/computation-system/helpers/computation_worker.js +56 -267
- package/functions/computation-system/utils/utils.js +54 -171
- package/package.json +1 -1
- package/functions/computation-system/features.md +0 -395
- package/functions/computation-system/paper.md +0 -93
|
@@ -1,277 +1,165 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
* UPDATED:
|
|
4
|
-
*
|
|
5
|
-
* UPDATED: Detailed Dependency Reporting for Impossible Chains.
|
|
2
|
+
* FILENAME: computation-system/WorkflowOrchestrator.js
|
|
3
|
+
* UPDATED: Added missing groupByPass export.
|
|
4
|
+
* Includes Content-Based Short-Circuiting for both Upstream and Historical dependencies.
|
|
6
5
|
*/
|
|
6
|
+
|
|
7
7
|
const { normalizeName, DEFINITIVE_EARLIEST_DATES } = require('./utils/utils');
|
|
8
8
|
const { checkRootDataAvailability, checkRootDependencies } = require('./data/AvailabilityChecker');
|
|
9
9
|
const { fetchExistingResults } = require('./data/DependencyFetcher');
|
|
10
10
|
const { fetchComputationStatus, updateComputationStatus } = require('./persistence/StatusRepository');
|
|
11
11
|
const { StandardExecutor } = require('./executors/StandardExecutor');
|
|
12
12
|
const { MetaExecutor } = require('./executors/MetaExecutor');
|
|
13
|
-
const { generateProcessId, PROCESS_TYPES } = require('./logger/logger');
|
|
14
13
|
|
|
15
14
|
const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';
|
|
16
15
|
|
|
17
|
-
function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[calc.pass] = acc[calc.pass] || []).push(calc); return acc; }, {}); }
|
|
18
|
-
|
|
19
16
|
/**
|
|
20
|
-
*
|
|
21
|
-
*
|
|
17
|
+
* [FIX] Groups manifest entries by their pass number.
|
|
18
|
+
* Required by the Dispatcher to identify current work-sets.
|
|
22
19
|
*/
|
|
23
|
-
function
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
const storedDep = currentStatusMap[norm];
|
|
32
|
-
const depManifest = manifestMap.get(norm);
|
|
33
|
-
|
|
34
|
-
// 1. Basic Existence Checks
|
|
35
|
-
if (!storedDep) return false;
|
|
36
|
-
if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
|
|
37
|
-
if (!depManifest) return false;
|
|
20
|
+
function groupByPass(manifest) {
|
|
21
|
+
const passes = {};
|
|
22
|
+
manifest.forEach(calc => {
|
|
23
|
+
if (!passes[calc.pass]) passes[calc.pass] = [];
|
|
24
|
+
passes[calc.pass].push(calc);
|
|
25
|
+
});
|
|
26
|
+
return passes;
|
|
27
|
+
}
|
|
38
28
|
|
|
39
|
-
|
|
40
|
-
|
|
29
|
+
/**
|
|
30
|
+
* [NEW] Core Short-Circuit Logic.
|
|
31
|
+
* Checks if a dependency (either a different node or "yesterday's self") is satisfied.
|
|
32
|
+
*/
|
|
33
|
+
function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStatusMap, manifestMap, storedStatus) {
|
|
34
|
+
const norm = normalizeName(depName);
|
|
35
|
+
const targetStatus = isHistoricalSelf ? (prevStatusMap ? prevStatusMap[norm] : null) : currentStatusMap[norm];
|
|
36
|
+
const depManifest = manifestMap.get(norm);
|
|
37
|
+
|
|
38
|
+
if (!targetStatus) return { ready: false, reason: 'Missing' };
|
|
39
|
+
if (String(targetStatus.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX)) return { ready: false, reason: 'Impossible Upstream' };
|
|
40
|
+
|
|
41
|
+
// 1. Code Hash Match (Strict)
|
|
42
|
+
if (targetStatus.hash === depManifest.hash) return { ready: true };
|
|
43
|
+
|
|
44
|
+
// 2. Content Hash Match (Short-Circuit)
|
|
45
|
+
// The dependency's hash did not match its manifest hash; still accept it if its current resultHash equals the one recorded in our stored dependencyResultHashes.
|
|
46
|
+
const lastSeenResultHash = storedStatus?.dependencyResultHashes?.[depName];
|
|
47
|
+
if (lastSeenResultHash && targetStatus.resultHash === lastSeenResultHash) {
|
|
48
|
+
return { ready: true, shortCircuited: true };
|
|
49
|
+
}
|
|
41
50
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
// dependentStoredStatus = The status of the calculation (B) that depends on this (A).
|
|
45
|
-
// dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
|
|
46
|
-
// storedDep.resultHash = The current ResultHash of A.
|
|
47
|
-
if (dependentStoredStatus &&
|
|
48
|
-
dependentStoredStatus.dependencyResultHashes &&
|
|
49
|
-
dependentStoredStatus.dependencyResultHashes[depName] &&
|
|
50
|
-
storedDep.resultHash &&
|
|
51
|
-
storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
|
|
52
|
-
return true; // Short-circuit: The output didn't change, so we are safe.
|
|
53
|
-
}
|
|
51
|
+
return { ready: false, reason: 'Hash Mismatch' };
|
|
52
|
+
}
|
|
54
53
|
|
|
55
|
-
|
|
56
|
-
};
|
|
54
|
+
function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
|
|
55
|
+
const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
|
|
56
|
+
const simulationStatus = { ...dailyStatus };
|
|
57
57
|
|
|
58
58
|
for (const calc of calcsInPass) {
|
|
59
|
-
const cName
|
|
60
|
-
const stored
|
|
61
|
-
const
|
|
62
|
-
const storedCategory = stored ? stored.category : null;
|
|
63
|
-
const currentHash = calc.hash;
|
|
64
|
-
|
|
65
|
-
// Collect current result hashes of dependencies for the next run
|
|
66
|
-
const currentDependencyResultHashes = {};
|
|
67
|
-
if (calc.dependencies) {
|
|
68
|
-
calc.dependencies.forEach(d => {
|
|
69
|
-
const normD = normalizeName(d);
|
|
70
|
-
if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
|
|
71
|
-
currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
|
|
72
|
-
}
|
|
73
|
-
});
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
const markImpossible = (reason, type = 'GENERIC') => {
|
|
77
|
-
report.impossible.push({ name: cName, reason });
|
|
78
|
-
const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
|
|
79
|
-
simulationStatus[cName] = { hash: statusHash, category: calc.category };
|
|
80
|
-
};
|
|
59
|
+
const cName = normalizeName(calc.name);
|
|
60
|
+
const stored = simulationStatus[cName];
|
|
61
|
+
const currentHash = calc.hash;
|
|
81
62
|
|
|
82
|
-
|
|
83
|
-
const payload = {
|
|
84
|
-
name: cName,
|
|
85
|
-
...reRunDetails,
|
|
86
|
-
dependencyResultHashes: currentDependencyResultHashes // Pass forward
|
|
87
|
-
};
|
|
88
|
-
if (isReRun) report.reRuns.push(payload);
|
|
89
|
-
else report.runnable.push(payload);
|
|
90
|
-
// Simulate success so dependents can pass their check
|
|
91
|
-
simulationStatus[cName] = {
|
|
92
|
-
hash: currentHash,
|
|
93
|
-
resultHash: 'SIMULATED',
|
|
94
|
-
category: calc.category,
|
|
95
|
-
composition: calc.composition
|
|
96
|
-
};
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
let migrationOldCategory = null;
|
|
100
|
-
if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }
|
|
101
|
-
|
|
102
|
-
// 1. Check Root Data
|
|
63
|
+
// 1. Root Data Check
|
|
103
64
|
const rootCheck = checkRootDependencies(calc, rootDataStatus);
|
|
104
|
-
|
|
105
65
|
if (!rootCheck.canRun) {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
66
|
+
if (dateStr !== new Date().toISOString().slice(0, 10)) {
|
|
67
|
+
report.impossible.push({ name: cName, reason: `Missing Root: ${rootCheck.missing.join(', ')}` });
|
|
68
|
+
simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:NO_DATA` };
|
|
109
69
|
} else {
|
|
110
|
-
report.blocked.push({ name: cName, reason: `
|
|
70
|
+
report.blocked.push({ name: cName, reason: `Waiting for Root Data` });
|
|
111
71
|
}
|
|
112
72
|
continue;
|
|
113
73
|
}
|
|
114
74
|
|
|
115
|
-
// 2. Check
|
|
116
|
-
let dependencyIsImpossible = false;
|
|
117
|
-
let impossibleDepCause = null;
|
|
75
|
+
// 2. Dependency & Temporal Check
|
|
118
76
|
const missingDeps = [];
|
|
77
|
+
let isBlockedByHistory = false;
|
|
78
|
+
|
|
79
|
+
// A. Standard Upstream Dependencies
|
|
119
80
|
if (calc.dependencies) {
|
|
120
81
|
for (const dep of calc.dependencies) {
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
|
|
124
|
-
dependencyIsImpossible = true;
|
|
125
|
-
impossibleDepCause = dep;
|
|
126
|
-
break;
|
|
127
|
-
}
|
|
128
|
-
// Pass 'stored' (this calc's status) to check short-circuiting
|
|
129
|
-
if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
|
|
82
|
+
const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
|
|
83
|
+
if (!check.ready) missingDeps.push(dep);
|
|
130
84
|
}
|
|
131
85
|
}
|
|
132
86
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
continue;
|
|
136
|
-
}
|
|
137
|
-
if (missingDeps.length > 0) { report.failedDependency.push({ name: cName, missing: missingDeps }); continue; }
|
|
138
|
-
|
|
139
|
-
// 3. Check Historical Continuity
|
|
140
|
-
if (calc.isHistorical && prevDailyStatus) {
|
|
87
|
+
// B. [UPGRADED] Temporal Dependency (Yesterday's Self)
|
|
88
|
+
if (calc.isHistorical) {
|
|
141
89
|
const yesterday = new Date(dateStr + 'T00:00:00Z');
|
|
142
90
|
yesterday.setUTCDate(yesterday.getUTCDate() - 1);
|
|
91
|
+
|
|
92
|
+
// Only block if yesterday is on or after DEFINITIVE_EARLIEST_DATES.absoluteEarliest.
|
|
143
93
|
if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
|
|
144
|
-
const
|
|
145
|
-
if (!
|
|
146
|
-
report.blocked.push({ name: cName, reason: `Waiting for historical continuity (Yesterday ${!prevStored ? 'Missing' : 'Hash Mismatch'})` });
|
|
147
|
-
continue;
|
|
148
|
-
}
|
|
94
|
+
const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
|
|
95
|
+
if (!check.ready) isBlockedByHistory = true;
|
|
149
96
|
}
|
|
150
97
|
}
|
|
151
|
-
|
|
152
|
-
// 4. Check Hash / Composition (The Audit Gate)
|
|
153
|
-
if (!storedHash) {
|
|
154
|
-
markRunnable(false, { reason: "New Calculation" });
|
|
155
|
-
}
|
|
156
|
-
else if (storedHash !== currentHash) {
|
|
157
|
-
// [NEW] Check if Dependencies caused this, and if their content is actually same
|
|
158
|
-
// Note: If we are here, it means code changed.
|
|
159
|
-
// Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
|
|
160
|
-
// But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
|
|
161
|
-
// So we strictly re-run if code changes.
|
|
162
|
-
|
|
163
|
-
let changeReason = "Hash Mismatch (Unknown)";
|
|
164
|
-
const oldComp = stored.composition;
|
|
165
|
-
const newComp = calc.composition;
|
|
166
98
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
const changedLayers = [];
|
|
173
|
-
for(const lKey in newComp.layers) {
|
|
174
|
-
if (newComp.layers[lKey] !== oldComp.layers[lKey]) changedLayers.push(lKey);
|
|
175
|
-
}
|
|
176
|
-
changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
|
|
177
|
-
}
|
|
178
|
-
else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
|
|
179
|
-
// Dependency Hash Mismatch.
|
|
180
|
-
// This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
|
|
181
|
-
// But typically if code hash mismatches, we re-run.
|
|
182
|
-
// The "Short-Circuit" benefit is mainly that *dependents* of this calculation
|
|
183
|
-
// won't need to re-run if *this* calculation produces the same output.
|
|
184
|
-
const changedDeps = [];
|
|
185
|
-
for(const dKey in newComp.deps) {
|
|
186
|
-
if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
|
|
187
|
-
}
|
|
188
|
-
changeReason = `Upstream Change: [${changedDeps.join(', ')}]`;
|
|
189
|
-
}
|
|
190
|
-
else {
|
|
191
|
-
changeReason = "Logic/Epoch Change";
|
|
192
|
-
}
|
|
99
|
+
if (missingDeps.length > 0) {
|
|
100
|
+
const isImpossible = missingDeps.some(d => simulationStatus[normalizeName(d)]?.hash?.startsWith(STATUS_IMPOSSIBLE_PREFIX));
|
|
101
|
+
if (isImpossible) {
|
|
102
|
+
report.impossible.push({ name: cName, reason: 'Upstream Impossible' });
|
|
103
|
+
simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:UPSTREAM` };
|
|
193
104
|
} else {
|
|
194
|
-
|
|
105
|
+
report.failedDependency.push({ name: cName, missing: missingDeps });
|
|
195
106
|
}
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
196
109
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
else if (!stored.composition) {
|
|
209
|
-
markRunnable(true, {
|
|
210
|
-
name: cName,
|
|
211
|
-
oldHash: storedHash,
|
|
212
|
-
newHash: currentHash,
|
|
213
|
-
reason: 'Audit Upgrade (Populating Composition Metadata)'
|
|
110
|
+
if (isBlockedByHistory) {
|
|
111
|
+
report.blocked.push({ name: cName, reason: 'Waiting for Yesterday' });
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// 3. Runnable / Skip Logic
|
|
116
|
+
const currentDependencyResultHashes = {};
|
|
117
|
+
if (calc.dependencies) {
|
|
118
|
+
calc.dependencies.forEach(d => {
|
|
119
|
+
const resHash = simulationStatus[normalizeName(d)]?.resultHash;
|
|
120
|
+
if (resHash) currentDependencyResultHashes[d] = resHash;
|
|
214
121
|
});
|
|
215
122
|
}
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
123
|
+
|
|
124
|
+
const taskPayload = { name: cName, dependencyResultHashes: currentDependencyResultHashes };
|
|
125
|
+
|
|
126
|
+
if (!stored?.hash) {
|
|
127
|
+
report.runnable.push({ ...taskPayload, reason: "New Calculation" });
|
|
128
|
+
simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
|
|
129
|
+
} else if (stored.hash !== currentHash) {
|
|
130
|
+
report.reRuns.push({ ...taskPayload, oldHash: stored.hash, newHash: currentHash, reason: "Hash Mismatch" });
|
|
131
|
+
simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
|
|
132
|
+
} else {
|
|
133
|
+
report.skipped.push({ name: cName, reason: "Up To Date" });
|
|
219
134
|
}
|
|
220
135
|
}
|
|
221
136
|
return report;
|
|
222
137
|
}
|
|
223
138
|
|
|
224
|
-
/**
|
|
225
|
-
* DIRECT EXECUTION PIPELINE (For Workers)
|
|
226
|
-
* [UPDATED] Accepts dependencyResultHashes
|
|
227
|
-
*/
|
|
228
139
|
async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
|
|
229
140
|
const { logger } = dependencies;
|
|
230
|
-
const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);
|
|
231
|
-
|
|
232
141
|
const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
|
|
233
142
|
const calcManifest = manifestMap.get(normalizeName(targetComputation));
|
|
234
143
|
|
|
235
|
-
if (!calcManifest)
|
|
236
|
-
|
|
237
|
-
// [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
|
|
144
|
+
if (!calcManifest) throw new Error(`Calc '${targetComputation}' not found.`);
|
|
238
145
|
calcManifest.dependencyResultHashes = dependencyResultHashes;
|
|
239
146
|
|
|
240
|
-
if (previousCategory) {
|
|
241
|
-
calcManifest.previousCategory = previousCategory;
|
|
242
|
-
logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
|
|
243
|
-
}
|
|
244
|
-
|
|
245
147
|
const rootData = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
|
|
246
|
-
if (!rootData) {
|
|
247
|
-
logger.log('ERROR', `[Executor] FATAL: Root data check failed for ${targetComputation} on ${dateStr}.`);
|
|
248
|
-
return;
|
|
249
|
-
}
|
|
250
|
-
|
|
251
148
|
const calcsToRun = [calcManifest];
|
|
252
|
-
const existingResults = await fetchExistingResults(dateStr, calcsToRun, computationManifest, config, dependencies, false);
|
|
253
149
|
|
|
150
|
+
const existingResults = await fetchExistingResults(dateStr, calcsToRun, computationManifest, config, dependencies, false);
|
|
254
151
|
let previousResults = {};
|
|
255
152
|
if (calcManifest.isHistorical) {
|
|
256
|
-
const
|
|
257
|
-
|
|
258
|
-
const prevDateStr = prevDate.toISOString().slice(0, 10);
|
|
259
|
-
previousResults = await fetchExistingResults(prevDateStr, calcsToRun, computationManifest, config, dependencies, true);
|
|
153
|
+
const prev = new Date(dateStr + 'T00:00:00Z'); prev.setUTCDate(prev.getUTCDate() - 1);
|
|
154
|
+
previousResults = await fetchExistingResults(prev.toISOString().slice(0, 10), calcsToRun, computationManifest, config, dependencies, true);
|
|
260
155
|
}
|
|
261
156
|
|
|
262
|
-
|
|
263
|
-
|
|
157
|
+
const execDate = new Date(dateStr + 'T00:00:00Z');
|
|
158
|
+
const updates = (calcManifest.type === 'standard')
|
|
159
|
+
? await StandardExecutor.run(execDate, calcsToRun, `Pass ${pass}`, config, dependencies, rootData, existingResults, previousResults)
|
|
160
|
+
: await MetaExecutor.run(execDate, calcsToRun, `Pass ${pass}`, config, dependencies, existingResults, previousResults, rootData);
|
|
264
161
|
|
|
265
|
-
|
|
266
|
-
if (calcManifest.type === 'standard') { resultUpdates = await StandardExecutor.run(new Date(dateStr + 'T00:00:00Z'), [calcManifest], `Pass ${pass}`, config, dependencies, rootData, existingResults, previousResults);
|
|
267
|
-
} else if (calcManifest.type === 'meta') { resultUpdates = await MetaExecutor.run (new Date(dateStr + 'T00:00:00Z'), [calcManifest], `Pass ${pass}`, config, dependencies, existingResults, previousResults, rootData);
|
|
268
|
-
}
|
|
269
|
-
logger.log('INFO', `[Executor] Success: ${calcManifest.name} for ${dateStr}`);
|
|
270
|
-
return { date: dateStr, updates: resultUpdates };
|
|
271
|
-
} catch (err) {
|
|
272
|
-
logger.log('ERROR', `[Executor] Failed ${calcManifest.name}: ${err.message}`, { processId: pid, stack: err.stack });
|
|
273
|
-
throw err;
|
|
274
|
-
}
|
|
162
|
+
return { date: dateStr, updates };
|
|
275
163
|
}
|
|
276
164
|
|
|
277
|
-
module.exports = { executeDispatchTask,
|
|
165
|
+
module.exports = { executeDispatchTask, analyzeDateExecution, groupByPass };
|