bulltrackers-module 1.0.312 → 1.0.314
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/WorkflowOrchestrator.js +41 -51
- package/functions/computation-system/context/ManifestBuilder.js +35 -0
- package/functions/computation-system/helpers/computation_dispatcher.js +31 -16
- package/functions/computation-system/helpers/computation_worker.js +7 -7
- package/functions/computation-system/helpers/monitor.js +1 -0
- package/functions/computation-system/persistence/ResultCommitter.js +22 -12
- package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +16 -35
- package/package.json +1 -1
--- a/package/functions/computation-system/WorkflowOrchestrator.js
+++ b/package/functions/computation-system/WorkflowOrchestrator.js
@@ -1,6 +1,6 @@
 /**
  * FILENAME: computation-system/WorkflowOrchestrator.js
- * UPDATED: Implements Data-Drift Detection
+ * UPDATED: Implements Early-Skip Optimization for Data-Drift Detection.
  */

 const { normalizeName, DEFINITIVE_EARLIEST_DATES } = require('./utils/utils');
@@ -12,10 +12,6 @@ const { MetaExecutor } = require('./executor

 const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';

-/**
- * Groups manifest entries by their pass number.
- * Required by the Dispatcher to identify current work-sets.
- */
 function groupByPass(manifest) {
   const passes = {};
   manifest.forEach(calc => {
@@ -25,34 +21,20 @@ function groupByPass(manifest) {
   return passes;
 }

-/**
- * Core Short-Circuit Logic.
- * Checks if a dependency is satisfied AND checks for Data Drift.
- * Returns { ready: boolean, dataChanged: boolean, reason: string }
- */
 function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStatusMap, manifestMap, storedStatus) {
   const norm = normalizeName(depName);
   const targetStatus = isHistoricalSelf ? (prevStatusMap ? prevStatusMap[norm] : null) : currentStatusMap[norm];
   const depManifest = manifestMap.get(norm);

-  // 1. Availability Check
   if (!targetStatus) return { ready: false, reason: 'Missing' };
   if (String(targetStatus.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX)) return { ready: false, reason: 'Impossible Upstream' };

-  // 2. Code Hash Check (The dependency must be running the correct version)
-  // If the dependency's hash doesn't match its manifest, it means the dependency itself needs to run/update first.
   if (depManifest && targetStatus.hash !== depManifest.hash) {
     return { ready: false, reason: 'Dependency Version Mismatch' };
   }

-  // 3. Data Integrity Check (The Short-Circuit Logic)
-  // We check if the result hash of the dependency matches what we remember using last time.
   if (storedStatus && storedStatus.dependencyResultHashes) {
     const lastSeenResultHash = storedStatus.dependencyResultHashes[depName];
-
-    // If we recorded a dependency hash last time, and it differs from the current live status,
-    // then the dependency has produced NEW data. We are NOT ready to skip.
-    // We return 'ready: true' (it exists) but we flag 'dataChanged: true' to force execution.
     if (lastSeenResultHash && targetStatus.resultHash !== lastSeenResultHash) {
       return { ready: true, dataChanged: true, reason: 'Dependency Data Update' };
     }
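The readiness check above is the pivot of the drift detection: a dependency can be ready (it exists and runs the right code) while still flagging `dataChanged`, which forces the consumer to re-run instead of skipping. A minimal usage sketch, with invented hashes and a dependency name assumed to already be in normalized form:

```js
// Hypothetical fixtures; only isDependencyReady() above is real.
const manifestMap = new Map([['pricedelta', { hash: 'code-v2' }]]);
const currentStatusMap = {
  pricedelta: { hash: 'code-v2', resultHash: 'data-b' } // upstream ran v2 and produced 'data-b'
};
// What this consumer remembered consuming on its last successful run:
const stored = { dependencyResultHashes: { pricedelta: 'data-a' } };

// Upstream is present and current, but its output hash moved (a -> b):
isDependencyReady('pricedelta', false, currentStatusMap, null, manifestMap, stored);
// -> { ready: true, dataChanged: true, reason: 'Dependency Data Update' }

// If the upstream's live code hash lags its manifest, the consumer must wait instead:
currentStatusMap.pricedelta.hash = 'code-v1';
isDependencyReady('pricedelta', false, currentStatusMap, null, manifestMap, stored);
// -> { ready: false, reason: 'Dependency Version Mismatch' }
```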
@@ -61,6 +43,9 @@ function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStat
   return { ready: true, dataChanged: false };
 }

+/**
+ * UPDATED: Logic moved to top of loop for early skip on stable data.
+ */
 function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
   const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
   const simulationStatus = { ...dailyStatus };
@@ -82,33 +67,56 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
       continue;
     }

-    //
+    // --- OPTIMIZATION: Early skip if code matches AND data is stable ---
+    if (stored?.hash === currentHash) {
+      let hasDataDrift = false;
+      let isBlocked = false;
+      let missingDeps = [];
+
+      if (calc.dependencies) {
+        for (const dep of calc.dependencies) {
+          const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
+          if (!check.ready) missingDeps.push(dep);
+          else if (check.dataChanged) { hasDataDrift = true; break; }
+        }
+      }
+
+      if (!hasDataDrift && missingDeps.length === 0 && calc.isHistorical) {
+        const yesterday = new Date(dateStr + 'T00:00:00Z');
+        yesterday.setUTCDate(yesterday.getUTCDate() - 1);
+        if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
+          const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
+          if (!check.ready) isBlocked = true;
+          else if (check.dataChanged) hasDataDrift = true;
+        }
+      }
+
+      if (!hasDataDrift && !isBlocked && missingDeps.length === 0) {
+        report.skipped.push({ name: cName, reason: "Up To Date" });
+        continue;
+      }
+    }
+    // --- END OPTIMIZATION ---
+
     const missingDeps = [];
-    let hasDataDrift = false;
+    let hasDataDrift = false;
     let isBlocked = false;

-    // A. Standard Upstream Dependencies
     if (calc.dependencies) {
       for (const dep of calc.dependencies) {
         const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
-        if (!check.ready) {
-          missingDeps.push(dep);
-        } else if (check.dataChanged) {
-          hasDataDrift = true;
-        }
+        if (!check.ready) missingDeps.push(dep);
+        else if (check.dataChanged) hasDataDrift = true;
       }
     }

-    // B. Temporal Dependency (Yesterday's Self)
     if (calc.isHistorical) {
       const yesterday = new Date(dateStr + 'T00:00:00Z');
       yesterday.setUTCDate(yesterday.getUTCDate() - 1);
-
-      // Only block if yesterday is a valid data date.
       if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
         const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
         if (!check.ready) isBlocked = true;
-        else if (check.dataChanged) hasDataDrift = true;
+        else if (check.dataChanged) hasDataDrift = true;
       }
     }

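Condensed, the early-skip gate lets a calculation bypass the full decision tree only when every signal is green: same code hash, no upstream drift, no missing dependencies, and (for historical calcs) yesterday's self is settled. A standalone sketch of the same predicate, a hypothetical helper that is not part of the module:

```js
// Mirrors the gate above: skip only when code and inputs are both stable.
function shouldEarlySkip(stored, currentHash, hasDataDrift, isBlocked, missingDeps) {
  if (stored?.hash !== currentHash) return false; // code changed -> needs a re-run
  if (hasDataDrift) return false;                 // an upstream produced new data
  if (isBlocked) return false;                    // yesterday's self is not ready
  if (missingDeps.length > 0) return false;       // an upstream is missing entirely
  return true;                                    // stable code + stable inputs -> "Up To Date"
}

shouldEarlySkip({ hash: 'h1' }, 'h1', false, false, []); // true  -> report.skipped
shouldEarlySkip({ hash: 'h1' }, 'h1', true,  false, []); // false -> falls through to full analysis
```

The payoff is that the common case (nothing changed) exits before the dependency-hash bookkeeping further down the loop.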
@@ -128,9 +136,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
       continue;
     }

-    // 3. Execution Decision
-
-    // Collect current dependency result hashes to be saved if we run
     const currentDependencyResultHashes = {};
     if (calc.dependencies) {
       calc.dependencies.forEach(d => {
@@ -142,23 +147,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
     const taskPayload = { name: cName, dependencyResultHashes: currentDependencyResultHashes };

     if (!stored?.hash) {
-      // Case A: New Calculation (Never run)
       report.runnable.push({ ...taskPayload, reason: "New Calculation" });
-
-    }
-    else if (stored.hash !== currentHash) {
-      // Case B: Code Hash Mismatch (Logic Changed)
+    } else if (stored.hash !== currentHash) {
       report.reRuns.push({ ...taskPayload, oldHash: stored.hash, newHash: currentHash, reason: "Hash Mismatch" });
-
-    }
-    else if (hasDataDrift) {
-      // Case C: Code Matches, BUT Input Data Changed (The Holy Grail Optimization)
+    } else if (hasDataDrift) {
       report.runnable.push({ ...taskPayload, reason: "Input Data Changed" });
-      simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
-    }
-    else {
-      // Case D: Code Matches AND Data Matches -> Short Circuit
-      report.skipped.push({ name: cName, reason: "Up To Date" });
     }
   }
   return report;
@@ -170,9 +163,6 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep
   const calcManifest = manifestMap.get(normalizeName(targetComputation));

   if (!calcManifest) throw new Error(`Calc '${targetComputation}' not found.`);
-
-  // [CRITICAL] Inject the fresh dependency result hashes so they are saved to DB on commit.
-  // This enables the "lastSeenResultHash" check in future runs.
   calcManifest.dependencyResultHashes = dependencyResultHashes;

   const rootData = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
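This injection closes the loop: the hashes recorded here are persisted with the status document at commit time and become the `storedStatus.dependencyResultHashes` that `isDependencyReady` consults on the next pass. A sketch of the round-trip, with invented values:

```js
// Run N (dispatch + commit): snapshot the upstream result hashes actually consumed.
const calcManifest = { name: 'mycalc' };                        // hypothetical
calcManifest.dependencyResultHashes = { pricedelta: 'data-a' }; // saved with the status doc

// Run N+1 (analysis): the stored snapshot is compared against live upstream status.
const stored = { hash: 'code-v1', dependencyResultHashes: { pricedelta: 'data-a' } };
const live   = { pricedelta: { hash: 'code-v2', resultHash: 'data-a' } };
// resultHash still 'data-a' -> dataChanged: false -> eligible for the early skip.
```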
--- a/package/functions/computation-system/context/ManifestBuilder.js
+++ b/package/functions/computation-system/context/ManifestBuilder.js
@@ -26,6 +26,39 @@ const LAYER_GROUPS = {
   'validators': ValidatorsLayer
 };

+/**
+ * Heuristic to estimate the "weight" of a calculation based on its output structure.
+ */
+function estimateComplexity(Class, metadata) {
+  let weight = 1.0; // Base weight (for single aggregate values)
+
+  try {
+    const schema = typeof Class.getSchema === 'function' ? Class.getSchema() : {};
+
+    // 1. Detect Map-like outputs (per-ticker, per-sector, per-user)
+    // If the schema uses patternProperties, it's likely a dynamic map.
+    if (schema.patternProperties || (schema.type === 'object' && !schema.properties)) {
+      weight *= 5.0; // Higher weight for dynamic maps (e.g., Per Sector/Ticker)
+    }
+
+    // 2. Metadata hints
+    const name = Class.name.toLowerCase();
+    if (name.includes('perstock') || name.includes('perticker')) weight *= 2.0;
+    if (name.includes('peruser')) weight *= 10.0; // Very high cost
+
+    // 3. Dependency hints
+    if (metadata.rootDataDependencies && metadata.rootDataDependencies.includes('portfolio')) {
+      // Portfolio-based calcs usually iterate over all users in the StandardExecutor
+      weight *= 1.5;
+    }
+
+  } catch (e) {
+    // Fallback to base weight if schema is missing/broken
+  }
+
+  return weight;
+}
+
 function generateLayerHashes(layerExports, layerName) {
   const hashes = {};
   const keys = Object.keys(layerExports).sort();
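The multipliers compound, so the heuristic is easiest to sanity-check with examples. The classes below are invented purely for illustration:

```js
class TotalVolume { // single aggregate output -> stays at the base weight
  static getSchema() { return { type: 'object', properties: { total: { type: 'number' } } }; }
}
class SentimentPerUser { // dynamic map output plus the "peruser" name hint
  static getSchema() { return { type: 'object', patternProperties: { '^.+$': { type: 'number' } } }; }
}

estimateComplexity(TotalVolume, {});      // 1.0
estimateComplexity(SentimentPerUser, {}); // 1.0 * 5 (map) * 10 (peruser) = 50
estimateComplexity(SentimentPerUser, { rootDataDependencies: ['portfolio'] }); // 50 * 1.5 = 75
```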
@@ -170,6 +203,7 @@ function buildManifest(productLinesToRun = [], calculations) {
   if (typeof Class.getDependencies !== 'function') { log.fatal(`Calculation "${normalizedName}" missing static getDependencies().`); hasFatalError = true; return; }

   const metadata = Class.getMetadata();
+  const weight = estimateComplexity(Class, metadata);
   const dependencies = Class.getDependencies().map(normalizeName);
   const codeStr = Class.toString();
   const selfCodeHash = generateCodeHash(codeStr);
@@ -222,6 +256,7 @@ function buildManifest(productLinesToRun = [], calculations) {
     dependencies: dependencies,
     pass: 0,
     hash: intrinsicHash,
+    weight: weight,
     composition: {
       epoch: SYSTEM_EPOCH,
       code: selfCodeHash,
--- a/package/functions/computation-system/helpers/computation_dispatcher.js
+++ b/package/functions/computation-system/helpers/computation_dispatcher.js
@@ -1,7 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
- *
+ * UPDATED: Sweep mode now auto-upgrades missed tasks to high-mem due to potential silent OOMs.
  */

 const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -12,7 +12,7 @@ const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
 const crypto = require('crypto');

 const OOM_THRESHOLD_MB = 1500;
-const
+const BASE_SECONDS_PER_WEIGHT_UNIT = 15;

 async function getHighMemReroutes(db, date, pass, tasks) {
   const reroutes = [];
@@ -47,6 +47,8 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   const passes = groupByPass(computationManifest);
   const calcsInThisPass = passes[passToRun] || [];

+  const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
+
   if (!calcsInThisPass.length) {
     logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}.`);
     return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
@@ -81,7 +83,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   let isReroute = false;
   let isSweep = false;

-  // Logic for Reroutes (OOM handling)
+  // Logic for Reroutes (Known OOM handling)
   if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
     const prevEntry = dirtyDates[targetCursorN - 2];
     const reroutes = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
@@ -92,16 +94,23 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     }
   }

-  // Logic for standard cursor progression
+  // Logic for standard cursor progression or Sweep (Recovery) mode
   if (!selectedDate) {
     if (targetCursorN <= dirtyDates.length) {
       const entry = dirtyDates[targetCursorN - 1];
       selectedDate = entry.date;
       selectedTasks = entry.tasks;
     } else if (dirtyDates.length > 0) {
+      // RECOVERY/SWEEP MODE:
+      // The cursor has passed the number of dirty dates, but tasks still remain.
+      // These tasks are automatically routed to 'high-mem' with recovery reasoning.
       isSweep = true;
       selectedDate = dirtyDates[0].date;
-      selectedTasks = dirtyDates[0].tasks
+      selectedTasks = dirtyDates[0].tasks.map(t => ({
+        ...t,
+        resources: 'high-mem',
+        reason: `${t.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
+      }));
     }
   }

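The sweep rewrite is a pure transformation over the leftover task objects; nothing else about the date entry changes. A worked example with a hypothetical task:

```js
const task = { name: 'SentimentPerUser', reason: 'New Calculation' };

const upgraded = {
  ...task,
  resources: 'high-mem',
  reason: `${task.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
};
// -> { name: 'SentimentPerUser',
//      reason: 'New Calculation [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]',
//      resources: 'high-mem' }
```

The rationale: a task that survived a full cursor sweep without completing most likely died without reporting, and a silent OOM is the usual cause, so it is retried on the larger memory class.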
@@ -110,22 +119,27 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
   }

+  const totalweight = selectedTasks.reduce((sum, t) => {
+    const weight = manifestWeightMap.get(normalizeName(t.name)) || 1.0;
+    return sum + weight;
+  }, 0);
+
   // 2. Prepare Payload and Telemetry
   const currentDispatchId = crypto.randomUUID();
-  const etaSeconds = Math.max(20,
+  const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
   const remainingDatesCount = Math.max(0, dirtyDates.length - targetCursorN);

-  // requirement: condense computations into a log payload
   const computationNames = selectedTasks.map(t => t.name);

-  logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s
-  date:
-  pass:
-  dispatchedCount:
+  logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s. [Mode: ${isSweep ? 'RECOVERY' : 'NORMAL'}]`, {
+    date: selectedDate,
+    pass: passToRun,
+    dispatchedCount: selectedTasks.length,
     remainingCursorDates: remainingDatesCount,
-
-
-
+    totalweight: totalweight,
+    etaSeconds: etaSeconds,
+    dispatchId: currentDispatchId,
+    tasks: computationNames
   });
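The published line reads `Math.cell(totalweight + BASE_SECONDS_PER_WEIGHT_UNIT)`; `Math.cell` does not exist, and the constant's name implies seconds per weight unit, so the hunk above is reconstructed as `Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT)`. A worked example under that assumption:

```js
const BASE_SECONDS_PER_WEIGHT_UNIT = 15;

// Three hypothetical tasks, weights looked up from manifestWeightMap:
const weights = [1.0, 5.0, 1.5]; // aggregate, per-ticker map, portfolio-based
const totalweight = weights.reduce((sum, w) => sum + w, 0); // 7.5

const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
// ceil(7.5 * 15) = 113 -> the workflow sleeps 113s before polling the cursor again
```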
@@ -135,7 +149,8 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     date: selectedDate,
     pass: passToRun,
     dispatchId: currentDispatchId,
-    triggerReason: t.reason
+    triggerReason: t.reason,
+    resources: t.resources || 'standard'
   });

   const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
@@ -164,7 +179,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     dispatched : selectedTasks.length,
     n_cursor_ignored: isReroute,
     etaSeconds : etaSeconds,
-    remainingDates : remainingDatesCount
+    remainingDates  : remainingDatesCount
   };
 }
--- a/package/functions/computation-system/helpers/computation_worker.js
+++ b/package/functions/computation-system/helpers/computation_worker.js
@@ -75,13 +75,13 @@ async function handleComputationTask(message, config, dependencies) {

   const calcUpdate = successUpdates[normalizeName(computation)] || {};
   const metrics = {
-    durationMs:
-    peakMemoryMB:
-    io:
-    storage:
-    execution:
-    validation:
-    composition:
+    durationMs: Date.now() - startTime,
+    peakMemoryMB: heartbeat.getPeak(),
+    io: calcUpdate.metrics?.io,
+    storage: calcUpdate.metrics?.storage,
+    execution: calcUpdate.metrics?.execution,
+    validation: calcUpdate.metrics?.validation,
+    composition: calcUpdate.composition
   };

   await db.doc(ledgerPath).update({ status: 'COMPLETED', completedAt: new Date() });
--- a/package/functions/computation-system/helpers/monitor.js
+++ b/package/functions/computation-system/helpers/monitor.js
@@ -2,6 +2,7 @@
  * @fileoverview Monitor helper for Cloud Workflows.
  * Checks the state of the Audit Ledger to determine if a pass is complete.
  * This function is stateless and receives dependencies via injection.
+ * THIS FILE IS NOW REDUNDANT, LOGIC HAS MOVED TO THE DISPATCHER AND WORKFLOW
  */

 /**
--- a/package/functions/computation-system/persistence/ResultCommitter.js
+++ b/package/functions/computation-system/persistence/ResultCommitter.js
@@ -29,7 +29,15 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   const shardIndexes = options.shardIndexes || {};
   const nextShardIndexes = {};
   const fanOutLimit = pLimit(10);
-
+
+  // 1. [BATCH OPTIMIZATION] Fetch all SimHashes and Contracts upfront
+  const calcNames = Object.keys(stateObj);
+  const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
+
+  const [contractMap, simHashMap] = await Promise.all([
+    fetchContracts(db, calcNames),
+    batchFetchSimHashes(db, hashKeys)
+  ]);

   for (const name in stateObj) {
     const calc = stateObj[name];
@@ -40,13 +48,11 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
       validation: { isValid: true, anomalies: [] },
       execution: execStats,
-      // [NEW] Track Ops
       io: { writes: 0, deletes: 0 }
     };

     try {
       const result = await calc.getResult();
-
       const configOverrides = validationOverrides[calc.manifest.name] || {};
       const dataDeps = calc.manifest.rootDataDependencies || [];
       const isPriceOnly = (dataDeps.length === 1 && dataDeps[0] === 'price');
@@ -81,13 +87,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
       const resultHash = isEmpty ? 'empty' : generateDataHash(result);

-
-
-      try {
-        const regDoc = await db.collection(SIMHASH_REGISTRY_COLLECTION).doc(calc.manifest.hash).get();
-        if (regDoc.exists) simHash = regDoc.data().simHash;
-      } catch (e) {}
-      }
+      // [NEW] Use pre-fetched SimHash
+      const simHash = (flushMode !== 'INTERMEDIATE') ? (simHashMap[calc.manifest.hash] || null) : null;

       if (isEmpty) {
         if (flushMode === 'INTERMEDIATE') { nextShardIndexes[name] = currentShardIndex; continue; }
@@ -112,8 +113,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       if (!dailyData || Object.keys(dailyData).length === 0) return;
       const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
       const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false);
-
-      // Aggregate IO Ops
       runMetrics.io.writes += stats.opCounts.writes;
       runMetrics.io.deletes += stats.opCounts.deletes;
     }));
@@ -158,6 +157,17 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
 }

+async function batchFetchSimHashes(db, hashes) {
+  if (!hashes || hashes.length === 0) return {};
+  const map = {};
+  const refs = hashes.map(h => db.collection(SIMHASH_REGISTRY_COLLECTION).doc(h));
+  try {
+    const snaps = await db.getAll(...refs);
+    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data().simHash; });
+  } catch (e) {}
+  return map;
+}
+
 async function fetchContracts(db, calcNames) {
   if (!calcNames || calcNames.length === 0) return {};
   const map = {};
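`db.getAll(...refs)` is the Firestore Node client's batched read: one round-trip for all registry documents instead of one awaited `get()` per calculation inside the commit loop, which is what the replaced inline block did. A usage sketch, assuming `db` is an initialized Firestore instance and the hashes are hypothetical:

```js
// Inside an async function, alongside the contract fetch it is batched with:
const simHashMap = await batchFetchSimHashes(db, ['hashA', 'hashB']);
simHashMap['hashA'];         // the registered simHash, when the registry doc exists
simHashMap['hashC'] ?? null; // unknown hashes simply have no entry
```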
--- a/package/functions/computation-system/workflows/bulltrackers_pipeline.yaml
+++ b/package/functions/computation-system/workflows/bulltrackers_pipeline.yaml
@@ -1,6 +1,5 @@
 # Cloud Workflows: Precision Cursor-Based Orchestrator
-#
-# UPDATED: Added Short-Circuit logic to break infinite loops on empty dispatches.
+# UPDATED: Added satiation detection to break early on 0 remaining dates.

 main:
   params: [input]
@@ -9,15 +8,9 @@ main:
       assign:
         - project: '${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}'
         - passes: ["1", "2", "3", "4", "5"]
-        # Dynamically calculate today's date (YYYY-MM-DD) if no date is provided in input
        - current_date: '${text.split(time.format(sys.now()), "T")[0]}'
         - date_to_run: '${default(map.get(input, "date"), current_date)}'

-    - log_start:
-        call: sys.log
-        args:
-          text: '${"Starting Pipeline execution. Target Date Limit: " + date_to_run}'
-
     - run_sequential_passes:
         for:
           value: pass_id
@@ -27,7 +20,7 @@ main:
             assign:
               - n_cursor: 1
               - pass_complete: false
-              - consecutive_empty_dispatches: 0
+              - consecutive_empty_dispatches: 0

           - sequential_date_loop:
              switch:
@@ -46,34 +39,36 @@

           - evaluate_dispatch:
              switch:
-                # State 1: Dispatcher signal to move to the next pass
                - condition: '${dispatch_res.body.status == "MOVE_TO_NEXT_PASS"}'
                  assign:
                    - pass_complete: true

-                #
+                # NEW: Explicit Satiation Check
+                - condition: '${dispatch_res.body.status == "CONTINUE_PASS" and dispatch_res.body.remainingDates == 0}'
+                  steps:
+                    - log_satiation:
+                        call: sys.log
+                        args:
+                          text: '${"Pass " + pass_id + " - ✅ Pass satiated (0 remaining dates). Moving to next pass."}'
+                    - mark_complete:
+                        assign:
+                          - pass_complete: true
+
                - condition: '${dispatch_res.body.dispatched > 0}'
                  steps:
                    - reset_retry_counter:
                        assign:
-                          - consecutive_empty_dispatches: 0
-                    - log_dispatch:
-                        call: sys.log
-                        args:
-                          text: '${"Pass " + pass_id + " - Dispatching " + string(dispatch_res.body.dispatched) + " tasks for " + dispatch_res.body.dateProcessed + ". ETA: " + string(dispatch_res.body.etaSeconds) + "s."}'
+                          - consecutive_empty_dispatches: 0
                    - wait_for_completion:
                        call: sys.sleep
                        args:
                          seconds: '${int(dispatch_res.body.etaSeconds)}'
                    - update_cursor:
                        assign:
-                          # If n_cursor_ignored is true, stay on same N to retry (e.g. for high-mem)
                          - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
                    - next_loop_work:
                        next: sequential_date_loop

-                # State 3: No tasks dispatched (Potential Infinite Loop Scenario)
-                # The Dispatcher is "Continuing" but found nothing runnable on the target date.
                - condition: '${dispatch_res.body.dispatched == 0}'
                  steps:
                    - increment_retry:
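The new branch keys off two fields the dispatcher already returns, `status` and `remainingDates`. A response shape that previously cost up to three empty-dispatch retries, but now completes the pass immediately (field values invented for illustration):

```js
const dispatch_res = {
  body: {
    status: 'CONTINUE_PASS',
    dispatched: 0,       // nothing runnable on the target date
    remainingDates: 0,   // and the cursor has consumed every dirty date
    etaSeconds: 0,
    n_cursor_ignored: false
  }
};
// '${...status == "CONTINUE_PASS" and ...remainingDates == 0}' matches first,
// so pass_complete flips to true without touching the retry counter.
```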
@@ -81,33 +76,19 @@ main:
                          - consecutive_empty_dispatches: '${consecutive_empty_dispatches + 1}'
                    - check_break_condition:
                        switch:
-                          # If we have tried 3 times in a row with 0 results, assume the date is "stuck"
                          - condition: '${consecutive_empty_dispatches >= 3}'
-
-                            -
-                                call: sys.log
-                                args:
-                                  text: '${"Pass " + pass_id + " - 🛑 FORCE BREAK: 3 consecutive empty dispatches. Moving to next pass to prevent infinite loop."}'
-                            - force_complete:
-                                assign:
-                                  - pass_complete: true
-                          # Otherwise, wait briefly and retry (or move cursor depending on dispatcher logic)
+                            assign:
+                              - pass_complete: true
                          - condition: '${true}'
                            steps:
-                              - log_retry:
-                                  call: sys.log
-                                  args:
-                                    text: '${"Pass " + pass_id + " - Empty dispatch (" + string(consecutive_empty_dispatches) + "/3). Retrying..."}'
                              - wait_short:
                                  call: sys.sleep
                                  args:
                                    seconds: 5
                              - update_cursor_retry:
                                  assign:
-                                    # Still advance cursor if it wasn't a strict reroute, to try next date
                                    - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
                              - next_loop_retry:
                                  next: sequential_date_loop
-
   - finish:
       return: "Pipeline Execution Satiated and Complete"