bulltrackers-module 1.0.311 → 1.0.313
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/WorkflowOrchestrator.js +41 -51
- package/functions/computation-system/helpers/computation_dispatcher.js +35 -13
- package/functions/computation-system/helpers/computation_worker.js +7 -7
- package/functions/computation-system/helpers/monitor.js +1 -0
- package/functions/computation-system/persistence/ResultCommitter.js +22 -12
- package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +16 -35
- package/package.json +1 -1
package/functions/computation-system/WorkflowOrchestrator.js

@@ -1,6 +1,6 @@
 /**
  * FILENAME: computation-system/WorkflowOrchestrator.js
- * UPDATED: Implements Data-Drift Detection
+ * UPDATED: Implements Early-Skip Optimization for Data-Drift Detection.
  */
 
 const { normalizeName, DEFINITIVE_EARLIEST_DATES } = require('./utils/utils');
@@ -12,10 +12,6 @@ const { MetaExecutor } = require('./executor
 
 const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';
 
-/**
- * Groups manifest entries by their pass number.
- * Required by the Dispatcher to identify current work-sets.
- */
 function groupByPass(manifest) {
   const passes = {};
   manifest.forEach(calc => {
@@ -25,34 +21,20 @@ function groupByPass(manifest) {
   return passes;
 }
 
-/**
- * Core Short-Circuit Logic.
- * Checks if a dependency is satisfied AND checks for Data Drift.
- * Returns { ready: boolean, dataChanged: boolean, reason: string }
- */
 function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStatusMap, manifestMap, storedStatus) {
   const norm = normalizeName(depName);
   const targetStatus = isHistoricalSelf ? (prevStatusMap ? prevStatusMap[norm] : null) : currentStatusMap[norm];
   const depManifest = manifestMap.get(norm);
 
-  // 1. Availability Check
   if (!targetStatus) return { ready: false, reason: 'Missing' };
   if (String(targetStatus.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX)) return { ready: false, reason: 'Impossible Upstream' };
 
-  // 2. Code Hash Check (The dependency must be running the correct version)
-  // If the dependency's hash doesn't match its manifest, it means the dependency itself needs to run/update first.
   if (depManifest && targetStatus.hash !== depManifest.hash) {
     return { ready: false, reason: 'Dependency Version Mismatch' };
   }
 
-  // 3. Data Integrity Check (The Short-Circuit Logic)
-  // We check if the result hash of the dependency matches what we remember using last time.
   if (storedStatus && storedStatus.dependencyResultHashes) {
     const lastSeenResultHash = storedStatus.dependencyResultHashes[depName];
-
-    // If we recorded a dependency hash last time, and it differs from the current live status,
-    // then the dependency has produced NEW data. We are NOT ready to skip.
-    // We return 'ready: true' (it exists) but we flag 'dataChanged: true' to force execution.
     if (lastSeenResultHash && targetStatus.resultHash !== lastSeenResultHash) {
       return { ready: true, dataChanged: true, reason: 'Dependency Data Update' };
     }
@@ -61,6 +43,9 @@ function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStat
   return { ready: true, dataChanged: false };
 }
 
+/**
+ * UPDATED: Logic moved to top of loop for early skip on stable data.
+ */
 function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
   const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
   const simulationStatus = { ...dailyStatus };
@@ -82,33 +67,56 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
       continue;
     }
 
-    //
+    // --- OPTIMIZATION: Early skip if code matches AND data is stable ---
+    if (stored?.hash === currentHash) {
+      let hasDataDrift = false;
+      let isBlocked = false;
+      let missingDeps = [];
+
+      if (calc.dependencies) {
+        for (const dep of calc.dependencies) {
+          const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
+          if (!check.ready) missingDeps.push(dep);
+          else if (check.dataChanged) { hasDataDrift = true; break; }
+        }
+      }
+
+      if (!hasDataDrift && missingDeps.length === 0 && calc.isHistorical) {
+        const yesterday = new Date(dateStr + 'T00:00:00Z');
+        yesterday.setUTCDate(yesterday.getUTCDate() - 1);
+        if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
+          const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
+          if (!check.ready) isBlocked = true;
+          else if (check.dataChanged) hasDataDrift = true;
+        }
+      }
+
+      if (!hasDataDrift && !isBlocked && missingDeps.length === 0) {
+        report.skipped.push({ name: cName, reason: "Up To Date" });
+        continue;
+      }
+    }
+    // --- END OPTIMIZATION ---
+
     const missingDeps = [];
-    let hasDataDrift = false;
+    let hasDataDrift = false;
     let isBlocked = false;
 
-    // A. Standard Upstream Dependencies
     if (calc.dependencies) {
      for (const dep of calc.dependencies) {
        const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
-        if (!check.ready) {
-          missingDeps.push(dep);
-        } else if (check.dataChanged) {
-          hasDataDrift = true;
-        }
+        if (!check.ready) missingDeps.push(dep);
+        else if (check.dataChanged) hasDataDrift = true;
      }
    }
 
-    // B. Temporal Dependency (Yesterday's Self)
    if (calc.isHistorical) {
      const yesterday = new Date(dateStr + 'T00:00:00Z');
      yesterday.setUTCDate(yesterday.getUTCDate() - 1);
-
-      // Only block if yesterday is a valid data date.
      if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
        const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
        if (!check.ready) isBlocked = true;
-        else if (check.dataChanged) hasDataDrift = true;
+        else if (check.dataChanged) hasDataDrift = true;
      }
    }
 
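Note: the early-skip block intentionally duplicates the dependency scan that follows it, trading a second pass for the ability to `continue` before any payload assembly. A minimal sketch of the skip decision, assuming the `isDependencyReady` contract and the `{ hash, resultHash, dependencyResultHashes }` status shape shown in this diff (the historical yesterday-branch is omitted for brevity; this is illustration, not the package's API):

// Sketch only — mirrors the early-skip decision above under the stated assumptions.
function canSkipEarly(calc, stored, currentHash, statusMap, manifestMap) {
  if (stored?.hash !== currentHash) return false;        // code changed: must fall through
  for (const dep of calc.dependencies || []) {
    const check = isDependencyReady(dep, false, statusMap, null, manifestMap, stored);
    if (!check.ready || check.dataChanged) return false; // missing or drifted input
  }
  return true;                                           // stable code + stable inputs
}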
@@ -128,9 +136,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
      continue;
    }
 
-    // 3. Execution Decision
-
-    // Collect current dependency result hashes to be saved if we run
    const currentDependencyResultHashes = {};
    if (calc.dependencies) {
      calc.dependencies.forEach(d => {
@@ -142,23 +147,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
    const taskPayload = { name: cName, dependencyResultHashes: currentDependencyResultHashes };
 
    if (!stored?.hash) {
-      // Case A: New Calculation (Never run)
      report.runnable.push({ ...taskPayload, reason: "New Calculation" });
-
-    }
-    else if (stored.hash !== currentHash) {
-      // Case B: Code Hash Mismatch (Logic Changed)
+    } else if (stored.hash !== currentHash) {
      report.reRuns.push({ ...taskPayload, oldHash: stored.hash, newHash: currentHash, reason: "Hash Mismatch" });
-
-    }
-    else if (hasDataDrift) {
-      // Case C: Code Matches, BUT Input Data Changed (The Holy Grail Optimization)
+    } else if (hasDataDrift) {
      report.runnable.push({ ...taskPayload, reason: "Input Data Changed" });
-      simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
-    }
-    else {
-      // Case D: Code Matches AND Data Matches -> Short Circuit
-      report.skipped.push({ name: cName, reason: "Up To Date" });
    }
  }
  return report;
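With the old Case D removed, the fall-through now covers three outcomes, the fourth being reported by the early-skip block instead. Summarized as a sketch (function name hypothetical, logic taken from the diff above):

// Hypothetical summary of the dispatch decision, for reference only.
function classify(stored, currentHash, hasDataDrift) {
  if (!stored?.hash) return 'runnable (New Calculation)';
  if (stored.hash !== currentHash) return 'reRun (Hash Mismatch)';
  if (hasDataDrift) return 'runnable (Input Data Changed)';
  return 'skipped (Up To Date)'; // now emitted by the early-skip block
}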
@@ -170,9 +163,6 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep
  const calcManifest = manifestMap.get(normalizeName(targetComputation));
 
  if (!calcManifest) throw new Error(`Calc '${targetComputation}' not found.`);
-
-  // [CRITICAL] Inject the fresh dependency result hashes so they are saved to DB on commit.
-  // This enables the "lastSeenResultHash" check in future runs.
  calcManifest.dependencyResultHashes = dependencyResultHashes;
 
  const rootData = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
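The injected dependencyResultHashes map is what a later run reads back as lastSeenResultHash. A hypothetical status record implied by this diff (field names come from the code above; the dependency name and hash values are invented):

// Invented example of a committed status record; only the field names are from this diff.
const storedStatus = {
  hash: 'abc123',                 // code hash of this calc when it last ran
  resultHash: 'def456',           // hash of the data it produced
  dependencyResultHashes: {
    'someUpstreamCalc': '789fed'  // each input's resultHash as seen at run time
  }
};
// Next run: if the live resultHash of 'someUpstreamCalc' differs from '789fed',
// isDependencyReady returns { ready: true, dataChanged: true } and forces execution.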
package/functions/computation-system/helpers/computation_dispatcher.js

@@ -1,7 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
- *
+ * UPDATED: Sweep mode now auto-upgrades missed tasks to high-mem due to potential silent OOMs.
  */
 
 const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -9,7 +9,7 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
 const { PubSubUtils } = require('../../core/utils/pubsub_utils');
 const { fetchComputationStatus } = require('../persistence/StatusRepository');
 const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
-const crypto = require('crypto');
+const crypto = require('crypto');
 
 const OOM_THRESHOLD_MB = 1500;
 const SECONDS_PER_CALC_MARGIN = 25;
@@ -43,14 +43,12 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  const targetCursorN = parseInt(reqBody.cursorIndex || 1);
  const dateLimitStr = reqBody.date || config.date || "2025-01-01";
 
-  logger.log('INFO', `[Dispatcher] 🚀 STARTING DISPATCH: Pass ${passToRun}, Cursor ${targetCursorN}, Limit ${dateLimitStr}`);
-
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
  const passes = groupByPass(computationManifest);
  const calcsInThisPass = passes[passToRun] || [];
 
  if (!calcsInThisPass.length) {
-    logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}`);
+    logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}.`);
    return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
  }
 
@@ -62,6 +60,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
    return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
  }
 
+  // 1. Identify all "Dirty" dates (dates that actually have work to do)
  const dirtyDates = [];
  for (const d of allDates) {
    const dailyStatus = await fetchComputationStatus(d, config, dependencies);
@@ -82,10 +81,10 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  let isReroute = false;
  let isSweep = false;
 
+  // Logic for Reroutes (Known OOM handling)
  if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
    const prevEntry = dirtyDates[targetCursorN - 2];
    const reroutes = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
-
    if (reroutes.length > 0) {
      selectedDate = prevEntry.date;
      selectedTasks = reroutes;
@@ -93,24 +92,47 @@ async function dispatchComputationPass(config, dependencies, computationManifest
    }
  }
 
+  // Logic for standard cursor progression or Sweep (Recovery) mode
  if (!selectedDate) {
    if (targetCursorN <= dirtyDates.length) {
      const entry = dirtyDates[targetCursorN - 1];
      selectedDate = entry.date;
      selectedTasks = entry.tasks;
    } else if (dirtyDates.length > 0) {
+      // RECOVERY/SWEEP MODE:
+      // The cursor has passed the number of dirty dates, but tasks still remain.
+      // These tasks are automatically routed to 'high-mem' with recovery reasoning.
      isSweep = true;
      selectedDate = dirtyDates[0].date;
-      selectedTasks = dirtyDates[0].tasks
+      selectedTasks = dirtyDates[0].tasks.map(t => ({
+        ...t,
+        resources: 'high-mem',
+        reason: `${t.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
+      }));
    }
  }
 
  if (!selectedDate) {
+    logger.log('INFO', `[Dispatcher] 🏁 Pass ${passToRun} is fully satiated. No work remaining.`);
    return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
  }
 
-  //
+  // 2. Prepare Payload and Telemetry
  const currentDispatchId = crypto.randomUUID();
+  const etaSeconds = Math.max(20, selectedTasks.length * SECONDS_PER_CALC_MARGIN);
+  const remainingDatesCount = Math.max(0, dirtyDates.length - targetCursorN);
+
+  const computationNames = selectedTasks.map(t => t.name);
+
+  logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s. [Mode: ${isSweep ? 'RECOVERY' : 'NORMAL'}]`, {
+    date: selectedDate,
+    pass: passToRun,
+    dispatchedCount: selectedTasks.length,
+    remainingCursorDates: remainingDatesCount,
+    etaSeconds: etaSeconds,
+    dispatchId: currentDispatchId,
+    tasks: computationNames
+  });
 
  const mapToTaskPayload = (t) => ({
    ...t,
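The cursor is 1-based over the dirty-date list, so cursor N normally selects dirtyDates[N - 1]; once N runs past the end while dirty dates remain, the dispatcher sweeps back to the first one with upgraded resources. A condensed sketch of that selection rule (simplified from the code above; omits the high-mem reroute branch):

// Simplified sketch of the cursor rule shown in this hunk.
function selectWork(dirtyDates, cursorN) {
  if (cursorN <= dirtyDates.length) {
    return { entry: dirtyDates[cursorN - 1], sweep: false }; // normal progression
  }
  if (dirtyDates.length > 0) {
    return { entry: dirtyDates[0], sweep: true };            // recovery sweep
  }
  return null;                                               // pass satiated
}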
@@ -118,8 +140,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
    computation: t.name,
    date: selectedDate,
    pass: passToRun,
-    dispatchId: currentDispatchId,
-    triggerReason: t.reason
+    dispatchId: currentDispatchId,
+    triggerReason: t.reason,
+    resources: t.resources || 'standard'
  });
 
  const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
@@ -142,14 +165,13 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  }
  await Promise.all(pubPromises);
 
-  const etaSeconds = Math.max(20, selectedTasks.length * SECONDS_PER_CALC_MARGIN);
-
  return {
    status          : isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
    dateProcessed   : selectedDate,
    dispatched      : selectedTasks.length,
    n_cursor_ignored: isReroute,
-    etaSeconds      : etaSeconds
+    etaSeconds      : etaSeconds,
+    remainingDates  : remainingDatesCount
  };
}
 
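This return shape is exactly what the workflow's evaluate_dispatch switch inspects. An illustrative response body (field names from the code above; values invented):

// Illustrative payload consumed by bulltrackers_pipeline.yaml; values are invented.
({
  status          : 'CONTINUE_PASS',
  dateProcessed   : '2025-01-01',
  dispatched      : 4,
  n_cursor_ignored: false,
  etaSeconds      : 100,
  remainingDates  : 0   // 0 triggers the workflow's new satiation branch
});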
package/functions/computation-system/helpers/computation_worker.js

@@ -75,13 +75,13 @@ async function handleComputationTask(message, config, dependencies) {
 
  const calcUpdate = successUpdates[normalizeName(computation)] || {};
  const metrics = {
-    durationMs:
-    peakMemoryMB:
-    io:
-    storage:
-    execution:
-    validation:
-    composition:
+    durationMs: Date.now() - startTime,
+    peakMemoryMB: heartbeat.getPeak(),
+    io: calcUpdate.metrics?.io,
+    storage: calcUpdate.metrics?.storage,
+    execution: calcUpdate.metrics?.execution,
+    validation: calcUpdate.metrics?.validation,
+    composition: calcUpdate.composition
  };
 
  await db.doc(ledgerPath).update({ status: 'COMPLETED', completedAt: new Date() });
package/functions/computation-system/helpers/monitor.js

@@ -2,6 +2,7 @@
  * @fileoverview Monitor helper for Cloud Workflows.
  * Checks the state of the Audit Ledger to determine if a pass is complete.
  * This function is stateless and receives dependencies via injection.
+ * THIS FILE IS NOW REDUNDANT, LOGIC HAS MOVED TO THE DISPATCHER AND WORKFLOW
  */
 
 /**
package/functions/computation-system/persistence/ResultCommitter.js

@@ -29,7 +29,15 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const shardIndexes = options.shardIndexes || {};
  const nextShardIndexes = {};
  const fanOutLimit = pLimit(10);
-
+
+  // 1. [BATCH OPTIMIZATION] Fetch all SimHashes and Contracts upfront
+  const calcNames = Object.keys(stateObj);
+  const hashKeys = calcNames.map(n => stateObj[n].manifest?.hash).filter(Boolean);
+
+  const [contractMap, simHashMap] = await Promise.all([
+    fetchContracts(db, calcNames),
+    batchFetchSimHashes(db, hashKeys)
+  ]);
 
  for (const name in stateObj) {
    const calc = stateObj[name];
@@ -40,13 +48,11 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
      storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
      validation: { isValid: true, anomalies: [] },
      execution: execStats,
-      // [NEW] Track Ops
      io: { writes: 0, deletes: 0 }
    };
 
    try {
      const result = await calc.getResult();
-
      const configOverrides = validationOverrides[calc.manifest.name] || {};
      const dataDeps = calc.manifest.rootDataDependencies || [];
      const isPriceOnly = (dataDeps.length === 1 && dataDeps[0] === 'price');
@@ -81,13 +87,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
      const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
      const resultHash = isEmpty ? 'empty' : generateDataHash(result);
 
-
-
-        try {
-          const regDoc = await db.collection(SIMHASH_REGISTRY_COLLECTION).doc(calc.manifest.hash).get();
-          if (regDoc.exists) simHash = regDoc.data().simHash;
-        } catch (e) {}
-      }
+      // [NEW] Use pre-fetched SimHash
+      const simHash = (flushMode !== 'INTERMEDIATE') ? (simHashMap[calc.manifest.hash] || null) : null;
 
      if (isEmpty) {
        if (flushMode === 'INTERMEDIATE') { nextShardIndexes[name] = currentShardIndex; continue; }
@@ -112,8 +113,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
        if (!dailyData || Object.keys(dailyData).length === 0) return;
        const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
        const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false);
-
-        // Aggregate IO Ops
        runMetrics.io.writes += stats.opCounts.writes;
        runMetrics.io.deletes += stats.opCounts.deletes;
      }));
@@ -158,6 +157,17 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
}
 
+async function batchFetchSimHashes(db, hashes) {
+  if (!hashes || hashes.length === 0) return {};
+  const map = {};
+  const refs = hashes.map(h => db.collection(SIMHASH_REGISTRY_COLLECTION).doc(h));
+  try {
+    const snaps = await db.getAll(...refs);
+    snaps.forEach(snap => { if (snap.exists) map[snap.id] = snap.data().simHash; });
+  } catch (e) {}
+  return map;
+}
+
async function fetchContracts(db, calcNames) {
  if (!calcNames || calcNames.length === 0) return {};
  const map = {};
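batchFetchSimHashes swaps N sequential registry reads for a single round trip: the Firestore Admin SDK's db.getAll(...refs) fetches many document references at once and returns their snapshots in order. A minimal usage sketch (hash values invented):

// Usage sketch; hash values are invented examples.
const simHashMap = await batchFetchSimHashes(db, ['abc123', 'def456']);
// -> e.g. { abc123: '<simHash>', def456: '<simHash>' }
// Registry docs that don't exist are simply absent from the map, and any
// fetch error degrades to an empty map, matching the old per-doc try/catch.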
package/functions/computation-system/workflows/bulltrackers_pipeline.yaml

@@ -1,6 +1,5 @@
 # Cloud Workflows: Precision Cursor-Based Orchestrator
-#
-# UPDATED: Added Short-Circuit logic to break infinite loops on empty dispatches.
+# UPDATED: Added satiation detection to break early on 0 remaining dates.
 
 main:
   params: [input]
@@ -9,15 +8,9 @@ main:
        assign:
          - project: '${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}'
          - passes: ["1", "2", "3", "4", "5"]
-          # Dynamically calculate today's date (YYYY-MM-DD) if no date is provided in input
          - current_date: '${text.split(time.format(sys.now()), "T")[0]}'
          - date_to_run: '${default(map.get(input, "date"), current_date)}'
 
-    - log_start:
-        call: sys.log
-        args:
-          text: '${"Starting Pipeline execution. Target Date Limit: " + date_to_run}'
-
    - run_sequential_passes:
        for:
          value: pass_id
@@ -27,7 +20,7 @@ main:
                assign:
                  - n_cursor: 1
                  - pass_complete: false
-                  - consecutive_empty_dispatches: 0
+                  - consecutive_empty_dispatches: 0
 
            - sequential_date_loop:
                switch:
@@ -46,34 +39,36 @@
 
            - evaluate_dispatch:
                switch:
-                  # State 1: Dispatcher signal to move to the next pass
                  - condition: '${dispatch_res.body.status == "MOVE_TO_NEXT_PASS"}'
                    assign:
                      - pass_complete: true
 
-                  #
+                  # NEW: Explicit Satiation Check
+                  - condition: '${dispatch_res.body.status == "CONTINUE_PASS" and dispatch_res.body.remainingDates == 0}'
+                    steps:
+                      - log_satiation:
+                          call: sys.log
+                          args:
+                            text: '${"Pass " + pass_id + " - ✅ Pass satiated (0 remaining dates). Moving to next pass."}'
+                      - mark_complete:
+                          assign:
+                            - pass_complete: true
+
                  - condition: '${dispatch_res.body.dispatched > 0}'
                    steps:
                      - reset_retry_counter:
                          assign:
-                            - consecutive_empty_dispatches: 0
-                      - log_dispatch:
-                          call: sys.log
-                          args:
-                            text: '${"Pass " + pass_id + " - Dispatching " + string(dispatch_res.body.dispatched) + " tasks for " + dispatch_res.body.dateProcessed + ". ETA: " + string(dispatch_res.body.etaSeconds) + "s."}'
+                            - consecutive_empty_dispatches: 0
                      - wait_for_completion:
                          call: sys.sleep
                          args:
                            seconds: '${int(dispatch_res.body.etaSeconds)}'
                      - update_cursor:
                          assign:
-                            # If n_cursor_ignored is true, stay on same N to retry (e.g. for high-mem)
                            - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
                      - next_loop_work:
                          next: sequential_date_loop
 
-                  # State 3: No tasks dispatched (Potential Infinite Loop Scenario)
-                  # The Dispatcher is "Continuing" but found nothing runnable on the target date.
                  - condition: '${dispatch_res.body.dispatched == 0}'
                    steps:
                      - increment_retry:
@@ -81,33 +76,19 @@
                            - consecutive_empty_dispatches: '${consecutive_empty_dispatches + 1}'
                      - check_break_condition:
                          switch:
-                            # If we have tried 3 times in a row with 0 results, assume the date is "stuck"
                            - condition: '${consecutive_empty_dispatches >= 3}'
-                              steps:
-                                -
-                                    call: sys.log
-                                    args:
-                                      text: '${"Pass " + pass_id + " - 🛑 FORCE BREAK: 3 consecutive empty dispatches. Moving to next pass to prevent infinite loop."}'
-                                - force_complete:
-                                    assign:
-                                      - pass_complete: true
-                            # Otherwise, wait briefly and retry (or move cursor depending on dispatcher logic)
+                              assign:
+                                - pass_complete: true
                            - condition: '${true}'
                              steps:
-                                - log_retry:
-                                    call: sys.log
-                                    args:
-                                      text: '${"Pass " + pass_id + " - Empty dispatch (" + string(consecutive_empty_dispatches) + "/3). Retrying..."}'
                                - wait_short:
                                    call: sys.sleep
                                    args:
                                      seconds: 5
                                - update_cursor_retry:
                                    assign:
-                                      # Still advance cursor if it wasn't a strict reroute, to try next date
                                      - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
                                - next_loop_retry:
                                    next: sequential_date_loop
-
            - finish:
                return: "Pipeline Execution Satiated and Complete"
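Taken together, the pass loop now has three exits: the dispatcher's explicit MOVE_TO_NEXT_PASS signal, the new satiation branch (CONTINUE_PASS with remainingDates == 0), and the three-strikes empty-dispatch breaker above. The same conditions, expressed as a JavaScript sketch for clarity (function name hypothetical):

// Hypothetical predicate mirroring the workflow's pass-exit conditions.
function passComplete(body, consecutiveEmptyDispatches) {
  if (body.status === 'MOVE_TO_NEXT_PASS') return true;                          // dispatcher signal
  if (body.status === 'CONTINUE_PASS' && body.remainingDates === 0) return true; // satiation
  if (body.dispatched === 0 && consecutiveEmptyDispatches >= 3) return true;     // stuck-date breaker
  return false;
}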