bulltrackers-module 1.0.303 → 1.0.305
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FILENAME: computation-system/helpers/computation_dispatcher.js
|
|
3
|
-
* PURPOSE: Sequential Cursor-Based Dispatcher.
|
|
4
|
-
* IMPLEMENTS: Dirty-Date Discovery, Forensics Rerouting, and Satiation Sweeps.
|
|
3
|
+
* PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
|
|
5
4
|
*/
|
|
6
5
|
|
|
7
6
|
const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
|
|
@@ -11,22 +10,17 @@ const { fetchComputationStatus } = require('../persistence/StatusRepository');
|
|
|
11
10
|
const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
|
|
12
11
|
|
|
13
12
|
const OOM_THRESHOLD_MB = 1500;
|
|
14
|
-
const SECONDS_PER_CALC_MARGIN = 25;
|
|
13
|
+
const SECONDS_PER_CALC_MARGIN = 25;
|
|
15
14
|
|
|
16
|
-
/**
|
|
17
|
-
* Checks if specific tasks on a date need a high-memory reroute.
|
|
18
|
-
* Returns only tasks that failed on 'standard' and haven't been tried on 'high-mem'.
|
|
19
|
-
*/
|
|
20
15
|
async function getHighMemReroutes(db, date, pass, tasks) {
|
|
21
16
|
const reroutes = [];
|
|
22
17
|
for (const task of tasks) {
|
|
23
|
-
const name
|
|
18
|
+
const name = normalizeName(task.name);
|
|
24
19
|
const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
|
|
25
|
-
const doc
|
|
20
|
+
const doc = await db.doc(ledgerPath).get();
|
|
26
21
|
|
|
27
22
|
if (doc.exists) {
|
|
28
23
|
const data = doc.data();
|
|
29
|
-
// Check if it failed due to memory and hasn't been attempted on high-mem yet
|
|
30
24
|
const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
|
|
31
25
|
(data.resourceTier !== 'high-mem') &&
|
|
32
26
|
((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
|
|
@@ -41,108 +35,138 @@ async function getHighMemReroutes(db, date, pass, tasks) {
|
|
|
41
35
|
|
|
42
36
|
async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
|
|
43
37
|
const { logger, db } = dependencies;
|
|
44
|
-
const pubsubUtils
|
|
38
|
+
const pubsubUtils = new PubSubUtils(dependencies);
|
|
45
39
|
|
|
46
|
-
//
|
|
47
|
-
const passToRun
|
|
40
|
+
// 1. Capture Inputs
|
|
41
|
+
const passToRun = String(reqBody.pass || config.COMPUTATION_PASS_TO_RUN || "1");
|
|
48
42
|
const targetCursorN = parseInt(reqBody.cursorIndex || 1);
|
|
49
|
-
const dateLimitStr
|
|
43
|
+
const dateLimitStr = reqBody.date || config.date || "2025-01-01";
|
|
50
44
|
|
|
51
|
-
|
|
52
|
-
|
|
45
|
+
logger.log('INFO', `[Dispatcher] ๐ STARTING DISPATCH: Pass ${passToRun}, Cursor ${targetCursorN}, Limit ${dateLimitStr}`);
|
|
46
|
+
|
|
47
|
+
const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
|
|
48
|
+
const passes = groupByPass(computationManifest);
|
|
53
49
|
const calcsInThisPass = passes[passToRun] || [];
|
|
54
50
|
|
|
55
51
|
if (!calcsInThisPass.length) {
|
|
52
|
+
logger.log('WARN', `[Dispatcher] ๐ No calculations found for Pass ${passToRun}. Moving to next pass.`);
|
|
56
53
|
return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
|
|
57
54
|
}
|
|
58
55
|
|
|
59
|
-
//
|
|
56
|
+
// 2. Discover Discovery Boundaries
|
|
60
57
|
const earliestDates = await getEarliestDataDates(config, dependencies);
|
|
58
|
+
logger.log('INFO', `[Dispatcher] Discovery Boundaries: Earliest=${earliestDates.absoluteEarliest.toISOString().slice(0,10)}, Limit=${dateLimitStr}`);
|
|
59
|
+
|
|
61
60
|
const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
|
|
62
61
|
|
|
62
|
+
if (allDates.length === 0) {
|
|
63
|
+
logger.log('ERROR', `[Dispatcher] โ Date range is empty. Check if dateLimit is before earliest data.`);
|
|
64
|
+
return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// 3. Date Scanning Loop
|
|
63
68
|
const dirtyDates = [];
|
|
69
|
+
let blockedCount = 0;
|
|
70
|
+
let upToDateCount = 0;
|
|
71
|
+
|
|
72
|
+
logger.log('INFO', `[Dispatcher] Scanning ${allDates.length} dates for work...`);
|
|
73
|
+
|
|
64
74
|
for (const d of allDates) {
|
|
65
|
-
const dailyStatus
|
|
75
|
+
const dailyStatus = await fetchComputationStatus(d, config, dependencies);
|
|
66
76
|
const availability = await checkRootDataAvailability(d, config, dependencies, DEFINITIVE_EARLIEST_DATES);
|
|
67
77
|
|
|
78
|
+
// Detailed check on availability status
|
|
79
|
+
if (!availability || !availability.status.hasPrices) {
|
|
80
|
+
// Log every 30 days to avoid log spam if data is missing for long periods
|
|
81
|
+
if (allDates.indexOf(d) % 30 === 0) logger.log('DEBUG', `[Dispatcher] ${d}: Root Data Index Missing or Price=false.`);
|
|
82
|
+
blockedCount++;
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
68
86
|
const report = analyzeDateExecution(d, calcsInThisPass, availability.status, dailyStatus, manifestMap, null);
|
|
69
|
-
const tasks
|
|
87
|
+
const tasks = [...report.runnable, ...report.reRuns];
|
|
70
88
|
|
|
71
89
|
if (tasks.length > 0) {
|
|
90
|
+
logger.log('INFO', `[Dispatcher] ✨ Found Dirty Date: ${d} (${tasks.length} tasks)`);
|
|
72
91
|
dirtyDates.push({ date: d, tasks });
|
|
92
|
+
} else {
|
|
93
|
+
upToDateCount++;
|
|
73
94
|
}
|
|
74
95
|
}
|
|
75
96
|
|
|
76
|
-
|
|
97
|
+
logger.log('INFO', `[Dispatcher] Scan Complete: ${dirtyDates.length} dirty, ${upToDateCount} up-to-date, ${blockedCount} blocked/missing data.`);
|
|
98
|
+
|
|
99
|
+
let selectedDate = null;
|
|
77
100
|
let selectedTasks = [];
|
|
78
|
-
let isReroute
|
|
79
|
-
let isSweep
|
|
101
|
+
let isReroute = false;
|
|
102
|
+
let isSweep = false;
|
|
80
103
|
|
|
81
|
-
//
|
|
104
|
+
// 4. Cursor Logic
|
|
82
105
|
if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
|
|
83
106
|
const prevEntry = dirtyDates[targetCursorN - 2];
|
|
84
|
-
const reroutes
|
|
107
|
+
const reroutes = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
|
|
85
108
|
|
|
86
109
|
if (reroutes.length > 0) {
|
|
87
|
-
selectedDate
|
|
110
|
+
selectedDate = prevEntry.date;
|
|
88
111
|
selectedTasks = reroutes;
|
|
89
|
-
isReroute
|
|
90
|
-
logger.log('INFO', `[Dispatcher] Reroute detected for ${selectedDate}.
|
|
112
|
+
isReroute = true;
|
|
113
|
+
logger.log('INFO', `[Dispatcher] ๐ Reroute detected for ${selectedDate}. Retrying same cursor position with High-Mem.`);
|
|
91
114
|
}
|
|
92
115
|
}
|
|
93
116
|
|
|
94
|
-
// 3. Logic: N-th Dirty Date or Final Sweep
|
|
95
117
|
if (!selectedDate) {
|
|
96
118
|
if (targetCursorN <= dirtyDates.length) {
|
|
97
|
-
const entry
|
|
98
|
-
selectedDate
|
|
119
|
+
const entry = dirtyDates[targetCursorN - 1];
|
|
120
|
+
selectedDate = entry.date;
|
|
99
121
|
selectedTasks = entry.tasks;
|
|
122
|
+
logger.log('INFO', `[Dispatcher] Selecting Dirty Date #${targetCursorN}: ${selectedDate}`);
|
|
100
123
|
} else {
|
|
101
|
-
// Final Satiation Sweep: Check if anything was missed (recovery)
|
|
102
124
|
if (dirtyDates.length > 0) {
|
|
103
|
-
isSweep
|
|
104
|
-
selectedDate
|
|
125
|
+
isSweep = true;
|
|
126
|
+
selectedDate = dirtyDates[0].date;
|
|
105
127
|
selectedTasks = dirtyDates[0].tasks;
|
|
128
|
+
logger.log('INFO', `[Dispatcher] 🧹 Satiation Sweep: Checking earliest dirty date ${selectedDate}`);
|
|
106
129
|
}
|
|
107
130
|
}
|
|
108
131
|
}
|
|
109
132
|
|
|
110
|
-
//
|
|
133
|
+
// 5. Termination Check
|
|
111
134
|
if (!selectedDate) {
|
|
135
|
+
logger.log('SUCCESS', `[Dispatcher] ✅ Pass ${passToRun} is fully satiated. Signalling MOVE_TO_NEXT_PASS.`);
|
|
112
136
|
return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
|
|
113
137
|
}
|
|
114
138
|
|
|
115
|
-
//
|
|
139
|
+
// 6. Pub/Sub Dispatch
|
|
116
140
|
const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(t => ({ ...t, date: selectedDate, pass: passToRun }));
|
|
117
|
-
const highMemTasks
|
|
141
|
+
const highMemTasks = selectedTasks.filter(t => t.resources === 'high-mem').map(t => ({ ...t, date: selectedDate, pass: passToRun }));
|
|
118
142
|
|
|
119
143
|
const pubPromises = [];
|
|
120
144
|
if (standardTasks.length > 0) {
|
|
121
145
|
pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
|
|
122
146
|
topicName: config.computationTopicStandard || 'computation-tasks',
|
|
123
|
-
tasks: standardTasks,
|
|
124
|
-
taskType: `pass-${passToRun}-std`
|
|
147
|
+
tasks : standardTasks,
|
|
148
|
+
taskType : `pass-${passToRun}-std`
|
|
125
149
|
}));
|
|
126
150
|
}
|
|
127
151
|
if (highMemTasks.length > 0) {
|
|
128
152
|
pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
|
|
129
153
|
topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
|
|
130
|
-
tasks: highMemTasks,
|
|
131
|
-
taskType: `pass-${passToRun}-high`
|
|
154
|
+
tasks : highMemTasks,
|
|
155
|
+
taskType : `pass-${passToRun}-high`
|
|
132
156
|
}));
|
|
133
157
|
}
|
|
134
158
|
await Promise.all(pubPromises);
|
|
135
159
|
|
|
136
160
|
const etaSeconds = Math.max(20, selectedTasks.length * SECONDS_PER_CALC_MARGIN);
|
|
137
161
|
|
|
138
|
-
logger.log('INFO', `[Dispatcher]
|
|
162
|
+
logger.log('INFO', `[Dispatcher] 🛰️ DISPATCHED ${selectedTasks.length} tasks for ${selectedDate}. ETA ${etaSeconds}s.`);
|
|
139
163
|
|
|
140
164
|
return {
|
|
141
|
-
status: isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
|
|
142
|
-
dateProcessed: selectedDate,
|
|
143
|
-
dispatched: selectedTasks.length,
|
|
144
|
-
n_cursor_ignored: isReroute,
|
|
145
|
-
etaSeconds: etaSeconds
|
|
165
|
+
status : isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
|
|
166
|
+
dateProcessed : selectedDate,
|
|
167
|
+
dispatched : selectedTasks.length,
|
|
168
|
+
n_cursor_ignored: isReroute,
|
|
169
|
+
etaSeconds : etaSeconds
|
|
146
170
|
};
|
|
147
171
|
}
|
|
148
172
|
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
# Cloud Workflows: Precision Cursor-Based Orchestrator
|
|
2
|
+
# PURPOSE: Orchestrates 5 passes with deterministic pacing and syntax fixes.
|
|
3
|
+
|
|
2
4
|
main:
|
|
3
5
|
params: [input]
|
|
4
6
|
steps:
|
|
5
7
|
- init:
|
|
6
8
|
assign:
|
|
7
|
-
- project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
|
|
9
|
+
- project: '${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}'
|
|
8
10
|
- passes: ["1", "2", "3", "4", "5"]
|
|
9
|
-
- date_to_run: ${default(map.get(input, "date"), "2025-01-01")}
|
|
11
|
+
- date_to_run: '${default(map.get(input, "date"), "2025-01-01")}'
|
|
10
12
|
|
|
11
13
|
- run_sequential_passes:
|
|
12
14
|
for:
|
|
@@ -25,29 +27,38 @@ main:
|
|
|
25
27
|
- call_dispatcher:
|
|
26
28
|
call: http.post
|
|
27
29
|
args:
|
|
28
|
-
url: ${"https://europe-west1-" + project + ".cloudfunctions.net/
|
|
30
|
+
url: '${"https://europe-west1-" + project + ".cloudfunctions.net/computation-pass-" + pass_id}'
|
|
29
31
|
body:
|
|
30
|
-
pass: ${pass_id}
|
|
31
|
-
cursorIndex: ${n_cursor}
|
|
32
|
-
date: ${date_to_run}
|
|
32
|
+
pass: '${pass_id}'
|
|
33
|
+
cursorIndex: '${n_cursor}'
|
|
34
|
+
date: '${date_to_run}'
|
|
33
35
|
auth: { type: OIDC }
|
|
34
36
|
result: dispatch_res
|
|
35
37
|
|
|
36
38
|
- evaluate_dispatch:
|
|
37
39
|
switch:
|
|
38
|
-
# State 1:
|
|
39
|
-
- condition: ${dispatch_res.body.status == "MOVE_TO_NEXT_PASS"}
|
|
40
|
-
assign:
|
|
40
|
+
# State 1: Pass exhausted and Satiation Sweep complete
|
|
41
|
+
- condition: '${dispatch_res.body.status == "MOVE_TO_NEXT_PASS"}'
|
|
42
|
+
assign:
|
|
43
|
+
- pass_complete: true
|
|
41
44
|
|
|
42
|
-
# State 2: Tasks dispatched (Standard, Reroute, or Recovery)
|
|
43
|
-
- condition: ${dispatch_res.body.dispatched > 0}
|
|
45
|
+
# State 2: Tasks dispatched (Standard, Reroute, or Recovery Sweep)
|
|
46
|
+
- condition: '${dispatch_res.body.dispatched > 0}'
|
|
44
47
|
steps:
|
|
45
|
-
-
|
|
48
|
+
- log_dispatch:
|
|
49
|
+
call: sys.log
|
|
50
|
+
args:
|
|
51
|
+
text: '${"Pass " + pass_id + " - Dispatching " + string(dispatch_res.body.dispatched) + " tasks for " + dispatch_res.body.dateProcessed + ". ETA: " + string(dispatch_res.body.etaSeconds) + "s."}'
|
|
52
|
+
- wait_for_completion:
|
|
46
53
|
call: sys.sleep
|
|
47
|
-
args:
|
|
54
|
+
args:
|
|
55
|
+
seconds: '${int(dispatch_res.body.etaSeconds)}'
|
|
48
56
|
- update_cursor:
|
|
49
57
|
assign:
|
|
50
|
-
# If
|
|
51
|
-
- n_cursor: ${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}
|
|
52
|
-
-
|
|
53
|
-
next: sequential_date_loop
|
|
58
|
+
# If n_cursor_ignored is true (Reroute or Sweep Recovery), we stay on same N
|
|
59
|
+
- n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
|
|
60
|
+
- next_loop:
|
|
61
|
+
next: sequential_date_loop
|
|
62
|
+
|
|
63
|
+
- finish:
|
|
64
|
+
return: "Pipeline Execution Satiated and Complete"
|