bulltrackers-module 1.0.295 → 1.0.297
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/StandardExecutor.js +44 -86
- package/functions/computation-system/helpers/computation_worker.js +40 -16
- package/functions/computation-system/persistence/ResultCommitter.js +77 -183
- package/functions/computation-system/persistence/RunRecorder.js +20 -10
- package/functions/generic-api/admin-api/index.js +390 -91
- package/package.json +1 -1
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* @fileoverview Admin API Router
|
|
3
3
|
* Sub-module for system observability, debugging, and visualization.
|
|
4
4
|
* Mounted at /admin within the Generic API.
|
|
5
|
+
* UPDATED: Added advanced cost, performance, and live monitoring endpoints.
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
const express = require('express');
|
|
@@ -19,130 +20,127 @@ const createAdminRouter = (config, dependencies, unifiedCalculations) => {
|
|
|
19
20
|
const router = express.Router();
|
|
20
21
|
const { db, logger } = dependencies;
|
|
21
22
|
|
|
23
|
+
// Helper to get fresh manifest
|
|
24
|
+
const getFullManifest = () => getManifest([], unifiedCalculations, dependencies);
|
|
25
|
+
|
|
22
26
|
// --- 1. TOPOLOGY VISUALIZER ---
|
|
23
|
-
// Returns nodes/edges for React Flow or Cytoscape
|
|
24
27
|
router.get('/topology', async (req, res) => {
|
|
25
28
|
try {
|
|
26
|
-
|
|
27
|
-
// Passing [] for productLines ensures we get the FULL graph
|
|
28
|
-
const manifest = getManifest([], unifiedCalculations, dependencies);
|
|
29
|
-
|
|
29
|
+
const manifest = getFullManifest();
|
|
30
30
|
const nodes = [];
|
|
31
31
|
const edges = [];
|
|
32
32
|
|
|
33
33
|
manifest.forEach(calc => {
|
|
34
|
-
// Nodes
|
|
35
34
|
nodes.push({
|
|
36
35
|
id: calc.name,
|
|
37
36
|
data: {
|
|
38
37
|
label: calc.name,
|
|
39
38
|
layer: calc.category,
|
|
40
|
-
pass: calc.pass,
|
|
39
|
+
pass: calc.pass,
|
|
41
40
|
isHistorical: calc.isHistorical,
|
|
42
41
|
type: calc.type
|
|
43
42
|
},
|
|
44
|
-
position: { x: 0, y: 0 }
|
|
43
|
+
position: { x: 0, y: 0 }
|
|
45
44
|
});
|
|
46
45
|
|
|
47
|
-
// Dependency Edges (Calc -> Calc)
|
|
48
46
|
if (calc.dependencies) {
|
|
49
47
|
calc.dependencies.forEach(dep => {
|
|
50
48
|
edges.push({
|
|
51
49
|
id: `e-${dep}-${calc.name}`,
|
|
52
50
|
source: normalizeName(dep),
|
|
53
51
|
target: calc.name,
|
|
54
|
-
type: '
|
|
55
|
-
animated: false
|
|
52
|
+
type: 'smoothstep'
|
|
56
53
|
});
|
|
57
54
|
});
|
|
58
55
|
}
|
|
59
56
|
|
|
60
|
-
// Root Data Edges (Data -> Calc)
|
|
61
57
|
if (calc.rootDataDependencies) {
|
|
62
58
|
calc.rootDataDependencies.forEach(root => {
|
|
63
|
-
// Ensure a node exists for the root data type
|
|
64
59
|
const rootId = `ROOT_${root.toUpperCase()}`;
|
|
65
60
|
if (!nodes.find(n => n.id === rootId)) {
|
|
66
61
|
nodes.push({
|
|
67
62
|
id: rootId,
|
|
68
|
-
type: 'input',
|
|
63
|
+
type: 'input',
|
|
69
64
|
data: { label: `${root.toUpperCase()} DB` },
|
|
70
|
-
position: { x: 0, y: 0 }
|
|
65
|
+
position: { x: 0, y: 0 },
|
|
66
|
+
style: { background: '#f0f0f0', border: '1px solid #777' }
|
|
71
67
|
});
|
|
72
68
|
}
|
|
73
|
-
|
|
74
69
|
edges.push({
|
|
75
70
|
id: `e-root-${root}-${calc.name}`,
|
|
76
71
|
source: rootId,
|
|
77
72
|
target: calc.name,
|
|
78
73
|
animated: true,
|
|
79
|
-
style: { stroke: '#ff0072' }
|
|
74
|
+
style: { stroke: '#ff0072' }
|
|
80
75
|
});
|
|
81
76
|
});
|
|
82
77
|
}
|
|
83
78
|
});
|
|
84
79
|
|
|
85
|
-
res.json({
|
|
86
|
-
summary: {
|
|
87
|
-
totalNodes: nodes.length,
|
|
88
|
-
totalEdges: edges.length
|
|
89
|
-
},
|
|
90
|
-
nodes,
|
|
91
|
-
edges
|
|
92
|
-
});
|
|
80
|
+
res.json({ summary: { totalNodes: nodes.length, totalEdges: edges.length }, nodes, edges });
|
|
93
81
|
} catch (e) {
|
|
94
82
|
logger.log('ERROR', '[AdminAPI] Topology build failed', e);
|
|
95
83
|
res.status(500).json({ error: e.message });
|
|
96
84
|
}
|
|
97
85
|
});
|
|
98
86
|
|
|
99
|
-
// --- 2. STATUS MATRIX (Calendar
|
|
100
|
-
// ?start=2023-01-01&end=2023-01-30
|
|
87
|
+
// --- 2. STATUS MATRIX (Calendar / State UI) ---
|
|
101
88
|
router.get('/matrix', async (req, res) => {
|
|
102
89
|
const { start, end } = req.query;
|
|
103
|
-
if (!start || !end) return res.status(400).json({ error: "Start
|
|
90
|
+
if (!start || !end) return res.status(400).json({ error: "Start and End dates required." });
|
|
104
91
|
|
|
105
92
|
try {
|
|
106
|
-
const startDate = new Date(start);
|
|
107
|
-
const endDate = new Date(end);
|
|
93
|
+
const startDate = new Date(String(start));
|
|
94
|
+
const endDate = new Date(String(end));
|
|
108
95
|
const dates = [];
|
|
109
|
-
|
|
110
|
-
// Generate date range
|
|
111
96
|
for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) {
|
|
112
97
|
dates.push(d.toISOString().slice(0, 10));
|
|
113
98
|
}
|
|
114
99
|
|
|
115
|
-
const
|
|
100
|
+
const manifest = getFullManifest();
|
|
101
|
+
const allCalcNames = new Set(manifest.map(c => c.name));
|
|
102
|
+
|
|
103
|
+
const limit = pLimit(20);
|
|
116
104
|
const matrix = {};
|
|
117
105
|
|
|
118
106
|
await Promise.all(dates.map(date => limit(async () => {
|
|
119
|
-
// Fetch
|
|
107
|
+
// Fetch Status and Root Data Availability
|
|
120
108
|
const [statusSnap, rootSnap] = await Promise.all([
|
|
121
109
|
db.collection('computation_status').doc(date).get(),
|
|
122
110
|
db.collection('system_root_data_index').doc(date).get()
|
|
123
111
|
]);
|
|
124
112
|
|
|
125
|
-
// Flatten status for frontend (calcName -> { status: 'COMPLETED' | 'IMPOSSIBLE' })
|
|
126
113
|
const statusData = statusSnap.exists ? statusSnap.data() : {};
|
|
127
114
|
const rootData = rootSnap.exists ? rootSnap.data() : { status: { hasPortfolio: false } };
|
|
128
115
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
116
|
+
const dateStatus = {};
|
|
117
|
+
|
|
118
|
+
// Check every calculation in the Manifest
|
|
119
|
+
allCalcNames.forEach(calcName => {
|
|
120
|
+
const entry = statusData[calcName];
|
|
121
|
+
|
|
122
|
+
if (!entry) {
|
|
123
|
+
// If root data exists but calc is missing -> PENDING
|
|
124
|
+
// If no root data -> WAITING_DATA
|
|
125
|
+
dateStatus[calcName] = rootData.status?.hasPortfolio ? 'PENDING' : 'WAITING_DATA';
|
|
126
|
+
} else if (typeof entry === 'object') {
|
|
127
|
+
if (entry.hash && typeof entry.hash === 'string' && entry.hash.startsWith('IMPOSSIBLE')) {
|
|
128
|
+
dateStatus[calcName] = 'IMPOSSIBLE';
|
|
129
|
+
} else if (entry.hash === false) {
|
|
130
|
+
dateStatus[calcName] = 'BLOCKED';
|
|
131
|
+
} else {
|
|
132
|
+
dateStatus[calcName] = 'COMPLETED';
|
|
133
|
+
}
|
|
136
134
|
} else if (entry === 'IMPOSSIBLE') {
|
|
137
|
-
|
|
138
|
-
} else
|
|
139
|
-
|
|
135
|
+
dateStatus[calcName] = 'IMPOSSIBLE';
|
|
136
|
+
} else {
|
|
137
|
+
dateStatus[calcName] = 'COMPLETED';
|
|
140
138
|
}
|
|
141
139
|
});
|
|
142
140
|
|
|
143
141
|
matrix[date] = {
|
|
144
|
-
dataAvailable: rootData.status || {},
|
|
145
|
-
calculations:
|
|
142
|
+
dataAvailable: rootData.status || {},
|
|
143
|
+
calculations: dateStatus
|
|
146
144
|
};
|
|
147
145
|
})));
|
|
148
146
|
|
|
@@ -153,78 +151,379 @@ const createAdminRouter = (config, dependencies, unifiedCalculations) => {
|
|
|
153
151
|
}
|
|
154
152
|
});
|
|
155
153
|
|
|
156
|
-
// --- 3.
|
|
157
|
-
|
|
154
|
+
// --- 3. PIPELINE STATE (Progress Bar) ---
|
|
155
|
+
router.get('/pipeline/state', async (req, res) => {
|
|
156
|
+
const { date } = req.query;
|
|
157
|
+
if (!date) return res.status(400).json({ error: "Date required" });
|
|
158
|
+
|
|
159
|
+
try {
|
|
160
|
+
const passes = ['1', '2', '3', '4', '5'];
|
|
161
|
+
const state = await Promise.all(passes.map(async (pass) => {
|
|
162
|
+
// We use the Audit Ledger which is the source of truth for execution state
|
|
163
|
+
const tasksSnap = await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).get();
|
|
164
|
+
|
|
165
|
+
const stats = {
|
|
166
|
+
pending: 0,
|
|
167
|
+
inProgress: 0,
|
|
168
|
+
completed: 0,
|
|
169
|
+
failed: 0,
|
|
170
|
+
totalMemoryMB: 0,
|
|
171
|
+
avgDurationMs: 0
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
const durations = [];
|
|
175
|
+
|
|
176
|
+
tasksSnap.forEach(doc => {
|
|
177
|
+
const data = doc.data();
|
|
178
|
+
const s = (data.status || 'UNKNOWN').toLowerCase();
|
|
179
|
+
if (stats[s] !== undefined) stats[s]++;
|
|
180
|
+
else stats[s] = 1;
|
|
181
|
+
|
|
182
|
+
if (data.telemetry?.lastMemory?.rssMB) {
|
|
183
|
+
stats.totalMemoryMB += data.telemetry.lastMemory.rssMB;
|
|
184
|
+
}
|
|
185
|
+
if (data.completedAt && data.startedAt) {
|
|
186
|
+
durations.push(new Date(data.completedAt).getTime() - new Date(data.startedAt).getTime());
|
|
187
|
+
}
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
stats.avgDurationMs = durations.length ?
|
|
191
|
+
Math.round(durations.reduce((a, b) => a + b, 0) / durations.length) : 0;
|
|
192
|
+
|
|
193
|
+
return { pass, stats, totalTasks: tasksSnap.size };
|
|
194
|
+
}));
|
|
195
|
+
|
|
196
|
+
res.json({ date, passes: state });
|
|
197
|
+
} catch (e) {
|
|
198
|
+
res.status(500).json({ error: e.message });
|
|
199
|
+
}
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
// --- 4. DEPENDENCY TRACER (Blast Radius) ---
|
|
203
|
+
router.get('/trace/:calcName', async (req, res) => {
|
|
204
|
+
const { calcName } = req.params;
|
|
205
|
+
const mode = req.query.mode || 'downstream'; // 'upstream' or 'downstream'
|
|
206
|
+
|
|
207
|
+
try {
|
|
208
|
+
const manifest = getFullManifest();
|
|
209
|
+
const manifestMap = new Map(manifest.map(c => [c.name, c]));
|
|
210
|
+
|
|
211
|
+
if (!manifestMap.has(calcName)) return res.status(404).json({ error: 'Calculation not found' });
|
|
212
|
+
|
|
213
|
+
const trace = { root: calcName, chain: [] };
|
|
214
|
+
|
|
215
|
+
if (mode === 'upstream') {
|
|
216
|
+
// What does X depend on?
|
|
217
|
+
const visited = new Set();
|
|
218
|
+
const walk = (name, depth = 0) => {
|
|
219
|
+
if (visited.has(name) || depth > 10) return;
|
|
220
|
+
visited.add(name);
|
|
221
|
+
const calc = manifestMap.get(name);
|
|
222
|
+
if (!calc) return;
|
|
223
|
+
|
|
224
|
+
trace.chain.push({
|
|
225
|
+
name, depth, pass: calc.pass, type: calc.type
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
calc.dependencies?.forEach(dep => walk(dep, depth + 1));
|
|
229
|
+
};
|
|
230
|
+
walk(calcName);
|
|
231
|
+
} else {
|
|
232
|
+
// What depends on X? (Downstream / Impact)
|
|
233
|
+
const reverseGraph = new Map();
|
|
234
|
+
manifest.forEach(c => {
|
|
235
|
+
c.dependencies?.forEach(dep => {
|
|
236
|
+
const normDep = normalizeName(dep);
|
|
237
|
+
if (!reverseGraph.has(normDep)) reverseGraph.set(normDep, []);
|
|
238
|
+
reverseGraph.get(normDep).push(c.name);
|
|
239
|
+
});
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
const visited = new Set();
|
|
243
|
+
const walk = (name, depth = 0) => {
|
|
244
|
+
if (visited.has(name) || depth > 10) return;
|
|
245
|
+
visited.add(name);
|
|
246
|
+
|
|
247
|
+
const calc = manifestMap.get(name);
|
|
248
|
+
trace.chain.push({
|
|
249
|
+
name, depth, pass: calc?.pass
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
reverseGraph.get(name)?.forEach(child => walk(child, depth + 1));
|
|
253
|
+
};
|
|
254
|
+
walk(calcName);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
res.json(trace);
|
|
258
|
+
} catch (e) {
|
|
259
|
+
res.status(500).json({ error: e.message });
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
// --- 5. CONTRACT VIOLATIONS (Quality Gate) ---
|
|
264
|
+
router.get('/violations', async (req, res) => {
|
|
265
|
+
const days = parseInt(String(req.query.days)) || 7;
|
|
266
|
+
const cutoff = new Date();
|
|
267
|
+
cutoff.setDate(cutoff.getDate() - days);
|
|
268
|
+
|
|
269
|
+
try {
|
|
270
|
+
// Check DLQ for Semantic Failures (Hard Violations)
|
|
271
|
+
const dlqSnap = await db.collection('computation_dead_letter_queue')
|
|
272
|
+
.where('finalAttemptAt', '>', cutoff)
|
|
273
|
+
.where('error.stage', '==', 'SEMANTIC_GATE')
|
|
274
|
+
.limit(50)
|
|
275
|
+
.get();
|
|
276
|
+
|
|
277
|
+
const violations = [];
|
|
278
|
+
dlqSnap.forEach(doc => {
|
|
279
|
+
const data = doc.data();
|
|
280
|
+
violations.push({
|
|
281
|
+
id: doc.id,
|
|
282
|
+
computation: data.originalData.computation,
|
|
283
|
+
date: data.originalData.date,
|
|
284
|
+
reason: data.error.message,
|
|
285
|
+
type: 'HARD_VIOLATION',
|
|
286
|
+
timestamp: data.finalAttemptAt
|
|
287
|
+
});
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
// Check Audit Logs for Soft Anomalies (Statistical warnings)
|
|
291
|
+
const anomalySnap = await db.collectionGroup('history')
|
|
292
|
+
.where('triggerTime', '>', cutoff.toISOString())
|
|
293
|
+
.where('anomalies', '!=', []) // Firestore != operator
|
|
294
|
+
.limit(50)
|
|
295
|
+
.get();
|
|
296
|
+
|
|
297
|
+
anomalySnap.forEach(doc => {
|
|
298
|
+
const data = doc.data();
|
|
299
|
+
data.anomalies?.forEach(anomaly => {
|
|
300
|
+
violations.push({
|
|
301
|
+
id: doc.id,
|
|
302
|
+
computation: data.computationName,
|
|
303
|
+
date: data.targetDate,
|
|
304
|
+
reason: anomaly,
|
|
305
|
+
type: 'SOFT_ANOMALY',
|
|
306
|
+
timestamp: data.triggerTime
|
|
307
|
+
});
|
|
308
|
+
});
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
// Sort by time desc
|
|
312
|
+
violations.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
|
|
313
|
+
|
|
314
|
+
res.json({ count: violations.length, violations });
|
|
315
|
+
} catch (e) {
|
|
316
|
+
res.status(500).json({ error: e.message });
|
|
317
|
+
}
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
// --- 6. MEMORY HOTSPOTS (Forensics) ---
|
|
321
|
+
router.get('/memory/hotspots', async (req, res) => {
|
|
322
|
+
const thresholdMB = parseInt(String(req.query.threshold)) || 1000; // 1GB default
|
|
323
|
+
|
|
324
|
+
try {
|
|
325
|
+
// Ledger tasks maintain 'telemetry.lastMemory'
|
|
326
|
+
// We use collectionGroup to search across all dates/passes
|
|
327
|
+
const snapshot = await db.collectionGroup('tasks')
|
|
328
|
+
.where('telemetry.lastMemory.rssMB', '>', thresholdMB)
|
|
329
|
+
.orderBy('telemetry.lastMemory.rssMB', 'desc')
|
|
330
|
+
.limit(20)
|
|
331
|
+
.get();
|
|
332
|
+
|
|
333
|
+
const hotspots = [];
|
|
334
|
+
snapshot.forEach(doc => {
|
|
335
|
+
const data = doc.data();
|
|
336
|
+
hotspots.push({
|
|
337
|
+
computation: data.computation,
|
|
338
|
+
rssMB: data.telemetry.lastMemory.rssMB,
|
|
339
|
+
heapMB: data.telemetry.lastMemory.heapUsedMB,
|
|
340
|
+
status: data.status,
|
|
341
|
+
worker: data.workerId,
|
|
342
|
+
date: doc.ref.parent.parent.parent.parent.id // traversing path to get date
|
|
343
|
+
});
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
res.json({ count: hotspots.length, hotspots });
|
|
347
|
+
} catch (e) {
|
|
348
|
+
res.status(500).json({ error: e.message });
|
|
349
|
+
}
|
|
350
|
+
});
|
|
351
|
+
|
|
352
|
+
// --- 7. FLIGHT RECORDER (Inspection) ---
|
|
158
353
|
router.get('/inspect/:date/:calcName', async (req, res) => {
|
|
159
354
|
const { date, calcName } = req.params;
|
|
160
355
|
try {
|
|
161
|
-
// We search across all potential passes (1-5) because we might not know which one it belongs to
|
|
162
356
|
const passes = ['1', '2', '3', '4', '5'];
|
|
163
357
|
let executionRecord = null;
|
|
164
358
|
|
|
165
|
-
// Run in parallel to find the record fast
|
|
166
359
|
await Promise.all(passes.map(async (pass) => {
|
|
167
|
-
if (executionRecord) return;
|
|
360
|
+
if (executionRecord) return;
|
|
168
361
|
const ref = db.doc(`computation_audit_ledger/${date}/passes/${pass}/tasks/${calcName}`);
|
|
169
362
|
const snap = await ref.get();
|
|
170
|
-
if (snap.exists) {
|
|
171
|
-
executionRecord = { pass, ...snap.data() };
|
|
172
|
-
}
|
|
363
|
+
if (snap.exists) executionRecord = { pass, ...snap.data() };
|
|
173
364
|
}));
|
|
174
365
|
|
|
175
|
-
if (!executionRecord) {
|
|
176
|
-
return res.status(404).json({
|
|
177
|
-
status: 'NOT_FOUND',
|
|
178
|
-
message: `No execution record found in ledger for ${calcName} on ${date}`
|
|
179
|
-
});
|
|
180
|
-
}
|
|
366
|
+
if (!executionRecord) return res.status(404).json({ status: 'NOT_FOUND' });
|
|
181
367
|
|
|
182
|
-
// Also fetch the "Contract" if it exists (for volatility analysis)
|
|
183
368
|
const contractSnap = await db.collection('system_contracts').doc(calcName).get();
|
|
184
369
|
|
|
185
370
|
res.json({
|
|
186
371
|
execution: executionRecord,
|
|
187
372
|
contract: contractSnap.exists ? contractSnap.data() : null
|
|
188
373
|
});
|
|
189
|
-
|
|
190
374
|
} catch (e) {
|
|
191
|
-
logger.log('ERROR', `[AdminAPI] Inspect failed for ${calcName}`, e);
|
|
192
375
|
res.status(500).json({ error: e.message });
|
|
193
376
|
}
|
|
194
377
|
});
|
|
195
378
|
|
|
196
|
-
// ---
|
|
197
|
-
|
|
198
|
-
|
|
379
|
+
// --- 8. COST & RESOURCE ANALYSIS ---
|
|
380
|
+
router.get('/analytics/costs', async (req, res) => {
|
|
381
|
+
const { date, days } = req.query;
|
|
382
|
+
// Default to today if no date, or range if days provided
|
|
383
|
+
const targetDate = date || new Date().toISOString().slice(0, 10);
|
|
384
|
+
|
|
385
|
+
// Simple Cost Model (Estimates)
|
|
386
|
+
const COSTS = {
|
|
387
|
+
write: 0.18 / 100000,
|
|
388
|
+
read: 0.06 / 100000,
|
|
389
|
+
delete: 0.02 / 100000,
|
|
390
|
+
compute_std_sec: 0.000023, // 1vCPU 2GB (approx)
|
|
391
|
+
compute_high_sec: 0.000092 // 2vCPU 8GB (approx)
|
|
392
|
+
};
|
|
393
|
+
|
|
199
394
|
try {
|
|
200
|
-
const
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
395
|
+
const auditRef = db.collection('computation_audit_logs');
|
|
396
|
+
// We scan the 'history' subcollectionGroup for the given date(s)
|
|
397
|
+
// Note: This can be expensive. In prod, you'd want aggregate counters.
|
|
398
|
+
const query = db.collectionGroup('history').where('targetDate', '==', targetDate);
|
|
399
|
+
const snap = await query.get();
|
|
400
|
+
|
|
401
|
+
let totalCost = 0;
|
|
402
|
+
const byPass = {};
|
|
403
|
+
const byCalc = {};
|
|
404
|
+
|
|
405
|
+
snap.forEach(doc => {
|
|
406
|
+
const data = doc.data();
|
|
407
|
+
const ops = data.firestoreOps || { reads: 0, writes: 0, deletes: 0 };
|
|
408
|
+
const durationSec = (data.durationMs || 0) / 1000;
|
|
409
|
+
const tier = data.resourceTier || 'standard';
|
|
410
|
+
|
|
411
|
+
const ioCost = (ops.writes * COSTS.write) + (ops.reads * COSTS.read) + (ops.deletes * COSTS.delete);
|
|
412
|
+
const computeCost = durationSec * (tier === 'high-mem' ? COSTS.compute_high_sec : COSTS.compute_std_sec);
|
|
413
|
+
const itemCost = ioCost + computeCost;
|
|
414
|
+
|
|
415
|
+
totalCost += itemCost;
|
|
416
|
+
|
|
417
|
+
// Aggregations
|
|
418
|
+
const pass = data.pass || 'unknown';
|
|
419
|
+
if (!byPass[pass]) byPass[pass] = { cost: 0, runs: 0, duration: 0 };
|
|
420
|
+
byPass[pass].cost += itemCost;
|
|
421
|
+
byPass[pass].runs++;
|
|
422
|
+
byPass[pass].duration += durationSec;
|
|
423
|
+
|
|
424
|
+
const calc = data.computationName;
|
|
425
|
+
if (!byCalc[calc]) byCalc[calc] = { cost: 0, runs: 0, ops: { r:0, w:0 } };
|
|
426
|
+
byCalc[calc].cost += itemCost;
|
|
427
|
+
byCalc[calc].runs++;
|
|
428
|
+
byCalc[calc].ops.r += ops.reads;
|
|
429
|
+
byCalc[calc].ops.w += ops.writes;
|
|
219
430
|
});
|
|
220
431
|
|
|
432
|
+
// Top 10 Expensive Calcs
|
|
433
|
+
const topCalcs = Object.entries(byCalc)
|
|
434
|
+
.sort((a, b) => b[1].cost - a[1].cost)
|
|
435
|
+
.slice(0, 10)
|
|
436
|
+
.map(([name, stats]) => ({ name, ...stats }));
|
|
437
|
+
|
|
221
438
|
res.json({
|
|
222
|
-
|
|
223
|
-
|
|
439
|
+
date: targetDate,
|
|
440
|
+
totalCostUSD: totalCost,
|
|
441
|
+
breakdown: {
|
|
442
|
+
byPass,
|
|
443
|
+
topCalculations: topCalcs
|
|
444
|
+
},
|
|
445
|
+
meta: { model: COSTS }
|
|
224
446
|
});
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
447
|
+
|
|
448
|
+
} catch (e) { res.status(500).json({ error: e.message }); }
|
|
449
|
+
});
|
|
450
|
+
|
|
451
|
+
// --- 9. REROUTE (OOM) ANALYSIS ---
|
|
452
|
+
router.get('/analytics/reroutes', async (req, res) => {
|
|
453
|
+
const { date } = req.query;
|
|
454
|
+
if (!date) return res.status(400).json({ error: "Date required" });
|
|
455
|
+
|
|
456
|
+
try {
|
|
457
|
+
// Find all runs that used high-mem
|
|
458
|
+
const query = db.collectionGroup('history')
|
|
459
|
+
.where('targetDate', '==', date)
|
|
460
|
+
.where('resourceTier', '==', 'high-mem');
|
|
461
|
+
|
|
462
|
+
const snap = await query.get();
|
|
463
|
+
const reroutes = [];
|
|
464
|
+
|
|
465
|
+
snap.forEach(doc => {
|
|
466
|
+
const data = doc.data();
|
|
467
|
+
reroutes.push({
|
|
468
|
+
computation: data.computationName,
|
|
469
|
+
pass: data.pass,
|
|
470
|
+
trigger: data.trigger?.reason,
|
|
471
|
+
peakMemoryMB: data.peakMemoryMB,
|
|
472
|
+
durationMs: data.durationMs,
|
|
473
|
+
runId: data.runId
|
|
474
|
+
});
|
|
475
|
+
});
|
|
476
|
+
|
|
477
|
+
res.json({ count: reroutes.length, reroutes });
|
|
478
|
+
} catch (e) { res.status(500).json({ error: e.message }); }
|
|
479
|
+
});
|
|
480
|
+
|
|
481
|
+
// --- 10. LIVE DASHBOARD (Snapshot) ---
|
|
482
|
+
// Poll this endpoint to simulate a WebSocket feed
|
|
483
|
+
router.get('/live/dashboard', async (req, res) => {
|
|
484
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
485
|
+
try {
|
|
486
|
+
// Query the Ledger for Active Tasks
|
|
487
|
+
// We look at all passes for today
|
|
488
|
+
const passes = ['1', '2', '3', '4', '5'];
|
|
489
|
+
const activeTasks = [];
|
|
490
|
+
const recentFailures = [];
|
|
491
|
+
|
|
492
|
+
await Promise.all(passes.map(async (pass) => {
|
|
493
|
+
const colRef = db.collection(`computation_audit_ledger/${today}/passes/${pass}/tasks`);
|
|
494
|
+
|
|
495
|
+
// Get Running
|
|
496
|
+
const runningSnap = await colRef.where('status', 'in', ['PENDING', 'IN_PROGRESS']).get();
|
|
497
|
+
runningSnap.forEach(doc => {
|
|
498
|
+
activeTasks.push({ pass, ...doc.data() });
|
|
499
|
+
});
|
|
500
|
+
|
|
501
|
+
// Get Recent Failures (last 10 mins?? hard to query without index, just grab failures)
|
|
502
|
+
const failSnap = await colRef.where('status', '==', 'FAILED').get();
|
|
503
|
+
failSnap.forEach(doc => {
|
|
504
|
+
recentFailures.push({ pass, ...doc.data() });
|
|
505
|
+
});
|
|
506
|
+
}));
|
|
507
|
+
|
|
508
|
+
// Get Pipeline Stage (which pass is active?)
|
|
509
|
+
// We infer this by seeing which pass has pending tasks
|
|
510
|
+
let currentStage = 'IDLE';
|
|
511
|
+
for (const p of passes) {
|
|
512
|
+
const hasActive = activeTasks.some(t => t.pass === p);
|
|
513
|
+
if (hasActive) { currentStage = `PASS_${p}`; break; }
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
res.json({
|
|
517
|
+
status: 'success',
|
|
518
|
+
timestamp: new Date(),
|
|
519
|
+
pipelineState: currentStage,
|
|
520
|
+
activeCount: activeTasks.length,
|
|
521
|
+
failureCount: recentFailures.length,
|
|
522
|
+
tasks: activeTasks,
|
|
523
|
+
failures: recentFailures
|
|
524
|
+
});
|
|
525
|
+
|
|
526
|
+
} catch (e) { res.status(500).json({ error: e.message }); }
|
|
228
527
|
});
|
|
229
528
|
|
|
230
529
|
return router;
|