bulltrackers-module 1.0.339 → 1.0.341
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/config/monitoring_config.js +31 -0
- package/functions/computation-system/helpers/computation_dispatcher.js +50 -19
- package/functions/computation-system/helpers/computation_worker.js +102 -60
- package/functions/computation-system/logger/logger.js +23 -5
- package/functions/computation-system/persistence/RunRecorder.js +22 -7
- package/functions/computation-system/tools/BuildReporter.js +231 -119
- package/package.json +4 -3
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* computation-system/config/monitoring_config.js
|
|
3
|
+
* Configuration for Google Cloud Monitoring, Logging, and Tracing.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
module.exports = {
|
|
7
|
+
// Toggle to easily disable all GCP telemetry without changing code
|
|
8
|
+
enabled: true,
|
|
9
|
+
|
|
10
|
+
// Your Google Cloud Project ID
|
|
11
|
+
project: process.env.GOOGLE_CLOUD_PROJECT_ID || process.env.GCP_PROJECT || 'your-project-id',
|
|
12
|
+
|
|
13
|
+
// Configuration for the "Eyeball" (Trace)
|
|
14
|
+
trace: {
|
|
15
|
+
enabled: true,
|
|
16
|
+
// Force sampling for these computations (can use '*' for all)
|
|
17
|
+
sampledTasks: ['*']
|
|
18
|
+
},
|
|
19
|
+
|
|
20
|
+
// Definitions for Custom Metrics
|
|
21
|
+
metrics: {
|
|
22
|
+
// The heartbeat metric replaces the Firestore 'active' check over time
|
|
23
|
+
heartbeat: 'custom.googleapis.com/computation/worker_heartbeat',
|
|
24
|
+
|
|
25
|
+
// Tracks memory usage per worker/task
|
|
26
|
+
memory: 'custom.googleapis.com/computation/memory_usage',
|
|
27
|
+
|
|
28
|
+
// Tracks duration of specific processing stages
|
|
29
|
+
duration: 'custom.googleapis.com/computation/stage_duration'
|
|
30
|
+
}
|
|
31
|
+
};
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* PURPOSE: Sequential Cursor-Based Dispatcher.
|
|
4
4
|
* UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
|
|
5
5
|
* UPDATED: Prevents infinite loops by permanently ignoring deterministic failures.
|
|
6
|
+
* UPDATED: Generates Google Cloud Trace Context (traceId/spanId) for end-to-end monitoring.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
|
|
@@ -11,6 +12,7 @@ const { PubSubUtils } = require('../../core/utils/pubsub_utils');
|
|
|
11
12
|
const { fetchComputationStatus } = require('../persistence/StatusRepository');
|
|
12
13
|
const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
|
|
13
14
|
const crypto = require('crypto');
|
|
15
|
+
const monConfig = require('../config/monitoring_config'); // [NEW]
|
|
14
16
|
|
|
15
17
|
const OOM_THRESHOLD_MB = 1500;
|
|
16
18
|
const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
|
|
@@ -246,7 +248,6 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
246
248
|
}
|
|
247
249
|
|
|
248
250
|
// [CRITICAL] FILTER FOR SWEEP:
|
|
249
|
-
// Only dispatch if it hasn't failed High-Mem or Quality checks.
|
|
250
251
|
const validTasks = [];
|
|
251
252
|
for (const task of pending) {
|
|
252
253
|
const name = normalizeName(task.name);
|
|
@@ -256,13 +257,13 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
256
257
|
const data = doc.data();
|
|
257
258
|
const stage = data.error?.stage;
|
|
258
259
|
|
|
259
|
-
// A. QUALITY CHECK
|
|
260
|
+
// A. QUALITY CHECK
|
|
260
261
|
if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
|
|
261
262
|
logger.log('WARN', `[Sweep] ๐ Skipping deterministic failure for ${name} (${stage}).`);
|
|
262
263
|
continue;
|
|
263
264
|
}
|
|
264
265
|
|
|
265
|
-
// B. DEAD END CHECK
|
|
266
|
+
// B. DEAD END CHECK
|
|
266
267
|
if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
|
|
267
268
|
logger.log('WARN', `[Sweep] ๐ Skipping ${name} - Already failed on High-Mem.`);
|
|
268
269
|
continue;
|
|
@@ -276,19 +277,31 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
276
277
|
return { dispatched: 0 };
|
|
277
278
|
}
|
|
278
279
|
|
|
279
|
-
// 2. FORCE High Mem
|
|
280
|
+
// 2. FORCE High Mem & INJECT TRACE
|
|
280
281
|
const currentDispatchId = crypto.randomUUID();
|
|
281
282
|
|
|
282
|
-
const tasksPayload = validTasks.map(t =>
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
283
|
+
const tasksPayload = validTasks.map(t => {
|
|
284
|
+
// [NEW] Generate Eyeball Trace
|
|
285
|
+
const traceId = crypto.randomBytes(16).toString('hex');
|
|
286
|
+
const spanId = crypto.randomBytes(8).toString('hex');
|
|
287
|
+
|
|
288
|
+
return {
|
|
289
|
+
...t,
|
|
290
|
+
action: 'RUN_COMPUTATION_DATE',
|
|
291
|
+
computation: t.name,
|
|
292
|
+
date: date,
|
|
293
|
+
pass: passToRun,
|
|
294
|
+
dispatchId: currentDispatchId,
|
|
295
|
+
triggerReason: 'SWEEP_RECOVERY',
|
|
296
|
+
resources: 'high-mem', // FORCE
|
|
297
|
+
// [NEW] Attach the eyeball
|
|
298
|
+
traceContext: {
|
|
299
|
+
traceId: traceId,
|
|
300
|
+
spanId: spanId,
|
|
301
|
+
sampled: true
|
|
302
|
+
}
|
|
303
|
+
};
|
|
304
|
+
});
|
|
292
305
|
|
|
293
306
|
logger.log('WARN', `[Sweep] ๐งน Forcing ${tasksPayload.length} tasks to HIGH-MEM for ${date}.`);
|
|
294
307
|
|
|
@@ -349,7 +362,7 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
|
|
|
349
362
|
checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
|
|
350
363
|
]);
|
|
351
364
|
|
|
352
|
-
if (availability && availability.status
|
|
365
|
+
if (availability && availability.status) { // Just check availability exists
|
|
353
366
|
const report = analyzeDateExecution(selectedDate, calcsInThisPass, availability.status, dailyStatus, manifestMap, prevDailyStatus);
|
|
354
367
|
let rawTasks = [...report.runnable, ...report.reRuns];
|
|
355
368
|
|
|
@@ -386,10 +399,28 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
|
|
|
386
399
|
date: selectedDate, pass: passToRun, dispatchedCount: selectedTasks.length, etaSeconds, dispatchId: currentDispatchId, details: taskDetails
|
|
387
400
|
});
|
|
388
401
|
|
|
389
|
-
const mapToTaskPayload = (t) =>
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
402
|
+
const mapToTaskPayload = (t) => {
|
|
403
|
+
// [NEW] Generate Eyeball Trace
|
|
404
|
+
const traceId = crypto.randomBytes(16).toString('hex');
|
|
405
|
+
const spanId = crypto.randomBytes(8).toString('hex');
|
|
406
|
+
|
|
407
|
+
return {
|
|
408
|
+
...t,
|
|
409
|
+
action: 'RUN_COMPUTATION_DATE',
|
|
410
|
+
computation: t.name,
|
|
411
|
+
date: selectedDate,
|
|
412
|
+
pass: passToRun,
|
|
413
|
+
dispatchId: currentDispatchId,
|
|
414
|
+
triggerReason: t.reason,
|
|
415
|
+
resources: t.resources || 'standard',
|
|
416
|
+
// [NEW] Attach the eyeball
|
|
417
|
+
traceContext: {
|
|
418
|
+
traceId: traceId,
|
|
419
|
+
spanId: spanId,
|
|
420
|
+
sampled: true
|
|
421
|
+
}
|
|
422
|
+
};
|
|
423
|
+
};
|
|
393
424
|
|
|
394
425
|
const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
|
|
395
426
|
const highMemTasks = selectedTasks.filter(t => t.resources === 'high-mem').map(mapToTaskPayload);
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FILENAME: computation-system/helpers/computation_worker.js
|
|
3
|
-
* UPDATED:
|
|
4
|
-
*
|
|
5
|
-
* 1. Checks Ledger via Transaction before execution.
|
|
6
|
-
* 2. If Status is COMPLETED or FAILED, immediately ACKs (returns) to stop Pub/Sub loops.
|
|
7
|
-
* 3. Preserves Error 'stage' to ensure logic errors are not retried.
|
|
3
|
+
* UPDATED: Implements Google Cloud Monitoring Heartbeats.
|
|
4
|
+
* UPDATED: Implements Structured Logging with Trace Context.
|
|
8
5
|
*/
|
|
9
6
|
|
|
10
7
|
const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
|
|
@@ -13,6 +10,11 @@ const { StructuredLogger } = require('../logger/logger');
|
|
|
13
10
|
const { recordRunAttempt } = require('../persistence/RunRecorder');
|
|
14
11
|
const { normalizeName } = require('../utils/utils');
|
|
15
12
|
const os = require('os');
|
|
13
|
+
const monitoring = require('@google-cloud/monitoring'); // [NEW]
|
|
14
|
+
const monConfig = require('../config/monitoring_config'); // [NEW]
|
|
15
|
+
|
|
16
|
+
// Initialize the Google Monitoring Client once
|
|
17
|
+
const metricClient = new monitoring.MetricServiceClient();
|
|
16
18
|
|
|
17
19
|
let calculationPackage;
|
|
18
20
|
try { calculationPackage = require('aiden-shared-calculations-unified'); } catch (e) { throw e; }
|
|
@@ -20,21 +22,75 @@ const calculations = calculationPackage.calculations;
|
|
|
20
22
|
|
|
21
23
|
const MAX_RETRIES = 3;
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
/**
 * Pushes one integer data point for a custom metric to Google Cloud Monitoring.
 *
 * Best-effort by design: the write is fire-and-forget so it never blocks the
 * computation, and every failure is logged instead of thrown.
 *
 * @param {string} type - Full metric type (e.g. one of monConfig.metrics.*).
 * @param {number} value - Metric value; rounded to the nearest integer because
 *   the point is written as an int64. Non-finite values are skipped.
 * @param {Object<string, *>} [labels] - Metric labels. Values are stringified;
 *   null/undefined values are dropped.
 * @returns {Promise<void>} Resolves once the request has been handed to the
 *   client (not when the API call completes).
 */
async function pushMetric(type, value, labels) {
    if (!monConfig.enabled) return;

    const intValue = Math.round(value);
    if (!Number.isFinite(intValue)) {
        console.warn(`[Monitoring] Skipping non-numeric value for ${type}`);
        return;
    }

    try {
        // Label values are sent as strings; coerce so a numeric or boolean
        // label does not invalidate the whole request.
        const stringLabels = {};
        for (const [key, raw] of Object.entries(labels ?? {})) {
            if (raw != null) stringLabels[key] = String(raw);
        }

        const dataPoint = {
            // Timestamp seconds is an integer field, so drop the fractional part.
            interval: { endTime: { seconds: Math.floor(Date.now() / 1000) } },
            value: { int64Value: intValue },
        };

        const timeSeriesData = {
            name: metricClient.projectPath(monConfig.project),
            timeSeries: [{
                metric: {
                    type: type,
                    labels: stringLabels,
                },
                resource: {
                    type: 'global',
                    labels: { project_id: monConfig.project },
                },
                points: [dataPoint],
            }],
        };

        // Fire and forget (don't await to avoid blocking compute)
        metricClient.createTimeSeries(timeSeriesData).catch(err => console.error('[Monitoring] Push failed', err.message));
    } catch (e) {
        // Telemetry must never break the task, but do leave a trace of the failure.
        console.warn('[Monitoring] Metric setup failed', e.message);
    }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Starts two background heartbeats for a running task and tracks peak RSS.
 *
 *  - Firestore heartbeat (every 2s): writes the latest RSS and a timestamp to
 *    the ledger document under `telemetry.*`.
 *  - Cloud Monitoring heartbeat (every `intervalMs`): pushes the RSS as the
 *    memory metric via pushMetric.
 *
 * Both timers are unref'd so they never keep the process alive on their own.
 *
 * @param {Object} db - Firestore-like client exposing `doc(path).update(data)`.
 * @param {string} ledgerPath - Document path of the task's ledger entry.
 * @param {string} workerId - Identifier of this worker (metric label).
 * @param {string} computationName - Computation being run (metric label).
 * @param {string} [traceId] - Trace ID attached as a metric label; 'unknown' if absent.
 * @param {number} [intervalMs=5000] - Period of the Cloud Monitoring heartbeat.
 * @returns {{stop: function(): void, getPeak: function(): number}} `stop` clears
 *   both timers; `getPeak` returns the highest RSS observed so far, in MB.
 */
function startMemoryHeartbeat(db, ledgerPath, workerId, computationName, traceId, intervalMs = 5000) {
    let peakRss = 0;

    // Reads current RSS in MB and folds it into the running peak.
    const sampleRssMB = () => {
        const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
        if (rssMB > peakRss) peakRss = rssMB;
        return rssMB;
    };

    // Take a baseline immediately so tasks shorter than the first tick
    // still report a non-zero peak.
    sampleRssMB();

    // Firestore Heartbeat (Legacy/State)
    const firestoreTimer = setInterval(async () => {
        const rssMB = sampleRssMB();
        try {
            await db.doc(ledgerPath).update({
                'telemetry.lastMemoryMB': rssMB,
                'telemetry.lastHeartbeat': new Date()
            });
        } catch (_) {
            // Deliberately best-effort: a missed heartbeat write must not fail the task.
        }
    }, 2000); // 2s for DB

    // Google Monitoring Heartbeat (New/Out-of-Band)
    const monitoringTimer = setInterval(() => {
        const rssMB = sampleRssMB();

        // Push Memory Metric (pushMetric handles and logs its own failures)
        pushMetric(monConfig.metrics.memory, rssMB, {
            worker_id: workerId,
            computation: computationName,
            // Including trace_id in metric labels allows strict correlation
            // BUT can cause high cardinality. Use with caution or only for debugging.
            // Per user request, we include it to "attach eyeball".
            trace_id: traceId || 'unknown'
        });
    }, intervalMs); // 5s for API

    firestoreTimer.unref();
    monitoringTimer.unref();

    return {
        stop: () => { clearInterval(firestoreTimer); clearInterval(monitoringTimer); },
        // Sample once more on read so the value reflects memory at completion
        // even if no timer tick has fired since the last increase.
        getPeak: () => { sampleRssMB(); return peakRss; }
    };
}
|
|
33
91
|
|
|
34
92
|
/**
|
|
35
93
|
* STRICT IDEMPOTENCY GATE
|
|
36
|
-
* Uses a transaction to verify this task hasn't already been processed.
|
|
37
|
-
* Returns { shouldRun: boolean, leaseData: object }
|
|
38
94
|
*/
|
|
39
95
|
async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId) {
|
|
40
96
|
const docRef = db.doc(ledgerPath);
|
|
@@ -45,31 +101,17 @@ async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerI
|
|
|
45
101
|
|
|
46
102
|
if (doc.exists) {
|
|
47
103
|
const data = doc.data();
|
|
48
|
-
|
|
49
|
-
// 1. TERMINAL STATE CHECK
|
|
50
|
-
// If the task is already finalized, we MUST NOT run it again.
|
|
51
|
-
// This stops the infinite loop if Pub/Sub redelivers a FAILED task.
|
|
52
104
|
if (['COMPLETED', 'FAILED', 'CRASH'].includes(data.status)) {
|
|
53
105
|
return { shouldRun: false, reason: `Task already in terminal state: ${data.status}` };
|
|
54
106
|
}
|
|
55
|
-
|
|
56
|
-
// 2. DUPLICATE DELIVERY CHECK
|
|
57
|
-
// If it's IN_PROGRESS with the SAME dispatchId, we are likely seeing a Pub/Sub redelivery
|
|
58
|
-
// while the code is actually running. Ignore it.
|
|
59
107
|
if (data.status === 'IN_PROGRESS' && data.dispatchId === dispatchId) {
|
|
60
108
|
return { shouldRun: false, reason: 'Duplicate delivery: Task already IN_PROGRESS with same ID.' };
|
|
61
109
|
}
|
|
62
|
-
|
|
63
|
-
// 3. ZOMBIE CHECK (Optional safety, usually handled by Dispatcher)
|
|
64
|
-
// If it's IN_PROGRESS but clearly stale (older than 15 mins), we might steal it,
|
|
65
|
-
// but generally we let the Dispatcher handle zombies. For now, we block collision.
|
|
66
110
|
if (data.status === 'IN_PROGRESS') {
|
|
67
111
|
return { shouldRun: false, reason: 'Collision: Task currently IN_PROGRESS by another worker.' };
|
|
68
112
|
}
|
|
69
113
|
}
|
|
70
114
|
|
|
71
|
-
// 4. CLAIM LEASE
|
|
72
|
-
// If we get here, the task is either new (doesn't exist) or PENDING.
|
|
73
115
|
const lease = {
|
|
74
116
|
status: 'IN_PROGRESS',
|
|
75
117
|
workerId: workerId,
|
|
@@ -81,18 +123,13 @@ async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerI
|
|
|
81
123
|
return { shouldRun: true, leaseData: lease };
|
|
82
124
|
});
|
|
83
125
|
} catch (e) {
|
|
84
|
-
console.error(`[Idempotency] Transaction failed: ${e.message}`);
|
|
85
|
-
// If transaction fails (contention), assume we shouldn't run
|
|
86
126
|
return { shouldRun: false, reason: `Transaction Error: ${e.message}` };
|
|
87
127
|
}
|
|
88
128
|
}
|
|
89
129
|
|
|
90
130
|
async function handleComputationTask(message, config, dependencies) {
|
|
91
|
-
|
|
92
|
-
const runDeps = { ...dependencies, logger };
|
|
93
|
-
const db = dependencies.db;
|
|
131
|
+
// 1. Parse Message
|
|
94
132
|
let data;
|
|
95
|
-
|
|
96
133
|
try {
|
|
97
134
|
const raw = message.data?.message?.data || message.data || message.json;
|
|
98
135
|
data = (typeof raw === 'string') ? JSON.parse(Buffer.from(raw, 'base64').toString()) : raw;
|
|
@@ -100,25 +137,43 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
100
137
|
|
|
101
138
|
if (!data || data.action !== 'RUN_COMPUTATION_DATE') return;
|
|
102
139
|
|
|
103
|
-
const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, resources } = data;
|
|
140
|
+
const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, resources, traceContext } = data;
|
|
104
141
|
const resourceTier = resources || 'standard';
|
|
105
142
|
const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
|
|
106
|
-
|
|
107
143
|
const workerId = process.env.K_REVISION || os.hostname();
|
|
108
144
|
|
|
145
|
+
// 2. Initialize Trace-Aware Logger (The "Eyeball")
|
|
146
|
+
// If traceContext is present (from dispatcher), we inject it into globalMetadata
|
|
147
|
+
// so ALL logs from this execution line up in Google Trace.
|
|
148
|
+
const globalMetadata = {};
|
|
149
|
+
if (traceContext && monConfig.enabled) {
|
|
150
|
+
globalMetadata['logging.googleapis.com/trace'] = `projects/${monConfig.project}/traces/${traceContext.traceId}`;
|
|
151
|
+
globalMetadata['logging.googleapis.com/spanId'] = traceContext.spanId;
|
|
152
|
+
globalMetadata['logging.googleapis.com/trace_sampled'] = traceContext.sampled;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const logger = new StructuredLogger({
|
|
156
|
+
minLevel: config.minLevel || 'INFO',
|
|
157
|
+
enableStructured: true,
|
|
158
|
+
globalMetadata, // [NEW] Inject Trace
|
|
159
|
+
...config
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
const runDeps = { ...dependencies, logger };
|
|
163
|
+
const db = dependencies.db;
|
|
164
|
+
|
|
109
165
|
// --- STEP 1: IDEMPOTENCY CHECK ---
|
|
110
166
|
const gate = await checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId);
|
|
111
|
-
|
|
112
167
|
if (!gate.shouldRun) {
|
|
113
|
-
// [CRITICAL] We return successfully (ACK) to remove the message from Pub/Sub.
|
|
114
|
-
// We do NOT throw an error, because that would cause a retry.
|
|
115
168
|
logger.log('WARN', `[Worker] ๐ Idempotency Gate: Skipping ${computation}. Reason: ${gate.reason}`);
|
|
116
169
|
return;
|
|
117
170
|
}
|
|
118
171
|
|
|
119
172
|
logger.log('INFO', `[Worker] ๐ฅ Task: ${computation} (${date}) [Tier: ${resourceTier}] [ID: ${dispatchId}]`);
|
|
120
173
|
|
|
121
|
-
|
|
174
|
+
// --- STEP 2: START DUAL HEARTBEATS ---
|
|
175
|
+
// Starts both Firestore writes (legacy) AND Google Metrics (new)
|
|
176
|
+
const heartbeats = startMemoryHeartbeat(db, ledgerPath, workerId, computation, traceContext?.traceId);
|
|
122
177
|
|
|
123
178
|
try {
|
|
124
179
|
const manifest = getManifest(config.activeProductLines || [], calculations, runDeps);
|
|
@@ -129,11 +184,10 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
129
184
|
manifest, previousCategory, dependencyResultHashes
|
|
130
185
|
);
|
|
131
186
|
|
|
132
|
-
|
|
187
|
+
heartbeats.stop();
|
|
133
188
|
const failureReport = result?.updates?.failureReport || [];
|
|
134
189
|
const successUpdates = result?.updates?.successUpdates || {};
|
|
135
190
|
|
|
136
|
-
// [CRITICAL] Propagate Error Stage from inner logic
|
|
137
191
|
if (failureReport.length > 0) {
|
|
138
192
|
const reportedError = failureReport[0].error;
|
|
139
193
|
const errorObj = new Error(reportedError.message);
|
|
@@ -144,7 +198,7 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
144
198
|
const calcUpdate = successUpdates[normalizeName(computation)] || {};
|
|
145
199
|
const metrics = {
|
|
146
200
|
durationMs: Date.now() - startTime,
|
|
147
|
-
peakMemoryMB:
|
|
201
|
+
peakMemoryMB: heartbeats.getPeak(),
|
|
148
202
|
io: calcUpdate.metrics?.io,
|
|
149
203
|
storage: calcUpdate.metrics?.storage,
|
|
150
204
|
execution: calcUpdate.metrics?.execution,
|
|
@@ -153,34 +207,22 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
153
207
|
};
|
|
154
208
|
|
|
155
209
|
await db.doc(ledgerPath).update({ status: 'COMPLETED', completedAt: new Date() });
|
|
210
|
+
// Use the new recorder which also prints JSON for logs
|
|
156
211
|
await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason, resourceTier);
|
|
157
212
|
|
|
158
213
|
} catch (err) {
|
|
159
|
-
|
|
214
|
+
heartbeats.stop();
|
|
160
215
|
|
|
161
216
|
const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
|
|
162
217
|
|
|
163
|
-
// --- STEP 2: ERROR HANDLING ---
|
|
164
|
-
// If Logic Error OR Max Retries reached, mark FAILED and ACK.
|
|
165
218
|
if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
|
|
166
|
-
|
|
167
|
-
const errorPayload = {
|
|
168
|
-
message: err.message,
|
|
169
|
-
stage: err.stage || 'FATAL'
|
|
170
|
-
};
|
|
171
|
-
|
|
172
|
-
// This write ensures the Idempotency Gate blocks future retries
|
|
173
|
-
await db.doc(ledgerPath).set({
|
|
174
|
-
status: 'FAILED',
|
|
175
|
-
error: errorPayload,
|
|
176
|
-
failedAt: new Date()
|
|
177
|
-
}, { merge: true });
|
|
219
|
+
const errorPayload = { message: err.message, stage: err.stage || 'FATAL' };
|
|
178
220
|
|
|
179
|
-
await
|
|
180
|
-
|
|
221
|
+
await db.doc(ledgerPath).set({ status: 'FAILED', error: errorPayload, failedAt: new Date() }, { merge: true });
|
|
222
|
+
await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeats.getPeak() }, triggerReason, resourceTier);
|
|
223
|
+
return;
|
|
181
224
|
}
|
|
182
225
|
|
|
183
|
-
// Only throw (NACK) for transient system errors (Network, etc)
|
|
184
226
|
throw err;
|
|
185
227
|
}
|
|
186
228
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Structured Logging System for Computation Engine
|
|
3
|
-
* UPDATED: Added
|
|
3
|
+
* UPDATED: Added support for Google Cloud Trace Context injection.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
const crypto = require('crypto');
|
|
@@ -27,7 +27,8 @@ function generateProcessId(type, identifier, date = '') {
|
|
|
27
27
|
function formatLogEntry(entry) {
|
|
28
28
|
return JSON.stringify({
|
|
29
29
|
timestamp: entry.timestamp,
|
|
30
|
-
level: entry.level,
|
|
30
|
+
level: entry.level, // Google uses 'severity', mapped below
|
|
31
|
+
severity: entry.severity,
|
|
31
32
|
processType: entry.processType,
|
|
32
33
|
processId: entry.processId,
|
|
33
34
|
computationName: entry.computationName,
|
|
@@ -37,7 +38,9 @@ function formatLogEntry(entry) {
|
|
|
37
38
|
metadata: entry.metadata,
|
|
38
39
|
stats: entry.stats,
|
|
39
40
|
storage: entry.storage,
|
|
40
|
-
details: entry.details
|
|
41
|
+
details: entry.details,
|
|
42
|
+
// Spread the global Trace fields here
|
|
43
|
+
...entry.globalMetadata
|
|
41
44
|
});
|
|
42
45
|
}
|
|
43
46
|
|
|
@@ -51,6 +54,9 @@ class StructuredLogger {
|
|
|
51
54
|
...config
|
|
52
55
|
};
|
|
53
56
|
this.activeProcesses = new Map();
|
|
57
|
+
|
|
58
|
+
// [NEW] Store global metadata (like Trace IDs) for this logger instance
|
|
59
|
+
this.globalMetadata = config.globalMetadata || {};
|
|
54
60
|
}
|
|
55
61
|
|
|
56
62
|
startProcess(processType, computationName = null, date = null) {
|
|
@@ -98,6 +104,7 @@ class StructuredLogger {
|
|
|
98
104
|
console.log(JSON.stringify({
|
|
99
105
|
timestamp: new Date().toISOString(),
|
|
100
106
|
level: 'INFO',
|
|
107
|
+
severity: 'INFO',
|
|
101
108
|
processType: PROCESS_TYPES.ANALYSIS,
|
|
102
109
|
date: dateStr,
|
|
103
110
|
message: `Date Analysis for ${dateStr}`,
|
|
@@ -109,7 +116,8 @@ class StructuredLogger {
|
|
|
109
116
|
failedDependency: failedDependency.length,
|
|
110
117
|
skipped: skipped.length
|
|
111
118
|
},
|
|
112
|
-
details: analysisReport
|
|
119
|
+
details: analysisReport,
|
|
120
|
+
...this.globalMetadata
|
|
113
121
|
}));
|
|
114
122
|
}
|
|
115
123
|
|
|
@@ -202,9 +210,17 @@ class StructuredLogger {
|
|
|
202
210
|
delete finalContext.stats; delete finalContext.storage;
|
|
203
211
|
}
|
|
204
212
|
|
|
213
|
+
// Map Level to Google Severity
|
|
214
|
+
let severity = 'INFO';
|
|
215
|
+
if (numericLevel === LOG_LEVELS.WARN) severity = 'WARNING';
|
|
216
|
+
else if (numericLevel === LOG_LEVELS.ERROR) severity = 'ERROR';
|
|
217
|
+
else if (numericLevel === LOG_LEVELS.FATAL) severity = 'CRITICAL';
|
|
218
|
+
else if (numericLevel === LOG_LEVELS.DEBUG) severity = 'DEBUG';
|
|
219
|
+
|
|
205
220
|
const entry = {
|
|
206
221
|
timestamp: new Date().toISOString(),
|
|
207
222
|
level: Object.keys(LOG_LEVELS).find(k => LOG_LEVELS[k] === numericLevel) || 'INFO',
|
|
223
|
+
severity: severity,
|
|
208
224
|
processType,
|
|
209
225
|
processId,
|
|
210
226
|
computationName,
|
|
@@ -213,7 +229,8 @@ class StructuredLogger {
|
|
|
213
229
|
context: typeof finalContext === 'string' ? { error: finalContext } : finalContext,
|
|
214
230
|
metadata: finalMetadata,
|
|
215
231
|
stats: finalStats,
|
|
216
|
-
storage: finalStorage
|
|
232
|
+
storage: finalStorage,
|
|
233
|
+
globalMetadata: this.globalMetadata // [NEW] Pass trace context
|
|
217
234
|
};
|
|
218
235
|
|
|
219
236
|
if (numericLevel >= LOG_LEVELS.ERROR && this.config.includeStackTrace && finalContext.stack) {
|
|
@@ -225,6 +242,7 @@ class StructuredLogger {
|
|
|
225
242
|
}
|
|
226
243
|
|
|
227
244
|
if (this.config.enableStructured) {
|
|
245
|
+
// [CRITICAL] This is what Google Cloud Logging picks up
|
|
228
246
|
console.log(formatLogEntry(entry));
|
|
229
247
|
}
|
|
230
248
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Utility for recording computation run attempts (The Audit Logger).
|
|
3
3
|
* UPDATED: Stores 'trigger', 'execution' stats, 'cost' metrics, and 'forensics'.
|
|
4
|
+
* UPDATED: Emits Structured Logs for GCP Observability.
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
const { FieldValue } = require('../utils/utils');
|
|
@@ -49,18 +50,15 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
|
|
|
49
50
|
durationMs: detailedMetrics.durationMs || 0,
|
|
50
51
|
status: status,
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
resourceTier: resourceTier, // 'standard' or 'high-mem'
|
|
53
|
+
resourceTier: resourceTier,
|
|
54
54
|
peakMemoryMB: detailedMetrics.peakMemoryMB || 0,
|
|
55
55
|
|
|
56
|
-
// [NEW] IO Operations (for Cost Calc)
|
|
57
56
|
firestoreOps: {
|
|
58
57
|
reads: detailedMetrics.io?.reads || 0,
|
|
59
58
|
writes: detailedMetrics.io?.writes || 0,
|
|
60
59
|
deletes: detailedMetrics.io?.deletes || 0
|
|
61
60
|
},
|
|
62
61
|
|
|
63
|
-
// [NEW] Code Linkage (Forensics)
|
|
64
62
|
composition: detailedMetrics.composition || null,
|
|
65
63
|
|
|
66
64
|
trigger: {
|
|
@@ -86,7 +84,7 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
|
|
|
86
84
|
},
|
|
87
85
|
|
|
88
86
|
anomalies: anomalies,
|
|
89
|
-
_schemaVersion: '2.3'
|
|
87
|
+
_schemaVersion: '2.3'
|
|
90
88
|
};
|
|
91
89
|
|
|
92
90
|
if (error) {
|
|
@@ -98,12 +96,29 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
|
|
|
98
96
|
};
|
|
99
97
|
}
|
|
100
98
|
|
|
101
|
-
//
|
|
99
|
+
// 1. [NEW] Emit Structured Log for GCP Observability
|
|
100
|
+
// This allows you to query "jsonPayload.event = 'AUDIT_COMPLETE'" in Cloud Logging
|
|
101
|
+
// independent of Firestore status.
|
|
102
|
+
console.log(JSON.stringify({
|
|
103
|
+
event: 'AUDIT_COMPLETE',
|
|
104
|
+
severity: status === 'SUCCESS' ? 'INFO' : 'ERROR',
|
|
105
|
+
computation: computation,
|
|
106
|
+
runId: runId,
|
|
107
|
+
status: status,
|
|
108
|
+
metrics: {
|
|
109
|
+
duration: runEntry.durationMs,
|
|
110
|
+
memory: runEntry.peakMemoryMB,
|
|
111
|
+
writes: runEntry.firestoreOps.writes
|
|
112
|
+
},
|
|
113
|
+
error: runEntry.error ? runEntry.error.message : null
|
|
114
|
+
}));
|
|
115
|
+
|
|
116
|
+
// 2. Existing Firestore Aggregation Logic
|
|
102
117
|
const statsUpdate = {
|
|
103
118
|
lastRunAt: now,
|
|
104
119
|
lastRunStatus: status,
|
|
105
120
|
totalRuns: FieldValue.increment(1),
|
|
106
|
-
totalCostAccumulated: FieldValue.increment(0)
|
|
121
|
+
totalCostAccumulated: FieldValue.increment(0)
|
|
107
122
|
};
|
|
108
123
|
|
|
109
124
|
if (status === 'SUCCESS') { statsUpdate.successCount = FieldValue.increment(1);
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* FILENAME:
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* FILENAME: computation-system/tools/BuildReporter.js
|
|
3
|
+
* PURPOSE: Generates the "Build Report" - a comprehensive audit of the computation system's state.
|
|
4
|
+
* UPDATED: Trigger logic now strictly depends on Package Version changes (Calculations OR Module).
|
|
5
|
+
* UPDATED: Added Algorithmic "Dynamic Warnings" for impossibility analysis.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
const { analyzeDateExecution } = require('../WorkflowOrchestrator');
|
|
@@ -10,30 +10,50 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
|
|
|
10
10
|
const { normalizeName, getExpectedDateStrings, DEFINITIVE_EARLIEST_DATES, getEarliestDataDates } = require('../utils/utils');
|
|
11
11
|
const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
|
|
12
12
|
const SimRunner = require('../simulation/SimRunner');
|
|
13
|
-
const SYSTEM_EPOCH = require('../system_epoch');
|
|
13
|
+
const SYSTEM_EPOCH = require('../system_epoch');
|
|
14
|
+
const REPORTER_EPOCH = require('../reporter_epoch');
|
|
14
15
|
const pLimit = require('p-limit');
|
|
15
16
|
const path = require('path');
|
|
16
17
|
const crypto = require('crypto');
|
|
17
18
|
|
|
18
|
-
// Load package info for versioning
|
|
19
|
-
const packageJson = require(path.join(__dirname, '..', '..', '..', 'package.json'));
|
|
20
|
-
const packageVersion = packageJson.version;
|
|
21
|
-
|
|
22
19
|
const BUILD_RECORDS_COLLECTION = 'computation_build_records';
|
|
23
|
-
const BUILD_METADATA_DOC = 'system_build_metadata';
|
|
24
20
|
const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
|
|
25
|
-
|
|
21
|
+
|
|
22
|
+
/**
 * Resolves version numbers for both the Infrastructure (Module) and the Logic (Calculations).
 *
 * Each version is read from the package.json at a fixed relative path. If a
 * file cannot be loaded, or has no `version` field, that version is reported
 * as 'unknown' and (for load failures) a warning is logged; this never throws.
 *
 * @returns {{moduleVersion: string, calcVersion: string}} Resolved versions.
 */
function getPackageVersions() {
    // Loads one package.json via the supplied thunk and extracts its version.
    // The require() calls stay as string literals at the call sites below.
    const readVersion = (label, load) => {
        try {
            return load().version ?? 'unknown';
        } catch (e) {
            console.warn(`[BuildReporter] Could not resolve ${label} version`, e.message);
            return 'unknown';
        }
    };

    const moduleVersion = readVersion('Module', () => require('../../../../bulltrackers-module/package.json'));
    const calcVersion = readVersion('Calculations', () => require('../../../../calculations/package.json'));

    return { moduleVersion, calcVersion };
}
|
|
26
44
|
|
|
27
45
|
/**
|
|
28
46
|
* Publishes a message to trigger the dedicated Build Reporter Cloud Function.
|
|
29
|
-
* Replaces the old ensureBuildReport that ran locally on module load.
|
|
30
47
|
*/
|
|
31
48
|
async function requestBuildReport(config, dependencies) {
|
|
32
49
|
const { pubsubUtils, logger } = dependencies;
|
|
50
|
+
const { moduleVersion, calcVersion } = getPackageVersions();
|
|
51
|
+
|
|
33
52
|
try {
|
|
34
53
|
await pubsubUtils.publish(config.buildReporterTopic, {
|
|
35
54
|
requestedAt: new Date().toISOString(),
|
|
36
|
-
|
|
55
|
+
moduleVersion,
|
|
56
|
+
calcVersion
|
|
37
57
|
});
|
|
38
58
|
logger.log('INFO', `[BuildReporter] ๐ฐ๏ธ Trigger message sent to ${config.buildReporterTopic}`);
|
|
39
59
|
return { success: true };
|
|
@@ -57,9 +77,6 @@ async function handleBuildReportTrigger(message, context, config, dependencies,
|
|
|
57
77
|
}
|
|
58
78
|
}
|
|
59
79
|
|
|
60
|
-
/**
|
|
61
|
-
* Replaces expensive file walking with System Epoch + Manifest Hash.
|
|
62
|
-
*/
|
|
63
80
|
function getSystemFingerprint(manifest) {
|
|
64
81
|
const sortedManifestHashes = manifest.map(c => c.hash).sort().join('|');
|
|
65
82
|
return crypto.createHash('sha256')
|
|
@@ -67,20 +84,6 @@ function getSystemFingerprint(manifest) {
|
|
|
67
84
|
.digest('hex');
|
|
68
85
|
}
|
|
69
86
|
|
|
70
|
-
/**
|
|
71
|
-
* Increments the patch number for the current package version in Firestore.
|
|
72
|
-
*/
|
|
73
|
-
async function getNextBuildId(db, version) {
|
|
74
|
-
const metaRef = db.collection(BUILD_RECORDS_COLLECTION).doc(BUILD_METADATA_DOC);
|
|
75
|
-
return await db.runTransaction(async (t) => {
|
|
76
|
-
const doc = await t.get(metaRef);
|
|
77
|
-
const data = doc.exists ? doc.data() : {};
|
|
78
|
-
const currentPatch = (data[version] || 0) + 1;
|
|
79
|
-
t.set(metaRef, { [version]: currentPatch }, { merge: true });
|
|
80
|
-
return `v${version}_p${currentPatch}`;
|
|
81
|
-
});
|
|
82
|
-
}
|
|
83
|
-
|
|
84
87
|
function isDateBeforeAvailability(dateStr, calcManifest) {
|
|
85
88
|
const targetDate = new Date(dateStr + 'T00:00:00Z');
|
|
86
89
|
const deps = calcManifest.rootDataDependencies || [];
|
|
@@ -169,63 +172,100 @@ async function verifyBehavioralStability(candidates, manifestMap, dailyStatus, l
|
|
|
169
172
|
}
|
|
170
173
|
|
|
171
174
|
/**
|
|
172
|
-
*
|
|
173
|
-
*
|
|
175
|
+
* Algorithmic Analysis for Dynamic Warnings.
|
|
176
|
+
* Detects patterns in "Impossible" states (e.g., Weekends, Category-Wide).
|
|
177
|
+
*/
|
|
178
|
+
function generateDynamicWarnings(impossibleStats, categoryStats, totalDates) {
|
|
179
|
+
const warnings = [];
|
|
180
|
+
|
|
181
|
+
for (const [name, stats] of impossibleStats.entries()) {
|
|
182
|
+
const failureRate = stats.count / totalDates;
|
|
183
|
+
const categoryData = categoryStats.get(stats.category) || { total: 0, impossible: 0 };
|
|
184
|
+
const catTotal = Math.max(1, categoryData.total);
|
|
185
|
+
const catFailureRate = categoryData.impossible / catTotal;
|
|
186
|
+
|
|
187
|
+
// Skip negligible errors
|
|
188
|
+
if (failureRate < 0.05) continue;
|
|
189
|
+
|
|
190
|
+
let type = 'INTERMITTENT_FAILURE';
|
|
191
|
+
let contextMsg = `Fails on ${stats.count}/${totalDates} dates.`;
|
|
192
|
+
|
|
193
|
+
// 1. Weekend Analysis
|
|
194
|
+
const isWeekendOnly = stats.dates.every(d => {
|
|
195
|
+
const day = new Date(d + 'T00:00:00Z').getUTCDay();
|
|
196
|
+
return day === 0 || day === 6; // Sunday or Saturday
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
if (isWeekendOnly && failureRate < 0.35) {
|
|
200
|
+
type = 'WEEKEND_GAP';
|
|
201
|
+
contextMsg = 'Computation consistently fails only on weekends (likely price/market data gaps).';
|
|
202
|
+
} else if (failureRate > 0.95) {
|
|
203
|
+
type = 'PERMANENT_FAILURE';
|
|
204
|
+
if (catFailureRate > 0.8) {
|
|
205
|
+
contextMsg = `Entire category '${stats.category}' is failing (${(catFailureRate*100).toFixed(0)}% fail rate). Systemic issue.`;
|
|
206
|
+
} else {
|
|
207
|
+
contextMsg = `Category '${stats.category}' is healthy, but this calculation is 100% impossible. Outlier.`;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
warnings.push({
|
|
212
|
+
computation: name,
|
|
213
|
+
category: stats.category,
|
|
214
|
+
pass: stats.pass,
|
|
215
|
+
type: type,
|
|
216
|
+
failureRate: (failureRate * 100).toFixed(1) + '%',
|
|
217
|
+
message: contextMsg,
|
|
218
|
+
reasons: Array.from(stats.reasons).slice(0, 3)
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return warnings;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* The main reporter logic.
|
|
174
227
|
*/
|
|
175
228
|
async function generateBuildReport(config, dependencies, manifest) {
|
|
176
229
|
const { db, logger } = dependencies;
|
|
177
|
-
|
|
178
|
-
// FIX: Populate the global date registry before using it
|
|
179
|
-
await getEarliestDataDates(config, dependencies);
|
|
230
|
+
const { moduleVersion, calcVersion } = getPackageVersions();
|
|
180
231
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
// If fingerprints match AND the window is the same, we can truly skip.
|
|
194
|
-
if (latest &&
|
|
195
|
-
latest.systemFingerprint === currentFingerprint &&
|
|
196
|
-
!windowChanged &&
|
|
197
|
-
!epochChanged) {
|
|
198
|
-
logger.log('INFO', `[BuildReporter] ⚡ System fingerprint, window, and reporter epoch stable. Skipping report.`);
|
|
199
|
-
return { success: true, status: 'SKIPPED_IDENTICAL' };
|
|
232
|
+
// 1. Version-Based Trigger Check
|
|
233
|
+
// We create a composite ID for this exact code state
|
|
234
|
+
const buildId = `build_mod-${moduleVersion}_calc-${calcVersion}_${REPORTER_EPOCH}`;
|
|
235
|
+
|
|
236
|
+
const existingDoc = await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).get();
|
|
237
|
+
if (existingDoc.exists) {
|
|
238
|
+
const data = existingDoc.data();
|
|
239
|
+
if (data.status === 'COMPLETED') {
|
|
240
|
+
logger.log('INFO', `[BuildReporter] ⚡ Report already exists for ${buildId}. Skipping execution.`);
|
|
241
|
+
return { success: true, status: 'SKIPPED_EXISTING', buildId };
|
|
242
|
+
}
|
|
200
243
|
}
|
|
201
244
|
|
|
202
|
-
|
|
203
|
-
let reason = 'Code Change';
|
|
204
|
-
if (epochChanged) reason = 'Master Epoch Override';
|
|
205
|
-
else if (windowChanged) reason = 'Data Window Drift';
|
|
245
|
+
logger.log('INFO', `[BuildReporter] ๐ Generating new report for ${buildId}.`);
|
|
206
246
|
|
|
207
|
-
//
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
const today = new Date();
|
|
247
|
+
// 2. Initialize Data
|
|
248
|
+
await getEarliestDataDates(config, dependencies); // Ensure dates are loaded
|
|
249
|
+
const currentFingerprint = getSystemFingerprint(manifest);
|
|
212
250
|
const { absoluteEarliest } = DEFINITIVE_EARLIEST_DATES;
|
|
213
251
|
|
|
214
|
-
// Dynamic Window
|
|
252
|
+
// Dynamic Window
|
|
253
|
+
const today = new Date();
|
|
215
254
|
let dynamicDaysBack = 90;
|
|
216
255
|
if (absoluteEarliest) {
|
|
217
256
|
const diffTime = Math.abs(today - absoluteEarliest);
|
|
218
257
|
dynamicDaysBack = Math.ceil(diffTime / (1000 * 60 * 60 * 24)) + 2;
|
|
219
258
|
}
|
|
220
|
-
|
|
221
259
|
const startDate = new Date();
|
|
222
260
|
startDate.setDate(today.getDate() - dynamicDaysBack);
|
|
223
|
-
|
|
224
261
|
const datesToCheck = getExpectedDateStrings(startDate, today);
|
|
262
|
+
|
|
263
|
+
// 3. Stats Accumulators
|
|
225
264
|
const manifestMap = new Map(manifest.map(c => [normalizeName(c.name), c]));
|
|
226
265
|
const simHashCache = new Map();
|
|
227
|
-
|
|
228
266
|
const reverseGraph = new Map();
|
|
267
|
+
|
|
268
|
+
// Graph for Blast Radius
|
|
229
269
|
manifest.forEach(c => {
|
|
230
270
|
const parentName = normalizeName(c.name);
|
|
231
271
|
if (c.dependencies) {
|
|
@@ -237,30 +277,39 @@ async function generateBuildReport(config, dependencies, manifest) {
|
|
|
237
277
|
}
|
|
238
278
|
});
|
|
239
279
|
|
|
280
|
+
// Global Accumulators for Report
|
|
281
|
+
let totalRun = 0, totalReRun = 0, totalStable = 0, totalErrors = 0;
|
|
282
|
+
const globalMismatchStats = new Map(); // { stored: 0, mismatch: 0 }
|
|
283
|
+
const impossibleAnalysis = new Map(); // { count: 0, dates: [], reasons: Set, category, pass }
|
|
284
|
+
const categoryStats = new Map(); // { total: 0, impossible: 0 }
|
|
285
|
+
const runnablePerDate = {};
|
|
286
|
+
const impossiblePerDate = {};
|
|
287
|
+
|
|
240
288
|
const reportHeader = {
|
|
241
289
|
buildId,
|
|
242
|
-
|
|
243
|
-
systemFingerprint: currentFingerprint,
|
|
244
|
-
reporterEpoch: REPORTER_EPOCH,
|
|
245
|
-
windowEarliest: currentEarliestStr,
|
|
290
|
+
sharded: true,
|
|
246
291
|
generatedAt: new Date().toISOString(),
|
|
292
|
+
calculationPackageVersion: calcVersion,
|
|
293
|
+
bulltrackersModulePackageVersion: moduleVersion,
|
|
294
|
+
reporterEpoch: REPORTER_EPOCH,
|
|
247
295
|
status: 'IN_PROGRESS',
|
|
248
|
-
|
|
249
|
-
|
|
296
|
+
systemFingerprint: currentFingerprint,
|
|
297
|
+
earliestWindow: absoluteEarliest ? absoluteEarliest.toISOString().slice(0, 10) : 'UNKNOWN',
|
|
298
|
+
scanRange: `${datesToCheck[0]} to ${datesToCheck[datesToCheck.length-1]}`,
|
|
299
|
+
summary: {}
|
|
250
300
|
};
|
|
251
301
|
|
|
252
|
-
// Initialize the build record
|
|
253
302
|
await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).set(reportHeader);
|
|
254
303
|
|
|
255
|
-
|
|
256
|
-
const limit = pLimit(10);
|
|
257
|
-
|
|
258
|
-
// Process dates in chunks of 5 for checkpointed writing
|
|
304
|
+
// 4. Execution Loop
|
|
305
|
+
const limit = pLimit(10);
|
|
306
|
+
|
|
259
307
|
for (let i = 0; i < datesToCheck.length; i += 5) {
|
|
260
308
|
const dateBatch = datesToCheck.slice(i, i + 5);
|
|
261
309
|
|
|
262
310
|
const results = await Promise.all(dateBatch.map(dateStr => limit(async () => {
|
|
263
311
|
try {
|
|
312
|
+
// Fetch State
|
|
264
313
|
const fetchPromises = [
|
|
265
314
|
fetchComputationStatus(dateStr, config, dependencies),
|
|
266
315
|
checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES)
|
|
@@ -280,31 +329,73 @@ async function generateBuildReport(config, dependencies, manifest) {
|
|
|
280
329
|
const prevDailyStatus = (prevDateStr && prevRes) ? prevRes : (prevDateStr ? {} : null);
|
|
281
330
|
const rootDataStatus = availability ? availability.status : { hasPortfolio: false, hasHistory: false };
|
|
282
331
|
|
|
332
|
+
// Analyze
|
|
283
333
|
const analysis = analyzeDateExecution(dateStr, manifest, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
|
|
284
334
|
|
|
285
335
|
const dateSummary = {
|
|
286
|
-
run: [], rerun: [], stable: [], blocked: [], impossible: [], uptodate: []
|
|
287
|
-
meta: { totalIncluded: 0, totalExpected: 0, match: false }
|
|
336
|
+
run: [], rerun: [], stable: [], blocked: [], impossible: [], uptodate: []
|
|
288
337
|
};
|
|
289
338
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
const pushIfValid = (targetArray, item, extraReason = null) => {
|
|
339
|
+
// --- Metric Aggregation Logic ---
|
|
340
|
+
const processItem = (targetArray, item, extraReason) => {
|
|
294
341
|
const calcManifest = manifestMap.get(item.name);
|
|
295
|
-
if (calcManifest && isDateBeforeAvailability(dateStr, calcManifest)) return;
|
|
342
|
+
if (calcManifest && isDateBeforeAvailability(dateStr, calcManifest)) return; // Skip invalid dates
|
|
343
|
+
|
|
296
344
|
const entry = { name: item.name, reason: item.reason || extraReason, pass: calcManifest?.pass || '?' };
|
|
297
|
-
|
|
345
|
+
|
|
346
|
+
// Stats for Mismatch Rate
|
|
347
|
+
if (!globalMismatchStats.has(item.name)) globalMismatchStats.set(item.name, { stored: 0, mismatch: 0 });
|
|
348
|
+
const stats = globalMismatchStats.get(item.name);
|
|
349
|
+
|
|
350
|
+
// "Stored" implies we have a result or are about to run one.
|
|
351
|
+
// We count Stable + Rerun as "Stored History comparisons"
|
|
352
|
+
// Runnable is "New", not "Stored".
|
|
353
|
+
if (targetArray === dateSummary.rerun) {
|
|
354
|
+
stats.stored++;
|
|
355
|
+
stats.mismatch++;
|
|
356
|
+
entry.impact = calculateBlastRadius(item.name, reverseGraph);
|
|
357
|
+
} else if (targetArray === dateSummary.stable) {
|
|
358
|
+
stats.stored++;
|
|
359
|
+
}
|
|
360
|
+
|
|
298
361
|
targetArray.push(entry);
|
|
299
362
|
};
|
|
300
363
|
|
|
301
|
-
|
|
364
|
+
// Track Impossible Stats
|
|
365
|
+
analysis.impossible.forEach(item => {
|
|
366
|
+
const m = manifestMap.get(item.name);
|
|
367
|
+
if (!m) return;
|
|
368
|
+
if (!impossibleAnalysis.has(item.name)) {
|
|
369
|
+
impossibleAnalysis.set(item.name, {
|
|
370
|
+
count: 0, dates: [], reasons: new Set(), category: m.category, pass: m.pass
|
|
371
|
+
});
|
|
372
|
+
}
|
|
373
|
+
const imp = impossibleAnalysis.get(item.name);
|
|
374
|
+
imp.count++;
|
|
375
|
+
imp.dates.push(dateStr);
|
|
376
|
+
imp.reasons.add(item.reason);
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
// Track Category Stats (Total vs Impossible)
|
|
380
|
+
const allEvaluated = [...analysis.runnable, ...analysis.reRuns, ...analysis.skipped, ...analysis.impossible]; // Stable often in skipped
|
|
381
|
+
allEvaluated.forEach(item => {
|
|
382
|
+
const m = manifestMap.get(item.name);
|
|
383
|
+
if (!m) return;
|
|
384
|
+
if (!categoryStats.has(m.category)) categoryStats.set(m.category, { total: 0, impossible: 0 });
|
|
385
|
+
const cat = categoryStats.get(m.category);
|
|
386
|
+
cat.total++;
|
|
387
|
+
if (analysis.impossible.find(x => x.name === item.name)) cat.impossible++;
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
analysis.runnable.forEach(item => processItem(dateSummary.run, item, "New Calculation"));
|
|
302
391
|
|
|
392
|
+
// Handle Re-Runs (SimHash Verification)
|
|
303
393
|
if (analysis.reRuns.length > 0) {
|
|
304
394
|
const { trueReRuns, stableUpdates } = await verifyBehavioralStability(analysis.reRuns, manifestMap, dailyStatus, logger, simHashCache, db);
|
|
305
|
-
trueReRuns.forEach(item =>
|
|
306
|
-
stableUpdates.forEach(item =>
|
|
395
|
+
trueReRuns.forEach(item => processItem(dateSummary.rerun, item, "Logic Changed"));
|
|
396
|
+
stableUpdates.forEach(item => processItem(dateSummary.stable, item, "Logic Stable"));
|
|
307
397
|
|
|
398
|
+
// Auto-Heal Status if Stable
|
|
308
399
|
if (stableUpdates.length > 0) {
|
|
309
400
|
const updatesPayload = {};
|
|
310
401
|
for (const stable of stableUpdates) {
|
|
@@ -324,63 +415,84 @@ async function generateBuildReport(config, dependencies, manifest) {
|
|
|
324
415
|
}
|
|
325
416
|
}
|
|
326
417
|
|
|
327
|
-
|
|
328
|
-
analysis.
|
|
329
|
-
|
|
330
|
-
analysis.
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
dateSummary.blocked.length + dateSummary.impossible.length + dateSummary.uptodate.length;
|
|
334
|
-
dateSummary.meta.totalIncluded = includedCount;
|
|
335
|
-
dateSummary.meta.match = (includedCount === expectedCount);
|
|
418
|
+
// Add skipped items to Stable count for metrics
|
|
419
|
+
analysis.skipped.forEach(item => processItem(dateSummary.stable, item, "Up To Date"));
|
|
420
|
+
|
|
421
|
+
analysis.blocked.forEach(item => processItem(dateSummary.blocked, item));
|
|
422
|
+
analysis.failedDependency.forEach(item => processItem(dateSummary.blocked, item, "Dependency Missing"));
|
|
423
|
+
analysis.impossible.forEach(item => processItem(dateSummary.impossible, item));
|
|
336
424
|
|
|
337
|
-
//
|
|
338
|
-
|
|
425
|
+
// Per-Date Metrics
|
|
426
|
+
const runnableCount = dateSummary.run.length + dateSummary.rerun.length;
|
|
427
|
+
const impossibleCount = dateSummary.impossible.length;
|
|
428
|
+
|
|
429
|
+
return {
|
|
430
|
+
date: dateStr,
|
|
431
|
+
run: dateSummary.run.length,
|
|
432
|
+
rerun: dateSummary.rerun.length,
|
|
433
|
+
stable: dateSummary.stable.length,
|
|
434
|
+
runnableCount,
|
|
435
|
+
impossibleCount,
|
|
436
|
+
error: false,
|
|
437
|
+
summaryPayload: dateSummary
|
|
438
|
+
};
|
|
339
439
|
|
|
340
|
-
return { run: dateSummary.run.length, rerun: dateSummary.rerun.length, stable: dateSummary.stable.length, error: false };
|
|
341
440
|
} catch (err) {
|
|
342
441
|
logger.log('ERROR', `[BuildReporter] Analysis failed for ${dateStr}: ${err.message}`);
|
|
343
|
-
|
|
344
|
-
// [FIX] Write error record so the date appears in the report
|
|
345
|
-
await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).collection('details').doc(dateStr).set({
|
|
346
|
-
error: err.message,
|
|
347
|
-
status: 'ANALYSIS_FAILED',
|
|
348
|
-
meta: { totalIncluded: 0, totalExpected: 0, match: false }
|
|
349
|
-
}).catch(e => logger.log('ERROR', `Failed to write error record for ${dateStr}: ${e.message}`));
|
|
350
|
-
|
|
351
442
|
return { run: 0, rerun: 0, stable: 0, error: true };
|
|
352
443
|
}
|
|
353
444
|
})));
|
|
354
445
|
|
|
355
|
-
// Accumulate
|
|
356
|
-
|
|
357
|
-
if (res.error)
|
|
358
|
-
|
|
446
|
+
// Accumulate Batch Results
|
|
447
|
+
for (const res of results) {
|
|
448
|
+
if (res.error) {
|
|
449
|
+
totalErrors++;
|
|
450
|
+
} else {
|
|
359
451
|
totalRun += res.run;
|
|
360
452
|
totalReRun += res.rerun;
|
|
361
|
-
totalStable += res.stable;
|
|
453
|
+
totalStable += res.stable;
|
|
454
|
+
runnablePerDate[res.date] = res.runnableCount;
|
|
455
|
+
impossiblePerDate[res.date] = res.impossibleCount;
|
|
456
|
+
|
|
457
|
+
// Write detailed record
|
|
458
|
+
await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).collection('details').doc(res.date).set(res.summaryPayload);
|
|
362
459
|
}
|
|
363
|
-
}
|
|
364
|
-
|
|
460
|
+
}
|
|
461
|
+
|
|
365
462
|
await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).update({
|
|
366
463
|
checkpoint: `Processed ${Math.min(i + dateBatch.length, datesToCheck.length)}/${datesToCheck.length} dates`
|
|
367
464
|
});
|
|
368
465
|
}
|
|
369
466
|
|
|
467
|
+
// 5. Final Synthesis
|
|
468
|
+
const hashMismatchMetrics = {};
|
|
469
|
+
for (const [name, stats] of globalMismatchStats) {
|
|
470
|
+
if (stats.stored > 0) {
|
|
471
|
+
hashMismatchMetrics[name] = `${stats.mismatch}/${stats.stored}`;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
const dynamicWarnings = generateDynamicWarnings(impossibleAnalysis, categoryStats, datesToCheck.length);
|
|
476
|
+
|
|
370
477
|
reportHeader.status = 'COMPLETED';
|
|
371
478
|
reportHeader.summary = {
|
|
372
479
|
totalReRuns: totalReRun,
|
|
373
480
|
totalNew: totalRun,
|
|
374
481
|
totalStable: totalStable,
|
|
375
482
|
totalErrors: totalErrors,
|
|
376
|
-
|
|
483
|
+
|
|
484
|
+
// Expanded Metrics
|
|
485
|
+
runnablePerDate,
|
|
486
|
+
impossiblePerDate,
|
|
487
|
+
hashMismatches: hashMismatchMetrics,
|
|
488
|
+
dynamicWarnings: dynamicWarnings
|
|
377
489
|
};
|
|
378
490
|
|
|
379
|
-
//
|
|
491
|
+
// Save
|
|
380
492
|
await db.collection(BUILD_RECORDS_COLLECTION).doc(buildId).set(reportHeader);
|
|
381
493
|
await db.collection(BUILD_RECORDS_COLLECTION).doc('latest').set({ ...reportHeader, note: "Latest completed build report." });
|
|
382
494
|
|
|
383
|
-
logger.log('SUCCESS', `[BuildReporter] Build ${buildId} completed.
|
|
495
|
+
logger.log('SUCCESS', `[BuildReporter] Build ${buildId} completed. Warnings: ${dynamicWarnings.length}`);
|
|
384
496
|
|
|
385
497
|
return { success: true, buildId, summary: reportHeader.summary };
|
|
386
498
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bulltrackers-module",
|
|
3
|
-
"version": "1.0.339",
|
|
3
|
+
"version": "1.0.341",
|
|
4
4
|
"description": "Helper Functions for Bulltrackers.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"dependencies": {
|
|
32
32
|
"@google-cloud/firestore": "^7.11.3",
|
|
33
33
|
"@google-cloud/pubsub": "latest",
|
|
34
|
-
"aiden-shared-calculations-unified": "^1.0.
|
|
34
|
+
"aiden-shared-calculations-unified": "^1.0.110",
|
|
35
35
|
"cors": "^2.8.5",
|
|
36
36
|
"dotenv": "latest",
|
|
37
37
|
"express": "^4.19.2",
|
|
@@ -40,7 +40,8 @@
|
|
|
40
40
|
"node-graphviz": "^0.1.1",
|
|
41
41
|
"p-limit": "^3.1.0",
|
|
42
42
|
"require-all": "^3.0.0",
|
|
43
|
-
"sharedsetup": "latest"
|
|
43
|
+
"sharedsetup": "latest",
|
|
44
|
+
"@google-cloud/monitoring": "latest"
|
|
44
45
|
},
|
|
45
46
|
"devDependencies": {
|
|
46
47
|
"bulltracker-deployer": "file:../bulltracker-deployer"
|