bulltrackers-module 1.0.734 → 1.0.736

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/functions/computation-system-v2/config/bulltrackers.config.js +75 -5
  2. package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
  3. package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
  4. package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
  5. package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
  6. package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
  7. package/functions/computation-system-v2/framework/index.js +10 -3
  8. package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
  9. package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
  10. package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
  11. package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
  12. package/functions/computation-system-v2/handlers/index.js +10 -1
  13. package/functions/computation-system-v2/handlers/scheduler.js +85 -193
  14. package/functions/computation-system-v2/handlers/worker.js +242 -0
  15. package/functions/computation-system-v2/index.js +5 -0
  16. package/functions/computation-system-v2/test/analyze-results.js +238 -0
  17. package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
  18. package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
  19. package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
  20. package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
  21. package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
  22. package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
  23. package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
  24. package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
  25. package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
  26. package/functions/computation-system-v2/test/test-worker-pool.js +494 -0
  27. package/index.js +8 -39
  28. package/package.json +1 -1
  29. package/functions/computation-system-v2/computations/TestComputation.js +0 -46
  30. package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
@@ -1,79 +1,81 @@
1
1
  /**
2
2
  * @fileoverview Computation Profiler
3
3
  * Tracks execution metrics (duration, memory) for computations.
4
+ * * UPDATE: Now aggregates metrics in memory to allow single-row reporting.
4
5
  */
5
6
 
6
7
  class ComputationProfiler {
7
8
  constructor() {
8
- this.profiles = new Map();
9
+ this.activeProfiles = new Map();
10
+ this.completedStats = new Map(); // Stores arrays of durations/memory per computation
9
11
  }
10
12
 
11
13
  startProfile(computationName, entityId = null) {
12
- const key = entityId ? `${computationName}:${entityId}` : computationName;
14
+ const id = entityId ? `${computationName}:${entityId}` : `${computationName}:${Date.now()}:${Math.random()}`;
13
15
 
14
- this.profiles.set(key, {
16
+ this.activeProfiles.set(id, {
17
+ name: computationName,
15
18
  startTime: Date.now(),
16
- startMemory: process.memoryUsage().heapUsed,
17
- queryCount: 0,
18
- bytesProcessed: 0
19
+ startMemory: process.memoryUsage().heapUsed
19
20
  });
20
21
 
21
- return key;
22
+ return id;
22
23
  }
23
24
 
24
- endProfile(key, metadata = {}) {
25
- const profile = this.profiles.get(key);
26
- if (!profile) return;
25
+ endProfile(id, metadata = {}) {
26
+ const profile = this.activeProfiles.get(id);
27
+ if (!profile) return null;
27
28
 
28
29
  const endTime = Date.now();
29
30
  const endMemory = process.memoryUsage().heapUsed;
30
-
31
+ const duration = endTime - profile.startTime;
32
+ const memoryDelta = endMemory - profile.startMemory;
33
+
34
+ // Store for aggregation
35
+ if (!this.completedStats.has(profile.name)) {
36
+ this.completedStats.set(profile.name, { durations: [], memories: [] });
37
+ }
38
+ const stats = this.completedStats.get(profile.name);
39
+ stats.durations.push(duration);
40
+ stats.memories.push(memoryDelta);
41
+
42
+ this.activeProfiles.delete(id);
43
+
31
44
  return {
32
- duration: endTime - profile.startTime,
33
- memoryDelta: endMemory - profile.startMemory,
34
- queriesExecuted: profile.queryCount,
35
- bytesProcessed: profile.bytesProcessed,
45
+ duration,
46
+ memoryDelta,
36
47
  ...metadata
37
48
  };
38
49
  }
39
-
40
- async generateReport(dateStr, manifest) {
41
- const report = {
42
- date: dateStr,
43
- totalDuration: 0,
44
- totalQueries: 0,
45
- totalBytes: 0,
46
- computations: []
50
+
51
+ /**
52
+ * Retrieves aggregated statistics for a specific computation and clears them.
53
+ */
54
+ getAndClearStats(computationName) {
55
+ const stats = this.completedStats.get(computationName);
56
+ if (!stats || stats.durations.length === 0) return null;
57
+
58
+ const count = stats.durations.length;
59
+ const totalDuration = stats.durations.reduce((a, b) => a + b, 0);
60
+ const avgDuration = totalDuration / count;
61
+ const avgMemory = stats.memories.reduce((a, b) => a + b, 0) / count;
62
+
63
+ // Calculate P95
64
+ const sorted = [...stats.durations].sort((a, b) => a - b);
65
+ const p95 = sorted[Math.ceil(count * 0.95) - 1];
66
+
67
+ // Clear memory
68
+ this.completedStats.delete(computationName);
69
+
70
+ return {
71
+ entityCount: count,
72
+ totalDuration,
73
+ avgDuration,
74
+ p95Duration: p95,
75
+ minDuration: sorted[0],
76
+ maxDuration: sorted[count - 1],
77
+ avgMemoryDelta: avgMemory
47
78
  };
48
-
49
- for (const entry of manifest) {
50
- const compProfiles = Array.from(this.profiles.entries())
51
- .filter(([k]) => k.startsWith(entry.name));
52
-
53
- if (compProfiles.length === 0) continue;
54
-
55
- const durations = compProfiles.map(([, p]) => Date.now() - p.startTime);
56
- const memories = compProfiles.map(([, p]) => process.memoryUsage().heapUsed - p.startMemory);
57
-
58
- report.computations.push({
59
- name: entry.name,
60
- entityCount: compProfiles.length,
61
- avgDuration: durations.reduce((a, b) => a + b, 0) / durations.length,
62
- maxDuration: Math.max(...durations),
63
- p95Duration: this._percentile(durations, 0.95),
64
- avgMemory: memories.reduce((a, b) => a + b, 0) / memories.length,
65
- totalDuration: durations.reduce((a, b) => a + b, 0)
66
- });
67
- }
68
-
69
- report.computations.sort((a, b) => b.totalDuration - a.totalDuration);
70
- return report;
71
- }
72
-
73
- _percentile(arr, p) {
74
- const sorted = [...arr].sort((a, b) => a - b);
75
- const index = Math.ceil(sorted.length * p) - 1;
76
- return sorted[index];
77
79
  }
78
80
  }
79
81
 
@@ -1,42 +1,145 @@
1
1
  /**
2
- * @fileoverview Computation Checkpointer
3
- * Manages save/resume states for long-running computations using lightweight batch tracking.
2
+ * @fileoverview Computation Checkpointer (Append-Only Version)
3
+ * Manages save/resume states for long-running computations.
4
+ * Implements "Heartbeat & Steal" using an APPEND-ONLY pattern to avoid BigQuery Streaming Buffer locks.
5
+ * * UPDATE: Added 'force' parameter to initCheckpoint for local testing bypass.
4
6
  */
5
7
 
6
8
  const crypto = require('crypto');
7
9
 
10
+ // Max number of times the Scheduler can revive a Zombie before we give up.
11
+ const MAX_ATTEMPTS = 3;
12
+
8
13
  class Checkpointer {
9
14
  constructor(config, storage) {
10
15
  this.config = config;
11
16
  this.storage = storage;
12
17
  this.enabled = config.checkpointing?.enabled !== false;
18
+
19
+ // Identity of this specific process/worker
20
+ this.workerId = crypto.randomUUID();
21
+ this.heartbeatInterval = null;
22
+
23
+ // Local cache of the current state (for append-only writes)
24
+ this.currentState = null;
13
25
  }
14
26
 
15
- async initCheckpoint(dateStr, computationName, totalEntities = 0) {
27
+ /**
28
+ * Initialize or Resume a checkpoint.
29
+ * Handles zombie detection, atomic lock stealing, and version-aware dead-letter logic.
30
+ * @param {string} dateStr - Target date
31
+ * @param {string} computationName - Name of the computation
32
+ * @param {number} totalEntities - Total entities to process (approx)
33
+ * @param {string} codeHash - Hash of the current running code
34
+ * @param {boolean} force - If true, bypasses lock checks (steals immediately). Useful for testing.
35
+ */
36
+ async initCheckpoint(dateStr, computationName, totalEntities = 0, codeHash, force = false) {
16
37
  if (!this.enabled) return null;
17
38
 
18
- // Check if there is an existing running checkpoint we can resume
39
+ // 1. Get the LATEST checkpoint row (Snapshot)
40
+ // StorageManager must order by last_updated DESC limit 1 to handle append-only logs
19
41
  const existing = await this.storage.getLatestCheckpoint(dateStr, computationName);
20
42
 
21
- if (existing && existing.status === 'running') {
22
- return {
23
- id: existing.checkpoint_id,
24
- processedCount: existing.processed_count || 0,
25
- completedBatches: new Set(existing.completed_batches || []),
26
- lastEntityId: existing.last_entity_id,
27
- isResumed: true
28
- };
29
- } else if (existing && existing.status === 'completed') {
30
- // Already fully done
43
+ // FIX: Force Re-Run (Bypass everything and start fresh)
44
+ // This ensures even "completed" or "up-to-date" tasks are re-run when testing.
45
+ if (force) {
46
+ console.log(`[Checkpointer] ⚡ FORCE mode active. Resetting checkpoint for ${computationName}...`);
47
+ return this._createNew(dateStr, computationName, totalEntities, codeHash);
48
+ }
49
+
50
+ // LOGIC: Version Check
51
+ // If code changed, we generally want to invalidate previous partial progress
52
+ // unless it was fully completed.
53
+ if (existing && existing.code_hash && existing.code_hash !== codeHash && existing.status !== 'completed') {
54
+ console.log(`[Checkpointer] Code change detected (${existing.code_hash} -> ${codeHash}). Resetting checkpoint.`);
55
+ return this._createNew(dateStr, computationName, totalEntities, codeHash);
56
+ }
57
+
58
+ // CASE 1: No checkpoint exists -> Create New
59
+ if (!existing) {
60
+ return this._createNew(dateStr, computationName, totalEntities, codeHash);
61
+ }
62
+
63
+ // CASE 2: Already Completed -> Return cached
64
+ if (existing.status === 'completed') {
31
65
  return { isCompleted: true };
32
66
  }
33
67
 
34
- // Start new
35
- const checkpointId = crypto.randomUUID();
36
- await this.storage.initCheckpoint(dateStr, computationName, checkpointId, totalEntities);
68
+ // CASE 3: Zombie / Stale Lock Detection
69
+ // If status is 'running' but heartbeat is old (> 15 mins), it's a Zombie.
70
+ const lastUpdate = new Date(existing.last_updated).getTime();
71
+ const diffMinutes = (Date.now() - lastUpdate) / 1000 / 60;
72
+ const isStale = diffMinutes > 15; // 15 min timeout
73
+
74
+ // If it is running and valid (and not forced), we cannot touch it.
75
+ if (existing.status === 'running' && !isStale) {
76
+ return { isLocked: true, workerId: existing.worker_instance_id };
77
+ }
78
+
79
+ // CASE 4: Dead Letter Queue (Too many retries)
80
+ if (existing.attempts >= MAX_ATTEMPTS) {
81
+ console.warn(`[Checkpointer] ${computationName} exceeded max attempts (${existing.attempts}). Skipping.`);
82
+ return { skipped: true, reason: 'max_attempts_exceeded' };
83
+ }
84
+
85
+ // CASE 5: Resume / Steal Lock
86
+ // It's either 'failed', 'running' (stale), or 'running' (force). We steal it.
87
+ const reason = isStale ? 'ZOMBIE_DETECTED' : 'RESUME';
88
+ console.log(`[Checkpointer] Stealing lock for ${computationName} [${reason}] (Attempt ${existing.attempts + 1}/${MAX_ATTEMPTS})...`);
37
89
 
90
+ // Prepare new state based on previous
91
+ this.currentState = {
92
+ date: dateStr,
93
+ computation_name: computationName,
94
+ checkpoint_id: existing.checkpoint_id, // Keep ID to link history
95
+ worker_instance_id: this.workerId,
96
+ status: 'running',
97
+ processed_count: existing.processed_count || 0,
98
+ total_entities: totalEntities || existing.total_entities,
99
+ last_entity_id: existing.last_entity_id,
100
+ completed_batches: existing.completed_batches || [], // Keep previous progress
101
+ attempts: (existing.attempts || 1) + 1,
102
+ code_hash: codeHash,
103
+ started_at: existing.started_at,
104
+ last_updated: new Date().toISOString()
105
+ };
106
+
107
+ await this._persistState();
108
+ this._startHeartbeat();
109
+
38
110
  return {
39
- id: checkpointId,
111
+ id: this.currentState.checkpoint_id,
112
+ processedCount: this.currentState.processed_count,
113
+ completedBatches: new Set(this.currentState.completed_batches),
114
+ lastEntityId: this.currentState.last_entity_id,
115
+ isResumed: true
116
+ };
117
+ }
118
+
119
+ async _createNew(dateStr, computationName, totalEntities, codeHash) {
120
+ const id = crypto.randomUUID();
121
+
122
+ this.currentState = {
123
+ date: dateStr,
124
+ computation_name: computationName,
125
+ checkpoint_id: id,
126
+ worker_instance_id: this.workerId,
127
+ status: 'running',
128
+ processed_count: 0,
129
+ total_entities: totalEntities,
130
+ last_entity_id: null,
131
+ completed_batches: [],
132
+ attempts: 1,
133
+ code_hash: codeHash,
134
+ started_at: new Date().toISOString(),
135
+ last_updated: new Date().toISOString()
136
+ };
137
+
138
+ await this._persistState();
139
+ this._startHeartbeat();
140
+
141
+ return {
142
+ id,
40
143
  processedCount: 0,
41
144
  completedBatches: new Set(),
42
145
  lastEntityId: null,
@@ -45,21 +148,64 @@ class Checkpointer {
45
148
  }
46
149
 
47
150
  async markBatchComplete(dateStr, computationName, checkpointId, batchIndex, batchSize, lastEntityId) {
48
- if (!this.enabled || !checkpointId) return;
151
+ if (!this.enabled || !this.currentState) return;
49
152
 
50
- // We update processed count approximately based on batch size
153
+ // Update local state
51
154
  const processedCount = (batchIndex + 1) * batchSize;
155
+ const batches = new Set(this.currentState.completed_batches || []);
156
+ batches.add(batchIndex);
157
+
158
+ this.currentState.processed_count = processedCount;
159
+ this.currentState.last_entity_id = lastEntityId;
160
+ this.currentState.completed_batches = Array.from(batches); // Store as Array for BigQuery
161
+ this.currentState.last_updated = new Date().toISOString();
52
162
 
53
- await this.storage.updateCheckpoint(dateStr, computationName, checkpointId, {
54
- processedCount,
55
- lastEntityId,
56
- batchIndex
57
- });
163
+ // Append new state row
164
+ await this._persistState();
58
165
  }
59
166
 
60
167
  async complete(dateStr, computationName, checkpointId) {
61
- if (!this.enabled || !checkpointId) return;
62
- await this.storage.completeCheckpoint(dateStr, computationName, checkpointId);
168
+ this._stopHeartbeat();
169
+ if (!this.enabled || !this.currentState) return;
170
+
171
+ this.currentState.status = 'completed';
172
+ this.currentState.last_updated = new Date().toISOString();
173
+
174
+ await this._persistState();
175
+ this.currentState = null; // Done
176
+ }
177
+
178
+ async _persistState() {
179
+ if (!this.storage.saveCheckpoint) {
180
+ if (this.storage.createCheckpoint) {
181
+ return this.storage.createCheckpoint(this.currentState);
182
+ }
183
+ throw new Error("StorageManager is missing 'saveCheckpoint' or 'createCheckpoint' method for Append-Only logic.");
184
+ }
185
+ return this.storage.saveCheckpoint(this.currentState);
186
+ }
187
+
188
+ _startHeartbeat() {
189
+ if (this.heartbeatInterval) clearInterval(this.heartbeatInterval);
190
+
191
+ // Pulse every 2 minutes
192
+ this.heartbeatInterval = setInterval(async () => {
193
+ if (this.currentState) {
194
+ this.currentState.last_updated = new Date().toISOString();
195
+ try {
196
+ await this._persistState();
197
+ } catch (e) {
198
+ console.warn(`[Checkpointer] Heartbeat failed: ${e.message}`);
199
+ }
200
+ }
201
+ }, 2 * 60 * 1000);
202
+ }
203
+
204
+ _stopHeartbeat() {
205
+ if (this.heartbeatInterval) {
206
+ clearInterval(this.heartbeatInterval);
207
+ this.heartbeatInterval = null;
208
+ }
63
209
  }
64
210
  }
65
211