bulltrackers-module 1.0.275 → 1.0.277

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  /**
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
- * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
+ * UPDATED: Includes Content-Based Dependency Short-Circuiting.
  * UPDATED: Includes 'Audit Upgrade' check.
  * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
@@ -18,21 +18,41 @@ function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[c

  /**
  * Analyzes whether calculations should run, be skipped, or are blocked.
+ * [NEW] Implements ResultHash short-circuit logic.
  */
  function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
  const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
  const simulationStatus = { ...dailyStatus };
  const isTargetToday = (dateStr === new Date().toISOString().slice(0, 10));

- const isDepSatisfied = (depName, currentStatusMap, manifestMap) => {
+ // Helper: Validates if a dependency is satisfied, either by Code Match OR Content Match
+ const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
  const norm = normalizeName(depName);
- const stored = currentStatusMap[norm];
+ const storedDep = currentStatusMap[norm];
  const depManifest = manifestMap.get(norm);
- if (!stored) return false;
- if (typeof stored.hash === 'string' && stored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
- if (!depManifest) return false;
- if (stored.hash !== depManifest.hash) return false;
- return true;
+
+ // 1. Basic Existence Checks
+ if (!storedDep) return false;
+ if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
+ if (!depManifest) return false;
+
+ // 2. Code Hash Check (The Standard Check)
+ if (storedDep.hash === depManifest.hash) return true;
+
+ // 3. [NEW] Content-Based Short-Circuit Check
+ // If Code Hash mismatch, check if the *Result Hash* is identical to what we used last time.
+ // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
+ // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
+ // storedDep.resultHash = The current ResultHash of A.
+ if (dependentStoredStatus &&
+ dependentStoredStatus.dependencyResultHashes &&
+ dependentStoredStatus.dependencyResultHashes[depName] &&
+ storedDep.resultHash &&
+ storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
+ return true; // Short-circuit: The output didn't change, so we are safe.
+ }
+
+ return false;
  };

  for (const calc of calcsInPass) {
@@ -42,6 +62,17 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  const storedCategory = stored ? stored.category : null;
  const currentHash = calc.hash;

+ // Collect current result hashes of dependencies for the next run
+ const currentDependencyResultHashes = {};
+ if (calc.dependencies) {
+ calc.dependencies.forEach(d => {
+ const normD = normalizeName(d);
+ if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
+ currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
+ }
+ });
+ }
+
  const markImpossible = (reason, type = 'GENERIC') => {
  report.impossible.push({ name: cName, reason });
  const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
@@ -49,25 +80,31 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  };

  const markRunnable = (isReRun = false, reRunDetails = null) => {
- if (isReRun) report.reRuns.push(reRunDetails);
- else report.runnable.push({ name: cName, ...reRunDetails });
+ const payload = {
+ name: cName,
+ ...reRunDetails,
+ dependencyResultHashes: currentDependencyResultHashes // Pass forward
+ };
+ if (isReRun) report.reRuns.push(payload);
+ else report.runnable.push(payload);
  // Simulate success so dependents can pass their check
- simulationStatus[cName] = { hash: currentHash, category: calc.category, composition: calc.composition };
+ simulationStatus[cName] = {
+ hash: currentHash,
+ resultHash: 'SIMULATED',
+ category: calc.category,
+ composition: calc.composition
+ };
  };

  let migrationOldCategory = null;
  if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }

- // [REMOVED] The "Permanently Impossible" optimization block was here.
- // Removal ensures we re-check Root Data every time, allowing for visibility and recovery.
-
- // 1. Check Root Data (The Primary Gate)
+ // 1. Check Root Data
  const rootCheck = checkRootDependencies(calc, rootDataStatus);

  if (!rootCheck.canRun) {
  const missingStr = rootCheck.missing.join(', ');
  if (!isTargetToday) {
- // If previously impossible, this confirms it. If previously run, this is a regression.
  markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
  } else {
  report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
@@ -85,15 +122,15 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  const depStored = simulationStatus[normDep];
  if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
  dependencyIsImpossible = true;
- impossibleDepCause = dep; // Capture the culprit
+ impossibleDepCause = dep;
  break;
  }
- if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
+ // Pass 'stored' (this calc's status) to check short-circuiting
+ if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
  }
  }

  if (dependencyIsImpossible) {
- // [UPDATED] Include the name of the failing dependency in the reason string
  markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
  continue;
  }
@@ -117,7 +154,12 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  markRunnable(false, { reason: "New Calculation" });
  }
  else if (storedHash !== currentHash) {
- // Smart Audit Logic
+ // [NEW] Check if Dependencies caused this, and if their content is actually same
+ // Note: If we are here, it means code changed.
+ // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
+ // But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
+ // So we strictly re-run if code changes.
+
  let changeReason = "Hash Mismatch (Unknown)";
  const oldComp = stored.composition;
  const newComp = calc.composition;
@@ -134,6 +176,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
  }
  else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
+ // Dependency Hash Mismatch.
+ // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
+ // But typically if code hash mismatches, we re-run.
+ // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
+ // won't need to re-run if *this* calculation produces the same output.
  const changedDeps = [];
  for(const dKey in newComp.deps) {
  if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
@@ -158,7 +205,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  else if (migrationOldCategory) {
  markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
  }
- // Audit Upgrade Check
  else if (!stored.composition) {
  markRunnable(true, {
  name: cName,
@@ -177,8 +223,9 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,

  /**
  * DIRECT EXECUTION PIPELINE (For Workers)
+ * [UPDATED] Accepts dependencyResultHashes
  */
- async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null) {
+ async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
  const { logger } = dependencies;
  const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);

@@ -187,6 +234,9 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep

  if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }

+ // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
+ calcManifest.dependencyResultHashes = dependencyResultHashes;
+
  if (previousCategory) {
  calcManifest.previousCategory = previousCategory;
  logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
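
The orchestrator changes above hinge on each calculation's daily status document carrying a content hash of its output (resultHash) alongside its code hash, plus a record of the dependency output hashes it consumed (dependencyResultHashes). The following is a minimal sketch of that decision, using invented hash values and a hypothetical helper name (dependencyStillSatisfied) rather than the package's own isDepSatisfied:

// Illustrative status shapes; field names match the diff, values are invented.
const statusOfA = { hash: 'code-hash-A-v2', resultHash: 'result-hash-A-1' };
const statusOfB = { hash: 'code-hash-B-v1', dependencyResultHashes: { A: 'result-hash-A-1' } };

// Distilled decision: a code match OR a content match satisfies the dependency.
function dependencyStillSatisfied(depStatus, depManifestHash, dependentStatus, depName) {
  if (!depStatus) return false;
  if (depStatus.hash === depManifestHash) return true;               // code unchanged
  const remembered = dependentStatus && dependentStatus.dependencyResultHashes
    ? dependentStatus.dependencyResultHashes[depName] : null;
  return Boolean(remembered && depStatus.resultHash === remembered); // output unchanged
}

// A's code hash moved to v3, but its output hash is still what B consumed last run:
console.log(dependencyStillSatisfied(statusOfA, 'code-hash-A-v3', statusOfB, 'A')); // true
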
@@ -1,5 +1,6 @@
  /**
  * @fileoverview Execution-scoped data loader with caching.
+ * UPDATED: Handles Decompression of Shards.
  */
  const {
  loadDailyInsights,
@@ -7,6 +8,7 @@ const {
  getRelevantShardRefs,
  getPriceShardRefs
  } = require('../utils/data_loader');
+ const zlib = require('zlib'); // [NEW]

  class CachedDataLoader {
  constructor(config, dependencies) {
@@ -19,6 +21,19 @@ class CachedDataLoader {
  };
  }

+ // [NEW] Decompression Helper
+ _tryDecompress(data) {
+ if (data && data._compressed === true && data.payload) {
+ try {
+ return JSON.parse(zlib.gunzipSync(data.payload).toString());
+ } catch (e) {
+ console.error('[CachedDataLoader] Decompression failed', e);
+ return {};
+ }
+ }
+ return data;
+ }
+
  async loadMappings() {
  if (this.cache.mappings) return this.cache.mappings;
  const { calculationUtils } = this.deps;
@@ -52,7 +67,8 @@ class CachedDataLoader {
  try {
  const snap = await docRef.get();
  if (!snap.exists) return {};
- return snap.data();
+ // [UPDATED] Use decompression helper
+ return this._tryDecompress(snap.data());
  } catch (e) {
  console.error(`Error loading shard ${docRef.path}:`, e);
  return {};
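
The loaders above rely on a shared storage convention: a document that was gzipped on write carries _compressed: true and a payload Bytes field. Below is a small round-trip sketch using Node's built-in zlib; the document shape mirrors the diff, while the sample data is invented:

const zlib = require('zlib');

const original = { AAPL: { score: 1.2 }, TSLA: { score: -0.4 } }; // invented sample result

// What the writer stores when a result fits in one gzipped document:
const storedDoc = {
  _compressed: true,
  _completed: true,
  payload: zlib.gzipSync(Buffer.from(JSON.stringify(original))),
};

// What a reader such as _tryDecompress() recovers; plain docs pass through untouched:
function tryDecompress(data) {
  if (data && data._compressed === true && data.payload) {
    return JSON.parse(zlib.gunzipSync(data.payload).toString());
  }
  return data;
}

console.log(tryDecompress(storedDoc));       // { AAPL: { score: 1.2 }, TSLA: { score: -0.4 } }
console.log(tryDecompress({ plain: true })); // { plain: true }
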
@@ -1,7 +1,8 @@
  /**
- * @fileoverview Fetches results from previous computations, handling auto-sharding hydration.
+ * @fileoverview Fetches results from previous computations, handling auto-sharding and decompression.
  */
  const { normalizeName } = require('../utils/utils');
+ const zlib = require('zlib'); // [NEW]

  async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db }, includeSelf = false) {
  const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
@@ -39,7 +40,20 @@ async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config,
  const name = names[i];
  if (!doc.exists) return;
  const data = doc.data();
- if (data._sharded === true) {
+
+ // --- [NEW] DECOMPRESSION LOGIC ---
+ if (data._compressed === true && data.payload) {
+ try {
+ // Firestore returns Buffers automatically
+ const unzipped = zlib.gunzipSync(data.payload);
+ fetched[name] = JSON.parse(unzipped.toString());
+ } catch (e) {
+ console.error(`[Hydration] Failed to decompress ${name}:`, e);
+ fetched[name] = {};
+ }
+ }
+ // --- END NEW LOGIC ---
+ else if (data._sharded === true) {
  hydrationPromises.push(hydrateAutoShardedResult(doc.ref, name));
  } else if (data._completed) {
  fetched[name] = data;
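
With this hunk a stored result document can arrive in one of three shapes, and the hydration loop checks them in a fixed order. A condensed sketch of that branching follows; classifyResultDoc is an illustrative name, not part of the package:

// Order matters: compressed blobs are handled before the sharding pointer check.
function classifyResultDoc(data) {
  if (data._compressed === true && data.payload) return 'GZIP_BLOB'; // gunzip the payload in place
  if (data._sharded === true) return 'SHARDED';                      // hydrate shard_N documents
  if (data._completed) return 'INLINE';                              // use the document as-is
  return 'UNKNOWN';
}
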
@@ -1,7 +1,7 @@
  /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED: Adds 'dispatchId' to payloads for precise tracing.
+ * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
  */

  const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -11,7 +11,7 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
  const { generateCodeHash } = require('../topology/HashManager');
  const pLimit = require('p-limit');
- const crypto = require('crypto'); // REQUIRED for UUID
+ const crypto = require('crypto');

  const TOPIC_NAME = 'computation-tasks';
  const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
@@ -105,18 +105,19 @@ async function dispatchComputationPass(config, dependencies, computationManifest

  const validToRun = [...report.runnable, ...report.reRuns];
  validToRun.forEach(item => {
- // [NEW] Generate Unique ID
  const uniqueDispatchId = crypto.randomUUID();

  tasksToDispatch.push({
  action: 'RUN_COMPUTATION_DATE',
- dispatchId: uniqueDispatchId, // <--- TRACKING ID
+ dispatchId: uniqueDispatchId,
  date: dateStr,
  pass: passToRun,
  computation: normalizeName(item.name),
  hash: item.hash || item.newHash,
  previousCategory: item.previousCategory || null,
- triggerReason: item.reason || "Unknown",
+ triggerReason: item.reason || "Unknown",
+ // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
+ dependencyResultHashes: item.dependencyResultHashes || {},
  timestamp: Date.now()
  });
  });
@@ -141,16 +142,24 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  await db.runTransaction(async (t) => {
  const doc = await t.get(ledgerRef);

- // If task is PENDING, we assume it's running.
- // However, we now OVERWRITE if it's been pending for > 1 hour (stuck state)
- // For safety on your budget, we stick to strict "PENDING" check.
- if (doc.exists && doc.data().status === 'PENDING') {
- return false;
+ // [NEW] Zombie Task Recovery Check
+ if (doc.exists) {
+ const data = doc.data();
+ const now = Date.now();
+ const isPending = data.status === 'PENDING';
+ // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
+ const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
+ // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
+ const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
+
+ if (isPending && !isLeaseExpired && !isLegacyZombie) {
+ return false; // Valid active pending task, do not double dispatch
+ }
  }

  t.set(ledgerRef, {
  status: 'PENDING',
- dispatchId: task.dispatchId, // <--- Store ID in Ledger
+ dispatchId: task.dispatchId,
  computation: task.computation,
  expectedHash: task.hash || 'unknown',
  createdAt: new Date(),
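
The transaction above treats a PENDING ledger entry as live only while its lease holds. Here is a distilled sketch of that predicate, using plain millisecond timestamps (the real code reads a Firestore Timestamp via createdAt.toMillis(); isZombie is an illustrative name):

function isZombie(ledgerEntry, nowMs = Date.now()) {
  if (ledgerEntry.status !== 'PENDING') return false;   // only PENDING entries can be zombies
  if (ledgerEntry.leaseExpiresAt) {
    return ledgerEntry.leaseExpiresAt < nowMs;          // lease-based detection
  }
  const ageMs = ledgerEntry.createdAt != null ? nowMs - ledgerEntry.createdAt : Infinity;
  return ageMs > 60 * 60 * 1000;                        // legacy fallback: stuck for over an hour
}

// A PENDING task whose lease lapsed five minutes ago gets re-dispatched:
console.log(isZombie({ status: 'PENDING', leaseExpiresAt: Date.now() - 5 * 60 * 1000 })); // true
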
@@ -1,8 +1,7 @@
  /**
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes computation tasks from Pub/Sub.
- * UPDATED: Logs 'dispatchId' for tracing.
- * UPDATED: Includes Deterministic Error Short-Circuit (Poison Pill Protection).
+ * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
  */

  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -14,7 +13,7 @@ let calculationPackage;
  try { calculationPackage = require('aiden-shared-calculations-unified');
  } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
  const calculations = calculationPackage.calculations;
- const MAX_RETRIES = 0; // <--- CHANGED TO 0 (Application level check, though Pub/Sub config is better)
+ const MAX_RETRIES = 0;

  async function handleComputationTask(message, config, dependencies) {
  const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
@@ -31,17 +30,31 @@ async function handleComputationTask(message, config, dependencies) {

  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }

- // Extract Trigger Reason and Dispatch ID
- const { date, pass, computation, previousCategory, triggerReason, dispatchId } = data;
+ // Extract fields including new dependencyResultHashes
+ const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;

  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }

- // LOG THE ID FOR TRACING
  logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
  dispatchId: dispatchId || 'legacy',
  reason: triggerReason
  });

+ // [NEW] LEASE CLAIMING
+ // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
+ try {
+ const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+ status: 'IN_PROGRESS',
+ workerId: process.env.K_REVISION || 'unknown',
+ startedAt: new Date(),
+ leaseExpiresAt: Date.now() + leaseTimeMs,
+ dispatchId: dispatchId
+ }, { merge: true });
+ } catch (leaseErr) {
+ logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
+ }
+
  let computationManifest;
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
  } catch (manifestError) {
@@ -59,7 +72,8 @@ async function handleComputationTask(message, config, dependencies) {
  config,
  runDependencies,
  computationManifest,
- previousCategory
+ previousCategory,
+ dependencyResultHashes // [NEW] Pass hashes to executor
  );
  const duration = Date.now() - startTime;

@@ -79,6 +93,13 @@ async function handleComputationTask(message, config, dependencies) {
  const metrics = successData.metrics || {};
  metrics.durationMs = duration;
  logger.log('INFO', `[Worker] ✅ Stored: ${computation}. ID: ${dispatchId}`);
+
+ // Mark Ledger as COMPLETED
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
+ status: 'COMPLETED',
+ completedAt: new Date()
+ }).catch(() => {});
+
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
  }
  else {
@@ -86,7 +107,6 @@ async function handleComputationTask(message, config, dependencies) {
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
  }
  } catch (err) {
- // --- DETERMINISTIC ERROR SHORT-CIRCUIT ---
  const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
  err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
  (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
@@ -101,15 +121,12 @@ async function handleComputationTask(message, config, dependencies) {
  finalAttemptAt: new Date(),
  failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
  });
- // Return success to Pub/Sub to STOP retries
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
  return;
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
  }

- // --- STANDARD RETRY ---
  const retryCount = message.deliveryAttempt || 0;
- // NOTE: If you configure Pub/Sub Max Attempts = 1, this logic is redundant but safe.
  if (retryCount >= MAX_RETRIES) {
  logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
  try {
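
End to end, the dispatcher and worker now coordinate through the task payload and the audit-ledger document. The sketch below shows both with sample values only; the field names appear in the diff, while the computation name and hashes are invented:

// Pub/Sub payload built by the dispatcher:
const taskPayload = {
  action: 'RUN_COMPUTATION_DATE',
  dispatchId: 'a1b2c3d4-uuid',                         // crypto.randomUUID()
  date: '2024-06-01',
  pass: 'pass_2',
  computation: 'portfolio_momentum',                   // invented calculation name
  hash: 'code-hash-v7',
  previousCategory: null,
  triggerReason: 'New Calculation',
  dependencyResultHashes: { price_history: 'abc123' }, // consumed later by ResultCommitter
  timestamp: Date.now(),
};

// Ledger lifecycle for the matching task document:
// PENDING (dispatcher) -> IN_PROGRESS + leaseExpiresAt (worker claims the lease)
// -> COMPLETED on success, or dispatchable again once the lease expires (zombie recovery).
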
@@ -1,26 +1,23 @@
  /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
- * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
- * FIX: Throws proper Error objects.
+ * UPDATED: Implements GZIP Compression for efficient storage.
+ * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
  */
- const { commitBatchInChunks } = require('./FirestoreUtils');
+ const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
  const { updateComputationStatus } = require('./StatusRepository');
  const { batchStoreSchemas } = require('../utils/schema_capture');
  const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
  const { HeuristicValidator } = require('./ResultsValidator');
  const validationOverrides = require('../config/validation_overrides');
  const pLimit = require('p-limit');
+ const zlib = require('zlib'); // [NEW] Compression Lib

  const NON_RETRYABLE_ERRORS = [
  'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
- // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
  ];

  /**
  * Commits results to Firestore.
- * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
- * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
  */
  async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
  const successUpdates = {};
@@ -52,7 +49,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const result = await calc.getResult();
  const overrides = validationOverrides[calc.manifest.name] || {};

- // Only validate if we have data or if it's the final flush
+ // Validation
  if (result && Object.keys(result).length > 0) {
  const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
  if (!healthCheck.valid) {
@@ -66,16 +63,20 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW

  const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);

- // If empty and standard mode, record 0-byte success.
- // If empty and INTERMEDIATE flush, just skip this calc for this flush.
+ // Calculate Result Hash (Content-Based)
+ const resultHash = isEmpty ? 'empty' : generateDataHash(result);
+
+ // Handle Empty Results
  if (isEmpty) {
  if (flushMode === 'INTERMEDIATE') {
- nextShardIndexes[name] = currentShardIndex; // No change
+ nextShardIndexes[name] = currentShardIndex;
  continue;
  }
  if (calc.manifest.hash) {
  successUpdates[name] = {
- hash: calc.manifest.hash,
+ hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -90,7 +91,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));

  if (isMultiDate) {
- // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
  const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
  const dailyData = result[historicalDate];
  if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -102,7 +102,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  .collection(config.computationsSubcollection)
  .doc(name);

- // For historical Fan-Out, we assume standard flush mode (not incremental) for now
  await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
  }));
  await Promise.all(datePromises);
@@ -110,6 +109,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  if (calc.manifest.hash) {
  successUpdates[name] = {
  hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -117,7 +118,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }
  }

  } else {
- // --- STANDARD / INCREMENTAL MODE ---
  const mainDocRef = db.collection(config.resultsCollection)
  .doc(dStr)
  .collection(config.resultsSubcollection)
@@ -131,12 +131,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  runMetrics.storage.isSharded = writeStats.isSharded;
  runMetrics.storage.shardCount = writeStats.shardCount;

- // Track next index for subsequent flushes
  nextShardIndexes[name] = writeStats.nextShardIndex;

  if (calc.manifest.hash) {
  successUpdates[name] = {
  hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -171,10 +172,44 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
- // Strategy 1: Standard (900KB, no key limit)
- // Strategy 2: Aggressive Bytes (450KB, 10k keys)
- // Strategy 3: Very Aggressive (200KB, 2k keys)
- // Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
+
+ // --- [NEW] COMPRESSION STRATEGY ---
+ // Try to compress before falling back to complex sharding
+ try {
+ const jsonString = JSON.stringify(result);
+ const rawBuffer = Buffer.from(jsonString);
+
+ // Only attempt if meaningful size (> 50KB)
+ if (rawBuffer.length > 50 * 1024) {
+ const compressedBuffer = zlib.gzipSync(rawBuffer);
+
+ // If compressed fits in one document (< 900KB safety limit)
+ if (compressedBuffer.length < 900 * 1024) {
+ logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. Saved as Blob.`);
+
+ const compressedPayload = {
+ _compressed: true,
+ _completed: true,
+ _lastUpdated: new Date().toISOString(),
+ payload: compressedBuffer
+ };
+
+ // Write immediately
+ await docRef.set(compressedPayload, { merge: true });
+
+ return {
+ totalSize: compressedBuffer.length,
+ isSharded: false,
+ shardCount: 1,
+ nextShardIndex: startShardIndex
+ };
+ }
+ }
+ } catch (compErr) {
+ logger.log('WARN', `[Compression] Failed to compress ${name}. Falling back to standard sharding.`, compErr);
+ }
+ // --- END COMPRESSION STRATEGY ---
+
  const strategies = [
  { bytes: 900 * 1024, keys: null },
  { bytes: 450 * 1024, keys: 10000 },
@@ -190,14 +225,10 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  const constraints = strategies[attempt];
  try {
  const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+ const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);

- // Analyze the update batch
- const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
-
- // Calculate stats
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);

- // Logic to determine next shard index
  let maxIndex = startShardIndex;
  updates.forEach(u => {
  const segs = u.ref.path.split('/');
@@ -224,8 +255,6 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  lastError = commitErr;
  const msg = commitErr.message || '';
  const code = commitErr.code || '';
-
- // Check for explicit "too many index entries" or transaction size issues
  const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
  const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');

@@ -233,14 +262,11 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
  throw commitErr;
  }
-
  if (isIndexError || isSizeError) {
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
  continue;
- }
- else {
+ } else {
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
- // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
  continue;
  }
  }
@@ -261,32 +287,28 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
  let shardIndex = startShardIndex;

- // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
  return [{ ref: docRef, data, options: { merge: true } }];
  }

- // Sharding Logic
  for (const [key, value] of Object.entries(result)) {
  if (key.startsWith('_')) continue;
  const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);

  if (byteLimitReached || keyLimitReached) {
- writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
  }
  currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
  }

- // Push remaining chunk
  if (Object.keys(currentChunk).length > 0) {
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
- shardIndex++; // Increment so count is correct (0-based index means count is index+1)
+ shardIndex++;
  }

- // Pointer Logic
  if (flushMode !== 'INTERMEDIATE') {
  const pointerData = {
  _completed: true,
@@ -294,13 +316,12 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  _shardCount: shardIndex,
  _lastUpdated: new Date().toISOString()
  };
- writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
+ writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
  }

  return writes;
  }

- // ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
  async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
  const { db, logger, calculationUtils } = deps;
  const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
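
The committer now tries a single gzipped blob before falling back to the sharding strategies. A simplified sketch of that decision follows; the thresholds are taken from the diff, while chooseWriteMode is an illustrative name and the real code also handles intermediate flushes and shard indexes:

const zlib = require('zlib');

function chooseWriteMode(result) {
  const raw = Buffer.from(JSON.stringify(result));
  if (raw.length <= 50 * 1024) return { mode: 'PLAIN' };          // small: write as-is
  const gz = zlib.gzipSync(raw);
  if (gz.length < 900 * 1024) {                                   // fits one Firestore document
    return { mode: 'COMPRESSED', payload: gz, savedBytes: raw.length - gz.length };
  }
  return { mode: 'SHARDED' };                                     // fall back to shard_N writes
}

// Example: a 5 MB result that gzips to roughly 400 KB would be stored as one compressed blob.
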
@@ -4,7 +4,22 @@
  * --- NEW: Added streamPortfolioData async generator ---
  * --- FIXED: streamPortfolioData and streamHistoryData now accept optional 'providedRefs' ---
  * --- UPDATE: Added Smart Shard Indexing for specific ticker lookups ---
+ * --- UPDATE: Added GZIP Decompression Support for robust data loading ---
  */
+ const zlib = require('zlib'); // [NEW]
+
+ // [NEW] Helper for decompressing any doc if needed
+ function tryDecompress(data) {
+ if (data && data._compressed === true && data.payload) {
+ try {
+ return JSON.parse(zlib.gunzipSync(data.payload).toString());
+ } catch (e) {
+ console.error('[DataLoader] Decompression failed', e);
+ return {};
+ }
+ }
+ return data;
+ }

  /** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */

@@ -39,7 +54,10 @@ async function loadDataByRefs(config, deps, refs) {
  const snapshots = await withRetry(() => db.getAll(...batchRefs), `getAll(batch ${Math.floor(i / batchSize)})`);
  for (const doc of snapshots) {
  if (!doc.exists) continue;
- const data = doc.data();
+ const rawData = doc.data();
+ // [UPDATED] Decompress if needed
+ const data = tryDecompress(rawData);
+
  if (data && typeof data === 'object') Object.assign(mergedPortfolios, data);
  else logger.log('WARN', `Doc ${doc.id} exists but data is not an object`, data);
  }
@@ -68,7 +86,8 @@ async function loadDailyInsights(config, deps, dateString) {
  const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
  if (!docSnap.exists) { logger.log('WARN', `Insights not found for ${dateString}`); return null; }
  logger.log('TRACE', `Successfully loaded insights for ${dateString}`);
- return docSnap.data();
+ // [UPDATED] Decompress
+ return tryDecompress(docSnap.data());
  } catch (error) {
  logger.log('ERROR', `Failed to load daily insights for ${dateString}`, { errorMessage: error.message });
  return null;
@@ -86,7 +105,10 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
  const querySnapshot = await withRetry(() => postsCollectionRef.get(), `getSocialPosts(${dateString})`);
  if (querySnapshot.empty) { logger.log('WARN', `No social post insights for ${dateString}`); return null; }
  const postsMap = {};
- querySnapshot.forEach(doc => { postsMap[doc.id] = doc.data(); });
+ querySnapshot.forEach(doc => {
+ // [UPDATED] Decompress individual posts if needed
+ postsMap[doc.id] = tryDecompress(doc.data());
+ });
  logger.log('TRACE', `Loaded ${Object.keys(postsMap).length} social post insights`);
  return postsMap;
  } catch (error) {
@@ -168,12 +190,6 @@ async function getPriceShardRefs(config, deps) {
  * when only specific tickers are needed.
  */

- /**
- * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
- * @param {object} config
- * @param {object} deps
- * @returns {Promise<Object>} The lookup map { "instrumentId": "shardDocId" }
- */
  /**
  * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
  * [FIX] Added TTL check to ensure new instruments are discovered.
@@ -205,7 +221,10 @@ async function ensurePriceShardIndex(config, deps) {

  snapshot.forEach(doc => {
  shardCount++;
- const data = doc.data();
+ // [UPDATED] Robustly handle compressed shards during indexing
+ const rawData = doc.data();
+ const data = tryDecompress(rawData);
+
  if (data.history) {
  Object.keys(data.history).forEach(instId => {
  index[instId] = doc.id;
@@ -273,4 +292,4 @@ module.exports = {
  getPriceShardRefs,
  ensurePriceShardIndex,
  getRelevantShardRefs
- };
+ };
@@ -28,6 +28,34 @@ function generateCodeHash(codeString) {
  return crypto.createHash('sha256').update(clean).digest('hex');
  }

+ /**
+ * [NEW] Generates a stable SHA-256 hash of a data object.
+ * Keys are sorted to ensure determinism.
+ */
+ function generateDataHash(data) {
+ if (data === undefined) return 'undefined';
+
+ // Recursive stable stringify
+ const stableStringify = (obj) => {
+ if (typeof obj !== 'object' || obj === null) {
+ return JSON.stringify(obj);
+ }
+ if (Array.isArray(obj)) {
+ return '[' + obj.map(stableStringify).join(',') + ']';
+ }
+ return '{' + Object.keys(obj).sort().map(k =>
+ JSON.stringify(k) + ':' + stableStringify(obj[k])
+ ).join(',') + '}';
+ };
+
+ try {
+ const str = stableStringify(data);
+ return crypto.createHash('sha256').update(str).digest('hex');
+ } catch (e) {
+ return 'hash_error';
+ }
+ }
+
  /**
  * Executes a function with exponential backoff retry logic.
  * @param {Function} fn - Async function to execute
@@ -49,7 +77,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
  }
  }

- /** * Stage 2: Commit a batch of writes in chunks
+ /** Stage 2: Commit a batch of writes in chunks
  * FIXED: Now respects write.options (e.g. { merge: false }) to allow overwrites/deletes.
  */
  async function commitBatchInChunks(config, deps, writes, operationName) {
@@ -220,6 +248,7 @@ module.exports = {
  getExpectedDateStrings,
  getEarliestDataDates,
  generateCodeHash,
+ generateDataHash, // Exported
  withRetry,
  DEFINITIVE_EARLIEST_DATES
  };
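
generateDataHash is what makes the resultHash comparisons meaningful across runs: the stable stringify sorts object keys recursively, so two results with the same content hash identically regardless of key order. A small check of that property, assuming Node's crypto module as used above:

const crypto = require('crypto');

// Same stable-stringify idea as generateDataHash, reduced to the essentials.
const stableStringify = (obj) => {
  if (typeof obj !== 'object' || obj === null) return JSON.stringify(obj);
  if (Array.isArray(obj)) return '[' + obj.map(stableStringify).join(',') + ']';
  return '{' + Object.keys(obj).sort()
    .map((k) => JSON.stringify(k) + ':' + stableStringify(obj[k])).join(',') + '}';
};
const hashOf = (data) => crypto.createHash('sha256').update(stableStringify(data)).digest('hex');

const a = { ticker: 'AAPL', score: 1.2 };
const b = { score: 1.2, ticker: 'AAPL' };          // same content, different key order
console.log(hashOf(a) === hashOf(b));              // true
console.log(hashOf(a) === hashOf({ score: 1.3 })); // false: any content change flips the hash
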
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "bulltrackers-module",
- "version": "1.0.275",
+ "version": "1.0.277",
  "description": "Helper Functions for Bulltrackers.",
  "main": "index.js",
  "files": [