bulltrackers-module 1.0.275 → 1.0.277
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/WorkflowOrchestrator.js +72 -22
- package/functions/computation-system/data/CachedDataLoader.js +17 -1
- package/functions/computation-system/data/DependencyFetcher.js +16 -2
- package/functions/computation-system/helpers/computation_dispatcher.js +20 -11
- package/functions/computation-system/helpers/computation_worker.js +28 -11
- package/functions/computation-system/persistence/ResultCommitter.js +60 -39
- package/functions/computation-system/utils/data_loader.js +30 -11
- package/functions/computation-system/utils/utils.js +30 -1
- package/package.json +1 -1
package/functions/computation-system/WorkflowOrchestrator.js

@@ -1,6 +1,6 @@
 /**
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
- * UPDATED:
+ * UPDATED: Includes Content-Based Dependency Short-Circuiting.
  * UPDATED: Includes 'Audit Upgrade' check.
  * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
@@ -18,21 +18,41 @@ function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[c
 
 /**
  * Analyzes whether calculations should run, be skipped, or are blocked.
+ * [NEW] Implements ResultHash short-circuit logic.
  */
 function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
   const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
   const simulationStatus = { ...dailyStatus };
   const isTargetToday = (dateStr === new Date().toISOString().slice(0, 10));
 
-
+  // Helper: Validates if a dependency is satisfied, either by Code Match OR Content Match
+  const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
     const norm = normalizeName(depName);
-    const
+    const storedDep = currentStatusMap[norm];
     const depManifest = manifestMap.get(norm);
-
-
-    if (!
-    if (
-    return
+
+    // 1. Basic Existence Checks
+    if (!storedDep) return false;
+    if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
+    if (!depManifest) return false;
+
+    // 2. Code Hash Check (The Standard Check)
+    if (storedDep.hash === depManifest.hash) return true;
+
+    // 3. [NEW] Content-Based Short-Circuit Check
+    // If Code Hash mismatch, check if the *Result Hash* is identical to what we used last time.
+    // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
+    // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
+    // storedDep.resultHash = The current ResultHash of A.
+    if (dependentStoredStatus &&
+        dependentStoredStatus.dependencyResultHashes &&
+        dependentStoredStatus.dependencyResultHashes[depName] &&
+        storedDep.resultHash &&
+        storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
+      return true; // Short-circuit: The output didn't change, so we are safe.
+    }
+
+    return false;
   };
 
   for (const calc of calcsInPass) {
@@ -42,6 +62,17 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
     const storedCategory = stored ? stored.category : null;
     const currentHash = calc.hash;
 
+    // Collect current result hashes of dependencies for the next run
+    const currentDependencyResultHashes = {};
+    if (calc.dependencies) {
+      calc.dependencies.forEach(d => {
+        const normD = normalizeName(d);
+        if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
+          currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
+        }
+      });
+    }
+
     const markImpossible = (reason, type = 'GENERIC') => {
       report.impossible.push({ name: cName, reason });
       const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
@@ -49,25 +80,31 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
     };
 
     const markRunnable = (isReRun = false, reRunDetails = null) => {
-
-
+      const payload = {
+        name: cName,
+        ...reRunDetails,
+        dependencyResultHashes: currentDependencyResultHashes // Pass forward
+      };
+      if (isReRun) report.reRuns.push(payload);
+      else report.runnable.push(payload);
       // Simulate success so dependents can pass their check
-      simulationStatus[cName] = {
+      simulationStatus[cName] = {
+        hash: currentHash,
+        resultHash: 'SIMULATED',
+        category: calc.category,
+        composition: calc.composition
+      };
     };
 
     let migrationOldCategory = null;
     if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }
 
-    //
-    // Removal ensures we re-check Root Data every time, allowing for visibility and recovery.
-
-    // 1. Check Root Data (The Primary Gate)
+    // 1. Check Root Data
     const rootCheck = checkRootDependencies(calc, rootDataStatus);
 
     if (!rootCheck.canRun) {
       const missingStr = rootCheck.missing.join(', ');
       if (!isTargetToday) {
-        // If previously impossible, this confirms it. If previously run, this is a regression.
         markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
       } else {
         report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
@@ -85,15 +122,15 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
         const depStored = simulationStatus[normDep];
         if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
           dependencyIsImpossible = true;
-          impossibleDepCause = dep;
+          impossibleDepCause = dep;
           break;
         }
-
+        // Pass 'stored' (this calc's status) to check short-circuiting
+        if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
       }
     }
 
     if (dependencyIsImpossible) {
-      // [UPDATED] Include the name of the failing dependency in the reason string
      markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
      continue;
    }
@@ -117,7 +154,12 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
      markRunnable(false, { reason: "New Calculation" });
    }
    else if (storedHash !== currentHash) {
-      //
+      // [NEW] Check if Dependencies caused this, and if their content is actually same
+      // Note: If we are here, it means code changed.
+      // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
+      // But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
+      // So we strictly re-run if code changes.
+
      let changeReason = "Hash Mismatch (Unknown)";
      const oldComp = stored.composition;
      const newComp = calc.composition;
@@ -134,6 +176,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
        changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
      }
      else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
+        // Dependency Hash Mismatch.
+        // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
+        // But typically if code hash mismatches, we re-run.
+        // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
+        // won't need to re-run if *this* calculation produces the same output.
        const changedDeps = [];
        for(const dKey in newComp.deps) {
          if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
@@ -158,7 +205,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
    else if (migrationOldCategory) {
      markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
    }
-    // Audit Upgrade Check
    else if (!stored.composition) {
      markRunnable(true, {
        name: cName,
@@ -177,8 +223,9 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
 
 /**
  * DIRECT EXECUTION PIPELINE (For Workers)
+ * [UPDATED] Accepts dependencyResultHashes
  */
-async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null) {
+async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
   const { logger } = dependencies;
   const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);
 
@@ -187,6 +234,9 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep
 
   if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }
 
+  // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
+  calcManifest.dependencyResultHashes = dependencyResultHashes;
+
   if (previousCategory) {
     calcManifest.previousCategory = previousCategory;
     logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
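The orchestrator comments above describe the new rule in prose. Below is a minimal standalone sketch of it (names follow the diff; the sample data and the re-implementation itself are illustrative, not the shipped code), showing the two ways a dependency now counts as satisfied: an unchanged code hash, or an unchanged result hash previously recorded by the dependent.

```js
// Minimal sketch of the dependency short-circuit rule described above.
const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';

function isDepSatisfied(depName, statusMap, manifestMap, dependentStoredStatus) {
  const storedDep = statusMap[depName];
  const depManifest = manifestMap.get(depName);
  if (!storedDep || !depManifest) return false;
  if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;

  // Standard check: the dependency's code hash is unchanged.
  if (storedDep.hash === depManifest.hash) return true;

  // Short-circuit: the code changed, but the dependency's stored output hash is
  // identical to the one the dependent recorded the last time it ran.
  const recorded = dependentStoredStatus && dependentStoredStatus.dependencyResultHashes;
  return Boolean(recorded && recorded[depName] && storedDep.resultHash &&
                 storedDep.resultHash === recorded[depName]);
}

// calcA's code hash moved from v1 to v2, but its last stored output hash is unchanged,
// so calcB (which recorded that output hash) is not forced to re-run because of it.
const statusMap   = { calcA: { hash: 'code-v1', resultHash: 'out-123' } };
const manifestMap = new Map([['calcA', { hash: 'code-v2' }]]);
const calcBStatus = { dependencyResultHashes: { calcA: 'out-123' } };

console.log(isDepSatisfied('calcA', statusMap, manifestMap, calcBStatus)); // true
```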
package/functions/computation-system/data/CachedDataLoader.js

@@ -1,5 +1,6 @@
 /**
  * @fileoverview Execution-scoped data loader with caching.
+ * UPDATED: Handles Decompression of Shards.
  */
 const {
   loadDailyInsights,
@@ -7,6 +8,7 @@ const {
   getRelevantShardRefs,
   getPriceShardRefs
 } = require('../utils/data_loader');
+const zlib = require('zlib'); // [NEW]
 
 class CachedDataLoader {
   constructor(config, dependencies) {
@@ -19,6 +21,19 @@ class CachedDataLoader {
     };
   }
 
+  // [NEW] Decompression Helper
+  _tryDecompress(data) {
+    if (data && data._compressed === true && data.payload) {
+      try {
+        return JSON.parse(zlib.gunzipSync(data.payload).toString());
+      } catch (e) {
+        console.error('[CachedDataLoader] Decompression failed', e);
+        return {};
+      }
+    }
+    return data;
+  }
+
   async loadMappings() {
     if (this.cache.mappings) return this.cache.mappings;
     const { calculationUtils } = this.deps;
@@ -52,7 +67,8 @@ class CachedDataLoader {
     try {
       const snap = await docRef.get();
       if (!snap.exists) return {};
-
+      // [UPDATED] Use decompression helper
+      return this._tryDecompress(snap.data());
     } catch (e) {
       console.error(`Error loading shard ${docRef.path}:`, e);
       return {};
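For reference, a self-contained round trip of the `_compressed` envelope the loader now recognises (the writer half mirrors what ResultCommitter does further down this diff; the sample data is invented):

```js
const zlib = require('zlib');

// Writer side: wrap a result object in the compressed envelope.
function compressEnvelope(result) {
  return {
    _compressed: true,
    _completed: true,
    _lastUpdated: new Date().toISOString(),
    payload: zlib.gzipSync(Buffer.from(JSON.stringify(result)))
  };
}

// Reader side: mirrors the _tryDecompress helper added above.
function tryDecompress(data) {
  if (data && data._compressed === true && data.payload) {
    try {
      return JSON.parse(zlib.gunzipSync(data.payload).toString());
    } catch (e) {
      console.error('Decompression failed', e);
      return {};
    }
  }
  return data;
}

const doc = compressEnvelope({ AAPL: { score: 0.82 }, MSFT: { score: 0.77 } });
console.log(tryDecompress(doc)); // { AAPL: { score: 0.82 }, MSFT: { score: 0.77 } }
```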
package/functions/computation-system/data/DependencyFetcher.js

@@ -1,7 +1,8 @@
 /**
- * @fileoverview Fetches results from previous computations, handling auto-sharding
+ * @fileoverview Fetches results from previous computations, handling auto-sharding and decompression.
 */
 const { normalizeName } = require('../utils/utils');
+const zlib = require('zlib'); // [NEW]
 
 async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db }, includeSelf = false) {
   const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
@@ -39,7 +40,20 @@ async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config,
       const name = names[i];
       if (!doc.exists) return;
       const data = doc.data();
-
+
+      // --- [NEW] DECOMPRESSION LOGIC ---
+      if (data._compressed === true && data.payload) {
+        try {
+          // Firestore returns Buffers automatically
+          const unzipped = zlib.gunzipSync(data.payload);
+          fetched[name] = JSON.parse(unzipped.toString());
+        } catch (e) {
+          console.error(`[Hydration] Failed to decompress ${name}:`, e);
+          fetched[name] = {};
+        }
+      }
+      // --- END NEW LOGIC ---
+      else if (data._sharded === true) {
        hydrationPromises.push(hydrateAutoShardedResult(doc.ref, name));
      } else if (data._completed) {
        fetched[name] = data;
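One detail worth noting about the hunk above: the compressed envelope written by ResultCommitter also carries `_completed: true`, so the `_compressed` branch has to come before the plain `_completed` branch, otherwise the raw gzip envelope would be returned as if it were the result. A small sketch of that branch ordering (shard hydration is stubbed out; only the shapes are taken from the diff):

```js
const zlib = require('zlib');

function hydrate(data) {
  if (data._compressed === true && data.payload) {
    return JSON.parse(zlib.gunzipSync(data.payload).toString()); // inflate inline
  }
  if (data._sharded === true) {
    return { _pending: 'load shards from the subcollection' };   // stub for this sketch
  }
  if (data._completed) {
    return data;                                                  // stored inline, use as-is
  }
  return {};
}

const envelope = {
  _compressed: true,
  _completed: true,
  payload: zlib.gzipSync(Buffer.from(JSON.stringify({ score: 42 })))
};
console.log(hydrate(envelope)); // { score: 42 }
```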
package/functions/computation-system/helpers/computation_dispatcher.js

@@ -1,7 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED:
+ * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
 */
 
 const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -11,7 +11,7 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
 const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
 const { generateCodeHash } = require('../topology/HashManager');
 const pLimit = require('p-limit');
-const crypto = require('crypto');
+const crypto = require('crypto');
 
 const TOPIC_NAME = 'computation-tasks';
 const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
@@ -105,18 +105,19 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
  const validToRun = [...report.runnable, ...report.reRuns];
  validToRun.forEach(item => {
-    // [NEW] Generate Unique ID
    const uniqueDispatchId = crypto.randomUUID();
 
    tasksToDispatch.push({
      action: 'RUN_COMPUTATION_DATE',
-      dispatchId: uniqueDispatchId,
+      dispatchId: uniqueDispatchId,
      date: dateStr,
      pass: passToRun,
      computation: normalizeName(item.name),
      hash: item.hash || item.newHash,
      previousCategory: item.previousCategory || null,
-      triggerReason: item.reason || "Unknown",
+      triggerReason: item.reason || "Unknown",
+      // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
+      dependencyResultHashes: item.dependencyResultHashes || {},
      timestamp: Date.now()
    });
  });
@@ -141,16 +142,24 @@ async function dispatchComputationPass(config, dependencies, computationManifest
      await db.runTransaction(async (t) => {
        const doc = await t.get(ledgerRef);
 
-        //
-
-
-
-
+        // [NEW] Zombie Task Recovery Check
+        if (doc.exists) {
+          const data = doc.data();
+          const now = Date.now();
+          const isPending = data.status === 'PENDING';
+          // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
+          const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
+          // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
+          const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
+
+          if (isPending && !isLeaseExpired && !isLegacyZombie) {
+            return false; // Valid active pending task, do not double dispatch
+          }
        }
 
        t.set(ledgerRef, {
          status: 'PENDING',
-          dispatchId: task.dispatchId,
+          dispatchId: task.dispatchId,
          computation: task.computation,
          expectedHash: task.hash || 'unknown',
          createdAt: new Date(),
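The transaction above decides when a previously dispatched task may be dispatched again. A pure-function restatement of that decision (field names follow the diff; timestamps are plain epoch-millis numbers here, whereas the real ledger uses Firestore Timestamps, hence `createdAt.toMillis()` in the shipped code):

```js
const HOUR_MS = 60 * 60 * 1000;

function shouldRedispatch(ledgerDoc, now = Date.now()) {
  if (!ledgerDoc) return true;                      // no ledger entry yet
  if (ledgerDoc.status !== 'PENDING') return true;  // analysis already decided this must run again
  const leaseExpired = Boolean(ledgerDoc.leaseExpiresAt && ledgerDoc.leaseExpiresAt < now);
  const legacyZombie = !ledgerDoc.leaseExpiresAt && Boolean(ledgerDoc.createdAt) &&
    (now - ledgerDoc.createdAt > HOUR_MS);
  return leaseExpired || legacyZombie;              // PENDING but abandoned => zombie
}

console.log(shouldRedispatch({ status: 'PENDING', leaseExpiresAt: Date.now() - 1000 }));   // true (lease ran out)
console.log(shouldRedispatch({ status: 'PENDING', leaseExpiresAt: Date.now() + 60000 }));  // false (still owned)
console.log(shouldRedispatch({ status: 'PENDING', createdAt: Date.now() - 2 * HOUR_MS })); // true (legacy zombie)
```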
package/functions/computation-system/helpers/computation_worker.js

@@ -1,8 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes computation tasks from Pub/Sub.
- * UPDATED:
- * UPDATED: Includes Deterministic Error Short-Circuit (Poison Pill Protection).
+ * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
 */
 
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -14,7 +13,7 @@ let calculationPackage;
 try { calculationPackage = require('aiden-shared-calculations-unified');
 } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
 const calculations = calculationPackage.calculations;
-const MAX_RETRIES = 0;
+const MAX_RETRIES = 0;
 
 async function handleComputationTask(message, config, dependencies) {
   const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
@@ -31,17 +30,31 @@ async function handleComputationTask(message, config, dependencies) {
 
  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
 
-  // Extract
-  const { date, pass, computation, previousCategory, triggerReason, dispatchId } = data;
+  // Extract fields including new dependencyResultHashes
+  const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;
 
  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
 
-  // LOG THE ID FOR TRACING
  logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
    dispatchId: dispatchId || 'legacy',
    reason: triggerReason
  });
 
+  // [NEW] LEASE CLAIMING
+  // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
+  try {
+    const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
+    await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+      status: 'IN_PROGRESS',
+      workerId: process.env.K_REVISION || 'unknown',
+      startedAt: new Date(),
+      leaseExpiresAt: Date.now() + leaseTimeMs,
+      dispatchId: dispatchId
+    }, { merge: true });
+  } catch (leaseErr) {
+    logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
+  }
+
  let computationManifest;
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
  } catch (manifestError) {
@@ -59,7 +72,8 @@ async function handleComputationTask(message, config, dependencies) {
      config,
      runDependencies,
      computationManifest,
-      previousCategory
+      previousCategory,
+      dependencyResultHashes // [NEW] Pass hashes to executor
    );
    const duration = Date.now() - startTime;
 
@@ -79,6 +93,13 @@ async function handleComputationTask(message, config, dependencies) {
      const metrics = successData.metrics || {};
      metrics.durationMs = duration;
      logger.log('INFO', `[Worker] ✅ Stored: ${computation}. ID: ${dispatchId}`);
+
+      // Mark Ledger as COMPLETED
+      await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
+        status: 'COMPLETED',
+        completedAt: new Date()
+      }).catch(() => {});
+
      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
    }
    else {
@@ -86,7 +107,6 @@ async function handleComputationTask(message, config, dependencies) {
      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
    }
  } catch (err) {
-    // --- DETERMINISTIC ERROR SHORT-CIRCUIT ---
    const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
      err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
      (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
@@ -101,15 +121,12 @@ async function handleComputationTask(message, config, dependencies) {
          finalAttemptAt: new Date(),
          failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
        });
-        // Return success to Pub/Sub to STOP retries
        await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
        return;
      } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
    }
 
-    // --- STANDARD RETRY ---
    const retryCount = message.deliveryAttempt || 0;
-    // NOTE: If you configure Pub/Sub Max Attempts = 1, this logic is redundant but safe.
    if (retryCount >= MAX_RETRIES) {
      logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
      try {
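For clarity, this is the shape of the lease document the worker writes when it claims a task; the field names, the 20-minute default, and the `K_REVISION` fallback come from the diff, while the wrapper function itself is only illustrative:

```js
function buildLeaseClaim(dispatchId, leaseMinutes = 20) {
  return {
    status: 'IN_PROGRESS',
    workerId: process.env.K_REVISION || 'unknown',
    startedAt: new Date(),
    leaseExpiresAt: Date.now() + leaseMinutes * 60 * 1000,
    dispatchId
  };
}

// If the worker dies mid-run, leaseExpiresAt tells later readers of the ledger
// how long this claim should still be treated as active.
console.log(buildLeaseClaim('3f6c2a9e-example'));
```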
package/functions/computation-system/persistence/ResultCommitter.js

@@ -1,26 +1,23 @@
 /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED:
- * UPDATED:
- * FIX: Throws proper Error objects.
+ * UPDATED: Implements GZIP Compression for efficient storage.
+ * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
 */
-const { commitBatchInChunks }
+const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
 const { updateComputationStatus } = require('./StatusRepository');
 const { batchStoreSchemas } = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
 const { HeuristicValidator } = require('./ResultsValidator');
 const validationOverrides = require('../config/validation_overrides');
 const pLimit = require('p-limit');
+const zlib = require('zlib'); // [NEW] Compression Lib
 
 const NON_RETRYABLE_ERRORS = [
   'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
-  // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
 ];
 
 /**
  * Commits results to Firestore.
- * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
- * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
 */
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
   const successUpdates = {};
@@ -52,7 +49,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
      const result = await calc.getResult();
      const overrides = validationOverrides[calc.manifest.name] || {};
 
-      //
+      // Validation
      if (result && Object.keys(result).length > 0) {
        const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
        if (!healthCheck.valid) {
@@ -66,16 +63,20 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
      const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
 
-      //
-
+      // Calculate Result Hash (Content-Based)
+      const resultHash = isEmpty ? 'empty' : generateDataHash(result);
+
+      // Handle Empty Results
      if (isEmpty) {
        if (flushMode === 'INTERMEDIATE') {
-          nextShardIndexes[name] = currentShardIndex;
+          nextShardIndexes[name] = currentShardIndex;
          continue;
        }
        if (calc.manifest.hash) {
          successUpdates[name] = {
-            hash: calc.manifest.hash,
+            hash: calc.manifest.hash,
+            resultHash: resultHash,
+            dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
            category: calc.manifest.category,
            composition: calc.manifest.composition,
            metrics: runMetrics
@@ -90,7 +91,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
      const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
 
      if (isMultiDate) {
-        // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
        const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
          const dailyData = result[historicalDate];
          if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -102,7 +102,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
            .collection(config.computationsSubcollection)
            .doc(name);
 
-          // For historical Fan-Out, we assume standard flush mode (not incremental) for now
          await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
        }));
        await Promise.all(datePromises);
@@ -110,6 +109,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
        if (calc.manifest.hash) {
          successUpdates[name] = {
            hash: calc.manifest.hash,
+            resultHash: resultHash,
+            dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
            category: calc.manifest.category,
            composition: calc.manifest.composition,
            metrics: runMetrics
@@ -117,7 +118,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
        }
 
      } else {
-        // --- STANDARD / INCREMENTAL MODE ---
        const mainDocRef = db.collection(config.resultsCollection)
          .doc(dStr)
          .collection(config.resultsSubcollection)
@@ -131,12 +131,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
        runMetrics.storage.isSharded = writeStats.isSharded;
        runMetrics.storage.shardCount = writeStats.shardCount;
 
-        // Track next index for subsequent flushes
        nextShardIndexes[name] = writeStats.nextShardIndex;
 
        if (calc.manifest.hash) {
          successUpdates[name] = {
            hash: calc.manifest.hash,
+            resultHash: resultHash,
+            dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
            category: calc.manifest.category,
            composition: calc.manifest.composition,
            metrics: runMetrics
@@ -171,10 +172,44 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }
 
 async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
-
-  //
-  //
-
+
+  // --- [NEW] COMPRESSION STRATEGY ---
+  // Try to compress before falling back to complex sharding
+  try {
+    const jsonString = JSON.stringify(result);
+    const rawBuffer = Buffer.from(jsonString);
+
+    // Only attempt if meaningful size (> 50KB)
+    if (rawBuffer.length > 50 * 1024) {
+      const compressedBuffer = zlib.gzipSync(rawBuffer);
+
+      // If compressed fits in one document (< 900KB safety limit)
+      if (compressedBuffer.length < 900 * 1024) {
+        logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. Saved as Blob.`);
+
+        const compressedPayload = {
+          _compressed: true,
+          _completed: true,
+          _lastUpdated: new Date().toISOString(),
+          payload: compressedBuffer
+        };
+
+        // Write immediately
+        await docRef.set(compressedPayload, { merge: true });
+
+        return {
+          totalSize: compressedBuffer.length,
+          isSharded: false,
+          shardCount: 1,
+          nextShardIndex: startShardIndex
+        };
+      }
+    }
+  } catch (compErr) {
+    logger.log('WARN', `[Compression] Failed to compress ${name}. Falling back to standard sharding.`, compErr);
+  }
+  // --- END COMPRESSION STRATEGY ---
+
  const strategies = [
    { bytes: 900 * 1024, keys: null },
    { bytes: 450 * 1024, keys: 10000 },
@@ -190,14 +225,10 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
    const constraints = strategies[attempt];
    try {
      const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+      const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
 
-      // Analyze the update batch
-      const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
-
-      // Calculate stats
      finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
 
-      // Logic to determine next shard index
      let maxIndex = startShardIndex;
      updates.forEach(u => {
        const segs = u.ref.path.split('/');
@@ -224,8 +255,6 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
      lastError = commitErr;
      const msg = commitErr.message || '';
      const code = commitErr.code || '';
-
-      // Check for explicit "too many index entries" or transaction size issues
      const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
      const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
 
@@ -233,14 +262,11 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
        logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
        throw commitErr;
      }
-
      if (isIndexError || isSizeError) {
        logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
        continue;
-      }
-      else {
+      } else {
        logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
-        // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
        continue;
      }
    }
@@ -261,32 +287,28 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
  let shardIndex = startShardIndex;
 
-  // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
    const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
    return [{ ref: docRef, data, options: { merge: true } }];
  }
 
-  // Sharding Logic
  for (const [key, value] of Object.entries(result)) {
    if (key.startsWith('_')) continue;
    const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
    const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
 
    if (byteLimitReached || keyLimitReached) {
-      writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
+      writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
      shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
    }
    currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
  }
 
-  // Push remaining chunk
  if (Object.keys(currentChunk).length > 0) {
    writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
-    shardIndex++;
+    shardIndex++;
  }
 
-  // Pointer Logic
  if (flushMode !== 'INTERMEDIATE') {
    const pointerData = {
      _completed: true,
@@ -294,13 +316,12 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
      _shardCount: shardIndex,
      _lastUpdated: new Date().toISOString()
    };
-    writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
+    writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
  }
 
  return writes;
 }
 
-// ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
 async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
   const { db, logger, calculationUtils } = deps;
   const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
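To make the new size gate in `writeSingleResult` concrete, here is a small sketch of the decision it implements; the 50KB and 900KB thresholds are the ones in the diff, while the function wrapper and the synthetic payload are illustrative only:

```js
const zlib = require('zlib');

// Results under ~50KB keep the existing write path; larger ones are gzipped and,
// if the blob fits under the ~900KB single-document safety limit, stored as one
// compressed blob; otherwise the writer falls back to auto-sharding.
function chooseWriteStrategy(result) {
  const raw = Buffer.from(JSON.stringify(result));
  if (raw.length <= 50 * 1024) return { mode: 'STANDARD_WRITE', bytes: raw.length };
  const gz = zlib.gzipSync(raw);
  if (gz.length < 900 * 1024) return { mode: 'COMPRESSED_BLOB', bytes: gz.length };
  return { mode: 'AUTO_SHARDED', bytes: raw.length };
}

// Repetitive JSON compresses well, so even large results often stay in one document.
const big = Object.fromEntries(
  Array.from({ length: 20000 }, (_, i) => [`inst_${i}`, { close: 100 + (i % 7), vol: i % 13 }])
);
console.log(chooseWriteStrategy(big)); // likely { mode: 'COMPRESSED_BLOB', ... }
```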
package/functions/computation-system/utils/data_loader.js

@@ -4,7 +4,22 @@
  * --- NEW: Added streamPortfolioData async generator ---
  * --- FIXED: streamPortfolioData and streamHistoryData now accept optional 'providedRefs' ---
  * --- UPDATE: Added Smart Shard Indexing for specific ticker lookups ---
+ * --- UPDATE: Added GZIP Decompression Support for robust data loading ---
 */
+const zlib = require('zlib'); // [NEW]
+
+// [NEW] Helper for decompressing any doc if needed
+function tryDecompress(data) {
+  if (data && data._compressed === true && data.payload) {
+    try {
+      return JSON.parse(zlib.gunzipSync(data.payload).toString());
+    } catch (e) {
+      console.error('[DataLoader] Decompression failed', e);
+      return {};
+    }
+  }
+  return data;
+}
 
 /** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */
 
@@ -39,7 +54,10 @@ async function loadDataByRefs(config, deps, refs) {
    const snapshots = await withRetry(() => db.getAll(...batchRefs), `getAll(batch ${Math.floor(i / batchSize)})`);
    for (const doc of snapshots) {
      if (!doc.exists) continue;
-      const
+      const rawData = doc.data();
+      // [UPDATED] Decompress if needed
+      const data = tryDecompress(rawData);
+
      if (data && typeof data === 'object') Object.assign(mergedPortfolios, data);
      else logger.log('WARN', `Doc ${doc.id} exists but data is not an object`, data);
    }
@@ -68,7 +86,8 @@ async function loadDailyInsights(config, deps, dateString) {
    const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
    if (!docSnap.exists) { logger.log('WARN', `Insights not found for ${dateString}`); return null; }
    logger.log('TRACE', `Successfully loaded insights for ${dateString}`);
-
+    // [UPDATED] Decompress
+    return tryDecompress(docSnap.data());
  } catch (error) {
    logger.log('ERROR', `Failed to load daily insights for ${dateString}`, { errorMessage: error.message });
    return null;
@@ -86,7 +105,10 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
    const querySnapshot = await withRetry(() => postsCollectionRef.get(), `getSocialPosts(${dateString})`);
    if (querySnapshot.empty) { logger.log('WARN', `No social post insights for ${dateString}`); return null; }
    const postsMap = {};
-    querySnapshot.forEach(doc => {
+    querySnapshot.forEach(doc => {
+      // [UPDATED] Decompress individual posts if needed
+      postsMap[doc.id] = tryDecompress(doc.data());
+    });
    logger.log('TRACE', `Loaded ${Object.keys(postsMap).length} social post insights`);
    return postsMap;
  } catch (error) {
@@ -168,12 +190,6 @@ async function getPriceShardRefs(config, deps) {
  * when only specific tickers are needed.
  */
 
-/**
- * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
- * @param {object} config
- * @param {object} deps
- * @returns {Promise<Object>} The lookup map { "instrumentId": "shardDocId" }
- */
 /**
  * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
  * [FIX] Added TTL check to ensure new instruments are discovered.
@@ -205,7 +221,10 @@ async function ensurePriceShardIndex(config, deps) {
 
    snapshot.forEach(doc => {
      shardCount++;
-
+      // [UPDATED] Robustly handle compressed shards during indexing
+      const rawData = doc.data();
+      const data = tryDecompress(rawData);
+
      if (data.history) {
        Object.keys(data.history).forEach(instId => {
          index[instId] = doc.id;
@@ -273,4 +292,4 @@ module.exports = {
   getPriceShardRefs,
   ensurePriceShardIndex,
   getRelevantShardRefs
-};
+};
package/functions/computation-system/utils/utils.js

@@ -28,6 +28,34 @@ function generateCodeHash(codeString) {
   return crypto.createHash('sha256').update(clean).digest('hex');
 }
 
+/**
+ * [NEW] Generates a stable SHA-256 hash of a data object.
+ * Keys are sorted to ensure determinism.
+ */
+function generateDataHash(data) {
+  if (data === undefined) return 'undefined';
+
+  // Recursive stable stringify
+  const stableStringify = (obj) => {
+    if (typeof obj !== 'object' || obj === null) {
+      return JSON.stringify(obj);
+    }
+    if (Array.isArray(obj)) {
+      return '[' + obj.map(stableStringify).join(',') + ']';
+    }
+    return '{' + Object.keys(obj).sort().map(k =>
+      JSON.stringify(k) + ':' + stableStringify(obj[k])
+    ).join(',') + '}';
+  };
+
+  try {
+    const str = stableStringify(data);
+    return crypto.createHash('sha256').update(str).digest('hex');
+  } catch (e) {
+    return 'hash_error';
+  }
+}
+
 /**
  * Executes a function with exponential backoff retry logic.
  * @param {Function} fn - Async function to execute
@@ -49,7 +77,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
   }
 }
 
-/**
+/** Stage 2: Commit a batch of writes in chunks
  * FIXED: Now respects write.options (e.g. { merge: false }) to allow overwrites/deletes.
 */
 async function commitBatchInChunks(config, deps, writes, operationName) {
@@ -220,6 +248,7 @@ module.exports = {
   getExpectedDateStrings,
   getEarliestDataDates,
   generateCodeHash,
+  generateDataHash, // Exported
   withRetry,
   DEFINITIVE_EARLIEST_DATES
 };
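The whole short-circuit mechanism rests on `generateDataHash` being insensitive to object key order. A quick self-contained check of that property (the function is re-stated here verbatim from the hunk above so the snippet runs on its own; the sample objects are invented):

```js
const crypto = require('crypto');

function generateDataHash(data) {
  if (data === undefined) return 'undefined';
  const stableStringify = (obj) => {
    if (typeof obj !== 'object' || obj === null) return JSON.stringify(obj);
    if (Array.isArray(obj)) return '[' + obj.map(stableStringify).join(',') + ']';
    return '{' + Object.keys(obj).sort().map(k =>
      JSON.stringify(k) + ':' + stableStringify(obj[k])
    ).join(',') + '}';
  };
  try {
    return crypto.createHash('sha256').update(stableStringify(data)).digest('hex');
  } catch (e) {
    return 'hash_error';
  }
}

const a = { x: 1, y: [1, 2, 3] };
const b = { y: [1, 2, 3], x: 1 };          // same content, different key order
console.log(generateDataHash(a) === generateDataHash(b));                       // true
console.log(generateDataHash(a) === generateDataHash({ x: 2, y: [1, 2, 3] }));  // false
```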