bulltrackers-module 1.0.216 → 1.0.218
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -131,15 +131,11 @@ async function hydrateAutoShardedResult(docRef, resultName) {
   // Determine subcollection name (defaulting to '_shards')
   const shardsCol = docRef.collection('_shards');
   const snapshot = await shardsCol.get();
-
   const assembledData = { _completed: true }; // Rebuild the object
-
   snapshot.forEach(doc => { const chunk = doc.data(); Object.assign(assembledData, chunk); });
-
   // Remove internal flags if they leaked into the shards
   delete assembledData._sharded;
   delete assembledData._completed;
-
   return { name: resultName, data: assembledData };
 }
 
@@ -209,26 +205,20 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetchExistingResults) {
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
   const successUpdates = {};
   const schemas = [];
-
   for (const name in stateObj) {
     const calc = stateObj[name];
     let hasData = false;
-
     try {
       const result = await calc.getResult();
       if (!result) { deps.logger.log('INFO', `${name} for ${dStr}: Skipped (Empty Result)`); continue; }
-
       const mainDocRef = deps.db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-
       // AUTO-SHARDING LOGIC
       const updates = await prepareAutoShardedWrites(result, mainDocRef, deps.logger);
-
       // Collect Schemas if present
       if (calc.manifest.class.getSchema) {
         const { class: _cls, ...safeMetadata } = calc.manifest;
         schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
       }
-
       if (updates.length > 0) {
         await commitBatchInChunks(config, deps, updates, `${name} Results`);
         successUpdates[name] = calc.manifest.hash || true;
@@ -237,10 +227,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
       } else {
         deps.logger.log('INFO', `${name} for ${dStr}: - Empty Data`);
       }
-
     } catch (e) { deps.logger.log('ERROR', `${name} for ${dStr}: \u2716 FAILED Commit: ${e.message}`); }
   }
-
   if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => { });
   if (!skipStatusWrite && Object.keys(successUpdates).length > 0) {
     await updateComputationStatus(dStr, successUpdates, config, deps);
@@ -260,32 +248,10 @@ function calculateFirestoreBytes(value) {
   if (typeof value === 'number') return 8; // All numbers are 64-bit doubles or integers
   if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1;
   if (value instanceof Date) return 8; // Timestamps are 8 bytes
-
-
-  if (value.constructor && value.constructor.name === 'DocumentReference') {
-    // Path string + 16 bytes for the reference type overhead
-    return Buffer.byteLength(value.path, 'utf8') + 16;
-  }
-
-  // Handle Arrays: Sum of all values
-  if (Array.isArray(value)) {
-    let sum = 0;
-    for (const item of value) sum += calculateFirestoreBytes(item);
-    return sum;
-  }
-
+  if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
+  if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
   // Handle Objects (Maps): Sum of (Key + 1 + Value)
-  if (typeof value === 'object') {
-    let sum = 0;
-    for (const k in value) {
-      if (Object.prototype.hasOwnProperty.call(value, k)) {
-        // Key size (utf8 + 1) + Value size
-        sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]);
-      }
-    }
-    return sum;
-  }
-
+  if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; }
   return 0; // Fallback
 }
 
@@ -296,25 +262,18 @@ async function prepareAutoShardedWrites(result, docRef, logger) {
   const CHUNK_LIMIT = SAFETY_THRESHOLD_BYTES - OVERHEAD_ALLOWANCE;
   const totalSize = calculateFirestoreBytes(result); // 1. Calculate Total Size Once (O(N))
   const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16; // Add the size of the document path itself (Firestore counts this against the 1MB limit)
-
   if ((totalSize + docPathSize) < CHUNK_LIMIT) { const data = { ...result, _completed: true, _sharded: false }; return [{ ref: docRef, data, options: { merge: true } }]; } // CASE A: Fits in one document
-
   logger.log('INFO', `[AutoShard] Result size ~${Math.round(totalSize/1024)}KB exceeds limit. Sharding...`);
-
-  const writes = [];
+  const writes = [];
   const shardCollection = docRef.collection('_shards');
-
-  let
-  let
-  let shardIndex = 0;
-
-
+  let currentChunk = {};
+  let currentChunkSize = 0;
+  let shardIndex = 0;
   for (const [key, value] of Object.entries(result)) { // 2. Efficient O(N) Loop
     if (key.startsWith('_')) continue;
     const keySize = Buffer.byteLength(key, 'utf8') + 1; // Calculate size of just this item
     const valueSize = calculateFirestoreBytes(value);
     const itemSize = keySize + valueSize;
-
     if (currentChunkSize + itemSize > CHUNK_LIMIT) { // Check if adding this item would overflow the current chunk
       // Flush current chunk
       writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite
@@ -322,21 +281,14 @@ async function prepareAutoShardedWrites(result, docRef, logger) {
       currentChunk = {};
       currentChunkSize = 0;
     }
-
     // Add to current chunk
     currentChunk[key] = value;
     currentChunkSize += itemSize;
   }
-
   // Flush final chunk
   if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }
-
-  //
-  const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() };
-
-  // Use merge: false to ensure we overwrite any previous non-sharded blob
-  writes.push({ ref: docRef, data: pointerData, options: { merge: false } });
-
+  const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() }; // Pointer Document
+  writes.push({ ref: docRef, data: pointerData, options: { merge: false } }); // Use merge: false to ensure we overwrite any previous non-sharded blob
   return writes;
 }
 
@@ -240,9 +240,34 @@ class HistoryExtractor {
   }
 }
 
+/**
+ * FIXED: InsightsExtractor to properly handle document structure
+ * Document structure: { fetchedAt: Timestamp, insights: [...] }
+ */
+
 class InsightsExtractor {
-
-
+  /**
+   * Gets insights array for today (default) or yesterday
+   * @param {Object} context - Computation context
+   * @param {string} timeframe - 'today' or 'yesterday'
+   * @returns {Array} Array of insight objects
+   */
+  static getInsights(context, timeframe = 'today') {
+    const insightsData = context.insights;
+
+    if (!insightsData) return [];
+
+    // Get the document for the requested timeframe
+    const doc = insightsData[timeframe]; // { fetchedAt: ..., insights: [...] }
+
+    if (!doc) return [];
+
+    // Extract the insights array from the document
+    if (doc.insights && Array.isArray(doc.insights)) {
+      return doc.insights;
+    }
+
+    return [];
   }
 
   static getInsightForInstrument(insights, instrumentId) {
@@ -0,0 +1,716 @@ (new file)
# BullTrackers Computation System - Comprehensive Onboarding Guide

## Table of Contents
1. [System Overview](#system-overview)
2. [Context Architecture](#context-architecture)
3. [Data Loading & Routing](#data-loading--routing)
4. [Sharding System](#sharding-system)
5. [Computation Types & Execution](#computation-types--execution)
6. [Dependency Management](#dependency-management)
7. [Versioning & Smart Hashing](#versioning--smart-hashing)
8. [Execution Modes](#execution-modes)

---

## System Overview

The BullTrackers Computation System is a **dependency-aware, auto-sharding, distributed calculation engine** designed to process massive datasets across user portfolios, trading histories, market insights, and price data. The system automatically handles:

- **Smart data loading** (only loads what's needed)
- **Transparent sharding** (handles Firestore's 1MB document limit)
- **Dependency injection** (calculations receive exactly what they declare)
- **Historical state management** (access to yesterday's data when needed)
- **Incremental recomputation** (only reruns when code or dependencies change)

---

## Context Architecture

### The Context Object

Every computation receives a **context object** that contains all the data and tools it needs. The context is built dynamically based on the computation's declared dependencies.

### Context Structure by Computation Type

#### **Standard (Per-User) Context**
```javascript
{
  user: {
    id: "user_123",
    type: "speculator", // or "normal"
    portfolio: {
      today: { /* Portfolio snapshot for today */ },
      yesterday: { /* Portfolio snapshot for yesterday (if isHistorical: true) */ }
    },
    history: {
      today: { /* Trading history for today */ },
      yesterday: { /* Trading history for yesterday (if needed) */ }
    }
  },
  date: {
    today: "2024-12-07"
  },
  insights: {
    today: { /* Daily instrument insights */ },
    yesterday: { /* Yesterday's insights (if needed) */ }
  },
  social: {
    today: { /* Social post insights */ },
    yesterday: { /* Yesterday's social data (if needed) */ }
  },
  mappings: {
    tickerToInstrument: { "AAPL": 123, ... },
    instrumentToTicker: { 123: "AAPL", ... }
  },
  math: {
    // All mathematical layers (extractors, primitives, signals, etc.)
    extract: DataExtractor,
    compute: MathPrimitives,
    signals: SignalPrimitives,
    // ... and more
  },
  computed: {
    // Results from dependency calculations (current day)
    "risk-metrics": { "AAPL": { volatility: 0.25 }, ... },
    "sentiment-score": { "AAPL": { score: 0.8 }, ... }
  },
  previousComputed: {
    // Results from dependency calculations (previous day, if isHistorical: true)
    "risk-metrics": { "AAPL": { volatility: 0.23 }, ... }
  },
  meta: { /* Calculation metadata */ },
  config: { /* System configuration */ },
  deps: { /* System dependencies (db, logger, etc.) */ }
}
```

#### **Meta (Once-Per-Day) Context**
```javascript
{
  date: {
    today: "2024-12-07"
  },
  insights: {
    today: { /* Daily instrument insights */ },
    yesterday: { /* If needed */ }
  },
  social: {
    today: { /* Social post insights */ },
    yesterday: { /* If needed */ }
  },
  prices: {
    history: {
      // Price data for all instruments (or batched shards)
      "123": {
        ticker: "AAPL",
        prices: {
          "2024-12-01": 150.25,
          "2024-12-02": 151.30,
          // ...
        }
      }
    }
  },
  mappings: { /* Same as Standard */ },
  math: { /* Same as Standard */ },
  computed: { /* Same as Standard */ },
  previousComputed: { /* Same as Standard */ },
  meta: { /* Calculation metadata */ },
  config: { /* System configuration */ },
  deps: { /* System dependencies */ }
}
```

### How Context is Auto-Populated

The system uses a **declaration-based approach**. When you define a calculation, you declare what data you need:

```javascript
class MyCalculation {
  static getMetadata() {
    return {
      type: 'standard', // or 'meta'
      isHistorical: true, // Do I need yesterday's data?
      rootDataDependencies: ['portfolio', 'insights'], // What root data do I need?
      userType: 'all' // 'all', 'speculator', or 'normal'
    };
  }

  static getDependencies() {
    return ['risk-metrics', 'sentiment-score']; // What other calculations do I depend on?
  }
}
```

The `ContextBuilder` then:

1. **Checks `rootDataDependencies`** → Loads portfolio, insights, social, history, or price data
2. **Checks `isHistorical`** → If true, loads yesterday's portfolio and previous computation results
3. **Checks `getDependencies()`** → Fetches results from other calculations
4. **Injects math layers** → Automatically includes all extractors, primitives, and utilities
5. **Adds mappings** → Provides ticker ↔ instrument ID conversion

**You only get what you ask for.** This keeps memory usage efficient and prevents unnecessary data loading.
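The sketch below illustrates this declaration-driven assembly following the five steps above. It is an illustration only, not the module's actual `ContextBuilder` code: the `loaders` map and the `previousDay` helper are assumed stand-ins for the real data-loading internals.

```javascript
// Illustrative sketch of declaration-driven context assembly.
// `loaders` is an assumed map of loader functions keyed by root dependency name,
// plus assumed helpers for results, math layers, and mappings.
function previousDay(dateStr) {
  const d = new Date(dateStr);
  d.setUTCDate(d.getUTCDate() - 1);
  return d.toISOString().slice(0, 10);
}

async function buildContext(CalcClass, loaders, dateStr) {
  const meta = CalcClass.getMetadata();
  const context = { date: { today: dateStr }, meta };

  // Steps 1 & 2: load only the declared root data (plus yesterday's when isHistorical)
  for (const root of meta.rootDataDependencies) {
    context[root] = { today: await loaders[root](dateStr) };
    if (meta.isHistorical) context[root].yesterday = await loaders[root](previousDay(dateStr));
  }

  // Step 3: fetch results of declared calculation dependencies
  const depNames = CalcClass.getDependencies ? CalcClass.getDependencies() : [];
  context.computed = await loaders.results(dateStr, depNames);
  if (meta.isHistorical) context.previousComputed = await loaders.results(previousDay(dateStr), depNames);

  // Steps 4 & 5: math layers and mappings are injected unconditionally
  context.math = loaders.mathLayers();
  context.mappings = await loaders.mappings();
  return context;
}
```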

---

## Data Loading & Routing

### The Data Loading Pipeline

```
┌─────────────────────────────────────────────────────────┐
│                   DataLoader (Cached)                   │
├─────────────────────────────────────────────────────────┤
│ • loadMappings()        → Ticker/Instrument maps        │
│ • loadInsights(date)    → Daily instrument insights     │
│ • loadSocial(date)      → Social post insights          │
│ • loadPriceShard(ref)   → Asset price data              │
│ • getPriceShardRefs()   → All price shards              │
│ • getSpecificPriceShardReferences(ids) → Targeted shards│
└─────────────────────────────────────────────────────────┘
                            ↓
┌─────────────────────────────────────────────────────────┐
│                  ComputationExecutor                    │
├─────────────────────────────────────────────────────────┤
│ • executePerUser()      → Streams portfolio data        │
│ • executeOncePerDay()   → Loads global/meta data        │
└─────────────────────────────────────────────────────────┘
                            ↓
┌─────────────────────────────────────────────────────────┐
│                     ContextBuilder                      │
├─────────────────────────────────────────────────────────┤
│  Assembles context based on metadata & dependencies     │
└─────────────────────────────────────────────────────────┘
                            ↓
                Your Calculation.process()
```

### Streaming vs Batch Loading

#### **Standard Computations: Streaming**
Standard (per-user) computations use **streaming** to process users in chunks:

```javascript
// System streams portfolio data in batches of 50 users
for await (const userBatch of streamPortfolioData()) {
  // Each batch is processed in parallel
  for (const [userId, portfolio] of Object.entries(userBatch)) {
    const context = buildPerUserContext({ userId, portfolio, ... });
    await calculation.process(context);
  }
}
```

**Why streaming?**
- Portfolio data is sharded across multiple documents
- Loading all users at once would exceed memory limits
- Streaming allows processing millions of users efficiently

#### **Meta Computations: Batch or Shard**
Meta computations have two modes:

1. **Standard Meta** (No price dependency):
   ```javascript
   const context = buildMetaContext({ insights, social, ... });
   await calculation.process(context);
   ```

2. **Price-Dependent Meta** (Batched Shard Processing):
   ```javascript
   // System loads price data in shard batches
   for (const shardRef of priceShardRefs) {
     const shardData = await loadPriceShard(shardRef);
     const context = buildMetaContext({ prices: { history: shardData } });
     await calculation.process(context);
     // Memory is cleared between shards
   }
   ```

---

## Sharding System

### The Problem: Firestore's 1MB Limit

Firestore has a **1MB hard limit** per document. When computation results contain thousands of tickers (e.g., momentum scores for every asset), the document exceeds this limit.

### The Solution: Auto-Sharding

The system **automatically detects** when a result is too large and splits it into a subcollection.

### How Auto-Sharding Works

```javascript
// When saving results:
const result = {
  "AAPL": { score: 0.8, volatility: 0.25 },
  "GOOGL": { score: 0.7, volatility: 0.22 },
  // ... 5,000+ tickers
};

// System calculates size:
const totalSize = calculateFirestoreBytes(result); // ~1.2 MB

// IF size > 900KB (safety threshold):
//   1. Splits data into chunks < 900KB each
//   2. Writes chunks to: /results/{date}/{category}/{calc}/_shards/shard_0
//                        /results/{date}/{category}/{calc}/_shards/shard_1
//   3. Writes pointer:   /results/{date}/{category}/{calc}
//      → { _sharded: true, _shardCount: 2, _completed: true }

// IF size < 900KB:
//   Writes normally: /results/{date}/{category}/{calc}
//   → { "AAPL": {...}, "GOOGL": {...}, _completed: true, _sharded: false }
```

### Reading Sharded Data

The system **transparently reassembles** sharded data when loading dependencies:

```javascript
// When loading a dependency:
const result = await fetchExistingResults(dateStr, ['momentum-score']);

// System checks: Is this document sharded?
if (doc.data()._sharded === true) {
  // 1. Fetch all docs from _shards subcollection
  // 2. Merge them back into a single object
  // 3. Return as if it was never sharded
}

// Your calculation receives complete data, regardless of storage method
```
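The `hydrateAutoShardedResult` helper changed in the hunk near the top of this diff is the production version of that read path. As a condensed, self-contained sketch of the same logic (assuming a Firestore Admin SDK `docRef`; the `readResult` name is illustrative):

```javascript
// Condensed sketch of the transparent read path (cf. hydrateAutoShardedResult above).
async function readResult(docRef) {
  const snap = await docRef.get();
  if (!snap.exists) return null;

  const data = snap.data();
  if (data._sharded !== true) return data; // Plain document: use as-is

  // Sharded: merge every chunk from the _shards subcollection back together
  const shards = await docRef.collection('_shards').get();
  const assembled = {};
  shards.forEach(doc => Object.assign(assembled, doc.data()));

  // Strip internal flags, mirroring the hydrator in this release
  delete assembled._sharded;
  delete assembled._completed;
  return assembled;
}
```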

### Handling Mixed Storage Scenarios

**Question:** What if I need data from 2 days, where Day 1 is sharded and Day 2 is not?

**Answer:** The system handles this automatically:

```javascript
// Your calculation declares:
static getMetadata() {
  return {
    isHistorical: true, // I need yesterday's data
    // ...
  };
}

// System loads BOTH days:
const computed = await fetchExistingResults(todayDate, ['momentum-score']);
// → Auto-detects if sharded, reassembles if needed

const previousComputed = await fetchExistingResults(yesterdayDate, ['momentum-score']);
// → Auto-detects if sharded, reassembles if needed

// Context now has both:
{
  computed: { "momentum-score": { /* today's data, reassembled if sharded */ } },
  previousComputed: { "momentum-score": { /* yesterday's data, reassembled if sharded */ } }
}
```

You never need to know or care whether data is sharded. The system guarantees you receive complete, reassembled data.

---

## Computation Types & Execution

### Standard Computations (`type: 'standard'`)

**Purpose:** Per-user calculations (risk profiles, P&L analysis, behavioral scoring)

**Execution:**
- Runs **once per user** per day
- Receives individual user portfolio and history
- Streams data in batches for memory efficiency

**Example:**
```javascript
class UserRiskProfile {
  static getMetadata() {
    return {
      type: 'standard',
      rootDataDependencies: ['portfolio', 'history'],
      userType: 'speculator'
    };
  }

  async process(context) {
    const { user, math } = context;
    const portfolio = user.portfolio.today;
    const positions = math.extract.getPositions(portfolio, user.type);

    // Calculate risk per user
    this.results[user.id] = { riskScore: /* ... */ };
  }
}
```

**Result Structure:**
```javascript
{
  "user_123": { riskScore: 0.75 },
  "user_456": { riskScore: 0.45 },
  // ... millions of users
}
```

### Meta Computations (`type: 'meta'`)

**Purpose:** Platform-wide calculations (aggregate metrics, market analysis, global trends)

**Execution:**
- Runs **once per day** (not per user)
- Processes all data holistically
- Can access price history for all instruments

**Example:**
```javascript
class MarketMomentum {
  static getMetadata() {
    return {
      type: 'meta',
      rootDataDependencies: ['price', 'insights']
    };
  }

  async process(context) {
    const { prices, insights, math } = context;

    // Calculate momentum for every ticker
    for (const [instId, data] of Object.entries(prices.history)) {
      const ticker = data.ticker;
      const priceData = math.priceExtractor.getHistory(prices, ticker);

      this.results[ticker] = { momentum: /* ... */ };
    }
  }
}
```

**Result Structure:**
```javascript
{
  "AAPL": { momentum: 0.65 },
  "GOOGL": { momentum: 0.82 },
  // ... all tickers
}
```

### Price-Dependent Meta Computations

When a meta computation declares `rootDataDependencies: ['price']`, it enters **batched shard processing mode**:

```javascript
// Instead of loading ALL price data at once (would crash):
for (const shardRef of priceShardRefs) {
  const shardData = await loadPriceShard(shardRef); // ~50-100 instruments per shard

  const context = buildMetaContext({
    prices: { history: shardData } // Only this shard's data
  });

  await calculation.process(context);

  // Results accumulate across shards
  // Memory is cleared between iterations
}
```

**Your calculation receives partial data** and processes it incrementally. The system ensures all shards are eventually processed.

---

## Dependency Management

### Declaring Dependencies

```javascript
static getDependencies() {
  return ['risk-metrics', 'sentiment-score', 'momentum-analysis'];
}
```

This tells the system: "Before you run me, make sure these 3 calculations have completed."

### How Dependencies Are Loaded

When your calculation runs:

1. System fetches results from all declared dependencies
2. Checks if data is sharded → reassembles if needed
3. Injects into `context.computed`:

```javascript
{
  computed: {
    "risk-metrics": { "AAPL": { volatility: 0.25 }, ... },
    "sentiment-score": { "AAPL": { score: 0.8 }, ... },
    "momentum-analysis": { "AAPL": { momentum: 0.65 }, ... }
  }
}
```

### Accessing Dependency Results

```javascript
async process(context) {
  const { computed, math } = context;

  // Access results from dependencies
  const volatility = math.signals.getMetric(
    computed,
    'risk-metrics',
    'AAPL',
    'volatility'
  );

  const sentiment = math.signals.getMetric(
    computed,
    'sentiment-score',
    'AAPL',
    'score'
  );

  // Use them in your calculation
  const combinedScore = volatility * sentiment;
}
```

### Historical Dependencies

If your calculation needs **yesterday's dependency results**:

```javascript
static getMetadata() {
  return {
    isHistorical: true, // ← Enable historical mode
    // ...
  };
}

async process(context) {
  const { computed, previousComputed } = context;

  // Today's risk
  const todayRisk = computed['risk-metrics']['AAPL'].volatility;

  // Yesterday's risk
  const yesterdayRisk = previousComputed['risk-metrics']['AAPL'].volatility;

  // Calculate change
  const riskChange = todayRisk - yesterdayRisk;
}
```

---

## Versioning & Smart Hashing

### The Problem: When to Recompute?

If you fix a bug in a calculation, how does the system know to re-run it for all past dates?

### The Solution: Merkle Tree Dependency Hashing

Every calculation gets a **smart hash** that includes:

1. **Its own code** (SHA-256 of the class definition)
2. **Layer dependencies** (Hashes of math layers it uses)
3. **Calculation dependencies** (Hashes of calculations it depends on)

```javascript
// Example hash composition:
const intrinsicHash = hash(calculation.toString() + layerHashes);
const dependencyHashes = dependencies.map(dep => dep.hash).join('|');
const finalHash = hash(intrinsicHash + '|DEPS:' + dependencyHashes);

// Result: "a3f9c2e1..." (SHA-256)
```
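As a runnable sketch of that composition using Node's built-in `crypto` module (the `smartHash` name and argument shapes are illustrative, not the manifest builder's actual API):

```javascript
const crypto = require('crypto');

const sha256 = (s) => crypto.createHash('sha256').update(s).digest('hex');

// Fold a calculation's own code, its layer hashes, and its
// dependencies' final hashes into one cascading hash.
function smartHash(CalcClass, layerHashes, dependencyFinalHashes) {
  const intrinsicHash = sha256(CalcClass.toString() + layerHashes.join('|'));
  return sha256(intrinsicHash + '|DEPS:' + dependencyFinalHashes.join('|'));
}
```

Because each final hash folds in the dependencies' final hashes, a change anywhere in the graph propagates downward — which is exactly the cascading behavior described next.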

### Cascading Invalidation

If **Calculation A** changes, **Calculation B** (which depends on A) automatically gets a new hash:

```
Risk Metrics (v1) → hash: abc123
        ↓
Sentiment Score   → hash: def456 (includes abc123)
(depends on Risk)
```

If you update Risk Metrics:

```
Risk Metrics (v2) → hash: xyz789 (NEW!)
        ↓
Sentiment Score   → hash: ghi012 (NEW! Because dependency changed)
```

### Recomputation Logic

For each date, the system checks:

```javascript
// Stored in Firestore:
computationStatus['2024-12-07'] = {
  'risk-metrics': 'abc123', // Last run hash
  'sentiment-score': 'def456'
};

// Current manifest:
manifest['risk-metrics'].hash = 'xyz789'; // NEW HASH!
manifest['sentiment-score'].hash = 'ghi012';

// Decision:
// - Risk Metrics: Hash mismatch → RERUN
// - Sentiment Score: Hash mismatch → RERUN (cascaded)
```

This ensures **incremental recomputation**: only changed calculations (and their dependents) re-run.

---

## Execution Modes

### Mode 1: Legacy (Orchestrator)

**Single-process execution** for all dates and calculations.

```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-orchestrator
```

- Loads manifest
- Iterates through all dates
- Runs all calculations in Pass 1 sequentially
- Good for: Development, debugging

### Mode 2: Dispatcher + Workers (Production)

**Distributed execution** using Pub/Sub.

#### Step 1: Dispatch Tasks
```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-dispatcher
```

Publishes messages to Pub/Sub:
```json
{
  "action": "RUN_COMPUTATION_DATE",
  "date": "2024-12-07",
  "pass": "1"
}
```

#### Step 2: Workers Consume Tasks
```bash
# Cloud Function triggered by Pub/Sub
# Or: Local consumer for testing
npm run computation-worker
```

Each worker:
1. Receives a date + pass
2. Loads manifest
3. Runs calculations for that date only
4. Updates status document
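A minimal sketch of such a worker as a Pub/Sub message handler is shown below. The `loadManifest` and `runComputationPass` helpers are assumed placeholders for the module's internals, not its actual entry points; Pub/Sub delivers the payload base64-encoded:

```javascript
// Hypothetical Pub/Sub-triggered worker following the four steps above.
async function onComputationMessage(message) {
  // 1. Receive a date + pass (decode the base64 Pub/Sub payload)
  const { action, date, pass } = JSON.parse(Buffer.from(message.data, 'base64').toString());
  if (action !== 'RUN_COMPUTATION_DATE') return;

  // 2. Load manifest (assumed helper)
  const manifest = await loadManifest();

  // 3. Run calculations for that date only (assumed helper);
  // 4. the status document is updated as results commit (see updateComputationStatus above).
  await runComputationPass(date, pass, manifest);
}
```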

**Benefits:**
- Parallel execution (100+ workers)
- Fault tolerance (failed dates retry automatically)
- Scales to millions of dates

### Pass System

Calculations are grouped into **passes** based on dependencies:

```
Pass 1: Base calculations (no dependencies)
  - risk-metrics
  - price-momentum

Pass 2: Depends on Pass 1
  - sentiment-score (needs risk-metrics)
  - trend-analysis (needs price-momentum)

Pass 3: Depends on Pass 2
  - combined-signal (needs sentiment-score + trend-analysis)
```

**You run passes sequentially:**
```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-dispatcher  # Wait for completion
COMPUTATION_PASS_TO_RUN=2 npm run computation-dispatcher  # Wait for completion
COMPUTATION_PASS_TO_RUN=3 npm run computation-dispatcher
```

The manifest builder automatically assigns pass numbers via topological sort.
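For intuition, here is a small sketch of how pass numbers can fall out of the dependency graph — a longest-path walk over a topological order (illustrative only, not the manifest builder's actual code):

```javascript
// Assigns pass = 1 + max(pass of dependencies), memoized.
// `graph` maps each calculation name to the names it depends on.
function assignPasses(graph) {
  const passes = {};
  const passOf = (name, seen = new Set()) => {
    if (passes[name]) return passes[name];                        // Memoized
    if (seen.has(name)) throw new Error(`Dependency cycle at ${name}`);
    seen.add(name);
    const deps = graph[name] || [];
    passes[name] = deps.length === 0 ? 1 : 1 + Math.max(...deps.map(d => passOf(d, seen)));
    return passes[name];
  };
  Object.keys(graph).forEach(n => passOf(n));
  return passes;
}

// assignPasses({ 'risk-metrics': [], 'sentiment-score': ['risk-metrics'] })
// → { 'risk-metrics': 1, 'sentiment-score': 2 }
```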

---

## Summary: The Complete Flow

### For a Standard Calculation

```
1. Manifest Builder
   ├─ Scans your calculation class
   ├─ Generates smart hash (code + layers + dependencies)
   ├─ Assigns to a pass based on dependency graph
   └─ Validates all dependencies exist

2. Dispatcher/Orchestrator
   ├─ Loads manifest
   ├─ Iterates through all dates
   └─ For each date:
      ├─ Checks if calculation needs to run (hash mismatch?)
      ├─ Checks if root data exists (portfolio, history, etc.)
      └─ Dispatches task (or runs directly)

3. Worker/Executor
   ├─ Receives task for specific date
   ├─ Loads dependency results (auto-reassembles if sharded)
   ├─ Streams portfolio data in batches
   └─ For each user batch:
      ├─ Builds per-user context
      ├─ Injects math layers, mappings, computed dependencies
      ├─ Calls your calculation.process(context)
      └─ Accumulates results

4. Result Committer
   ├─ Calculates total result size
   ├─ IF size > 900KB:
   │  ├─ Splits into chunks
   │  ├─ Writes to _shards subcollection
   │  └─ Writes pointer document
   └─ ELSE:
      └─ Writes single document

5. Status Updater
   └─ Updates computation_status/{date} with new hash
```

### For a Meta Calculation

Same as above, except:

- **Step 3**: Loads all data once (or iterates through price shards)
- **Context**: Global data, not per-user
- **Result**: One document per date (e.g., all tickers' momentum scores)

---

## Key Takeaways

1. **Context is Auto-Built**: Declare what you need in metadata; the system handles the rest
2. **Sharding is Transparent**: Read and write as if documents have no size limit
3. **Dependencies Just Work**: Results are automatically fetched and reassembled
4. **Versioning is Smart**: Change code → system knows what to rerun
5. **Streaming is Automatic**: Standard computations stream data; you don't manage batches
6. **Execution is Flexible**: Run locally for dev, distributed for production

---