bulltrackers-module 1.0.216 → 1.0.218

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -131,15 +131,11 @@ async function hydrateAutoShardedResult(docRef, resultName) {
   // Determine subcollection name (defaulting to '_shards')
   const shardsCol = docRef.collection('_shards');
   const snapshot = await shardsCol.get();
-
   const assembledData = { _completed: true }; // Rebuild the object
-
   snapshot.forEach(doc => { const chunk = doc.data(); Object.assign(assembledData, chunk); });
-
   // Remove internal flags if they leaked into the shards
   delete assembledData._sharded;
   delete assembledData._completed;
-
   return { name: resultName, data: assembledData };
 }
 
@@ -209,26 +205,20 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
   const successUpdates = {};
   const schemas = [];
-
   for (const name in stateObj) {
     const calc = stateObj[name];
     let hasData = false;
-
     try {
       const result = await calc.getResult();
       if (!result) { deps.logger.log('INFO', `${name} for ${dStr}: Skipped (Empty Result)`); continue; }
-
       const mainDocRef = deps.db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-
       // AUTO-SHARDING LOGIC
       const updates = await prepareAutoShardedWrites(result, mainDocRef, deps.logger);
-
       // Collect Schemas if present
       if (calc.manifest.class.getSchema) {
         const { class: _cls, ...safeMetadata } = calc.manifest;
         schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
       }
-
       if (updates.length > 0) {
         await commitBatchInChunks(config, deps, updates, `${name} Results`);
         successUpdates[name] = calc.manifest.hash || true;
@@ -237,10 +227,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       } else {
         deps.logger.log('INFO', `${name} for ${dStr}: - Empty Data`);
       }
-
     } catch (e) { deps.logger.log('ERROR', `${name} for ${dStr}: \u2716 FAILED Commit: ${e.message}`); }
   }
-
   if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => { });
   if (!skipStatusWrite && Object.keys(successUpdates).length > 0) {
     await updateComputationStatus(dStr, successUpdates, config, deps);
@@ -260,32 +248,10 @@ function calculateFirestoreBytes(value) {
   if (typeof value === 'number') return 8; // All numbers are 64-bit doubles or integers
   if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1;
   if (value instanceof Date) return 8; // Timestamps are 8 bytes
-
-  // Handle References (approximate based on path length)
-  if (value.constructor && value.constructor.name === 'DocumentReference') {
-    // Path string + 16 bytes for the reference type overhead
-    return Buffer.byteLength(value.path, 'utf8') + 16;
-  }
-
-  // Handle Arrays: Sum of all values
-  if (Array.isArray(value)) {
-    let sum = 0;
-    for (const item of value) sum += calculateFirestoreBytes(item);
-    return sum;
-  }
-
+  if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
+  if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
   // Handle Objects (Maps): Sum of (Key + 1 + Value)
-  if (typeof value === 'object') {
-    let sum = 0;
-    for (const k in value) {
-      if (Object.prototype.hasOwnProperty.call(value, k)) {
-        // Key size (utf8 + 1) + Value size
-        sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]);
-      }
-    }
-    return sum;
-  }
-
+  if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; }
   return 0; // Fallback
 }
 
@@ -296,25 +262,18 @@ async function prepareAutoShardedWrites(result, docRef, logger) {
   const CHUNK_LIMIT = SAFETY_THRESHOLD_BYTES - OVERHEAD_ALLOWANCE;
   const totalSize = calculateFirestoreBytes(result); // 1. Calculate Total Size Once (O(N))
   const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16; // Add the size of the document path itself (Firestore counts this against the 1MB limit)
-
   if ((totalSize + docPathSize) < CHUNK_LIMIT) { const data = { ...result, _completed: true, _sharded: false }; return [{ ref: docRef, data, options: { merge: true } }]; } // CASE A: Fits in one document
-
   logger.log('INFO', `[AutoShard] Result size ~${Math.round(totalSize/1024)}KB exceeds limit. Sharding...`);
-
-  const writes = [];
+  const writes = [];
   const shardCollection = docRef.collection('_shards');
-
-  let currentChunk = {};
-  let currentChunkSize = 0;
-  let shardIndex = 0;
-
-
+  let currentChunk = {};
+  let currentChunkSize = 0;
+  let shardIndex = 0;
   for (const [key, value] of Object.entries(result)) { // 2. Efficient O(N) Loop
     if (key.startsWith('_')) continue;
     const keySize = Buffer.byteLength(key, 'utf8') + 1; // Calculate size of just this item
     const valueSize = calculateFirestoreBytes(value);
     const itemSize = keySize + valueSize;
-
     if (currentChunkSize + itemSize > CHUNK_LIMIT) { // Check if adding this item would overflow the current chunk
       // Flush current chunk
       writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite
@@ -322,21 +281,14 @@ async function prepareAutoShardedWrites(result, docRef, logger) {
       currentChunk = {};
       currentChunkSize = 0;
     }
-
     // Add to current chunk
     currentChunk[key] = value;
     currentChunkSize += itemSize;
   }
-
   // Flush final chunk
   if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }
-
-  // Pointer Document
-  const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() };
-
-  // Use merge: false to ensure we overwrite any previous non-sharded blob
-  writes.push({ ref: docRef, data: pointerData, options: { merge: false } });
-
+  const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() }; // Pointer Document
+  writes.push({ ref: docRef, data: pointerData, options: { merge: false } }); // Use merge: false to ensure we overwrite any previous non-sharded blob
   return writes;
 }
 
@@ -240,9 +240,34 @@ class HistoryExtractor {
   }
 }
 
+/**
+ * FIXED: InsightsExtractor to properly handle document structure
+ * Document structure: { fetchedAt: Timestamp, insights: [...] }
+ */
+
 class InsightsExtractor {
-  static getInsights(context) {
-    return context.insights || context.daily_instrument_insights || [];
+  /**
+   * Gets insights array for today (default) or yesterday
+   * @param {Object} context - Computation context
+   * @param {string} timeframe - 'today' or 'yesterday'
+   * @returns {Array} Array of insight objects
+   */
+  static getInsights(context, timeframe = 'today') {
+    const insightsData = context.insights;
+
+    if (!insightsData) return [];
+
+    // Get the document for the requested timeframe
+    const doc = insightsData[timeframe]; // { fetchedAt: ..., insights: [...] }
+
+    if (!doc) return [];
+
+    // Extract the insights array from the document
+    if (doc.insights && Array.isArray(doc.insights)) {
+      return doc.insights;
+    }
+
+    return [];
  }
 
   static getInsightForInstrument(insights, instrumentId) {
@@ -0,0 +1,716 @@
# BullTrackers Computation System - Comprehensive Onboarding Guide

## Table of Contents
1. [System Overview](#system-overview)
2. [Context Architecture](#context-architecture)
3. [Data Loading & Routing](#data-loading--routing)
4. [Sharding System](#sharding-system)
5. [Computation Types & Execution](#computation-types--execution)
6. [Dependency Management](#dependency-management)
7. [Versioning & Smart Hashing](#versioning--smart-hashing)
8. [Execution Modes](#execution-modes)

---

## System Overview

The BullTrackers Computation System is a **dependency-aware, auto-sharding, distributed calculation engine** designed to process massive datasets across user portfolios, trading histories, market insights, and price data. The system automatically handles:

- **Smart data loading** (only loads what's needed)
- **Transparent sharding** (handles Firestore's 1MB document limit)
- **Dependency injection** (calculations receive exactly what they declare)
- **Historical state management** (access to yesterday's data when needed)
- **Incremental recomputation** (only reruns when code or dependencies change)

---

## Context Architecture

### The Context Object

Every computation receives a **context object** that contains all the data and tools it needs. The context is built dynamically based on the computation's declared dependencies.

### Context Structure by Computation Type

#### **Standard (Per-User) Context**
```javascript
{
  user: {
    id: "user_123",
    type: "speculator", // or "normal"
    portfolio: {
      today: { /* Portfolio snapshot for today */ },
      yesterday: { /* Portfolio snapshot for yesterday (if isHistorical: true) */ }
    },
    history: {
      today: { /* Trading history for today */ },
      yesterday: { /* Trading history for yesterday (if needed) */ }
    }
  },
  date: {
    today: "2024-12-07"
  },
  insights: {
    today: { /* Daily instrument insights */ },
    yesterday: { /* Yesterday's insights (if needed) */ }
  },
  social: {
    today: { /* Social post insights */ },
    yesterday: { /* Yesterday's social data (if needed) */ }
  },
  mappings: {
    tickerToInstrument: { "AAPL": 123, ... },
    instrumentToTicker: { 123: "AAPL", ... }
  },
  math: {
    // All mathematical layers (extractors, primitives, signals, etc.)
    extract: DataExtractor,
    compute: MathPrimitives,
    signals: SignalPrimitives,
    // ... and more
  },
  computed: {
    // Results from dependency calculations (current day)
    "risk-metrics": { "AAPL": { volatility: 0.25 }, ... },
    "sentiment-score": { "AAPL": { score: 0.8 }, ... }
  },
  previousComputed: {
    // Results from dependency calculations (previous day, if isHistorical: true)
    "risk-metrics": { "AAPL": { volatility: 0.23 }, ... }
  },
  meta: { /* Calculation metadata */ },
  config: { /* System configuration */ },
  deps: { /* System dependencies (db, logger, etc.) */ }
}
```

#### **Meta (Once-Per-Day) Context**
```javascript
{
  date: {
    today: "2024-12-07"
  },
  insights: {
    today: { /* Daily instrument insights */ },
    yesterday: { /* If needed */ }
  },
  social: {
    today: { /* Social post insights */ },
    yesterday: { /* If needed */ }
  },
  prices: {
    history: {
      // Price data for all instruments (or batched shards)
      "123": {
        ticker: "AAPL",
        prices: {
          "2024-12-01": 150.25,
          "2024-12-02": 151.30,
          // ...
        }
      }
    }
  },
  mappings: { /* Same as Standard */ },
  math: { /* Same as Standard */ },
  computed: { /* Same as Standard */ },
  previousComputed: { /* Same as Standard */ },
  meta: { /* Calculation metadata */ },
  config: { /* System configuration */ },
  deps: { /* System dependencies */ }
}
```

### How Context is Auto-Populated

The system uses a **declaration-based approach**. When you define a calculation, you declare what data you need:

```javascript
class MyCalculation {
  static getMetadata() {
    return {
      type: 'standard', // or 'meta'
      isHistorical: true, // Do I need yesterday's data?
      rootDataDependencies: ['portfolio', 'insights'], // What root data do I need?
      userType: 'all' // 'all', 'speculator', or 'normal'
    };
  }

  static getDependencies() {
    return ['risk-metrics', 'sentiment-score']; // What other calculations do I depend on?
  }
}
```

The `ContextBuilder` then:

1. **Checks `rootDataDependencies`** → Loads portfolio, insights, social, history, or price data
2. **Checks `isHistorical`** → If true, loads yesterday's portfolio and previous computation results
3. **Checks `getDependencies()`** → Fetches results from other calculations
4. **Injects math layers** → Automatically includes all extractors, primitives, and utilities
5. **Adds mappings** → Provides ticker ↔ instrument ID conversion

**You only get what you ask for.** This keeps memory usage efficient and prevents unnecessary data loading.
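To make that assembly concrete, here is a minimal sketch of the flow. It is illustrative only: `loadRootData` and `fetchExistingResults` are assumed names standing in for the loaders described in the next section, and the real `ContextBuilder` injects more than this (math layers, mappings, config).

```javascript
// Illustrative sketch of declaration-driven context assembly — not the
// actual ContextBuilder. loadRootData / fetchExistingResults are assumed names.
const prevDay = (d) => new Date(Date.parse(d) - 86400000).toISOString().slice(0, 10);

async function buildContextSketch(CalcClass, date, loaders) {
  const meta = CalcClass.getMetadata();
  const context = { date: { today: date }, meta };

  // 1. Root data: only what was declared in rootDataDependencies
  for (const root of meta.rootDataDependencies || []) {
    context[root] = { today: await loaders.loadRootData(root, date) };
    // 2. isHistorical adds yesterday's snapshot alongside today's
    if (meta.isHistorical) context[root].yesterday = await loaders.loadRootData(root, prevDay(date));
  }

  // 3. Results of declared dependency calculations (reassembled if sharded)
  const deps = CalcClass.getDependencies ? CalcClass.getDependencies() : [];
  context.computed = await loaders.fetchExistingResults(date, deps);
  if (meta.isHistorical) context.previousComputed = await loaders.fetchExistingResults(prevDay(date), deps);

  // 4./5. The real builder also injects math layers and ticker ↔ instrument mappings here
  return context;
}
```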

---

## Data Loading & Routing

### The Data Loading Pipeline

```
┌─────────────────────────────────────────────────────────────┐
│                     DataLoader (Cached)                      │
├─────────────────────────────────────────────────────────────┤
│ • loadMappings() → Ticker/Instrument maps                    │
│ • loadInsights(date) → Daily instrument insights             │
│ • loadSocial(date) → Social post insights                    │
│ • loadPriceShard(ref) → Asset price data                     │
│ • getPriceShardRefs() → All price shards                     │
│ • getSpecificPriceShardReferences(ids) → Targeted shards     │
└─────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────┐
│                     ComputationExecutor                      │
├─────────────────────────────────────────────────────────────┤
│ • executePerUser() → Streams portfolio data                  │
│ • executeOncePerDay() → Loads global/meta data               │
└─────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────┐
│                       ContextBuilder                         │
├─────────────────────────────────────────────────────────────┤
│ Assembles context based on metadata & dependencies           │
└─────────────────────────────────────────────────────────────┘

                  Your Calculation.process()
```

### Streaming vs Batch Loading

#### **Standard Computations: Streaming**
Standard (per-user) computations use **streaming** to process users in chunks:

```javascript
// System streams portfolio data in batches of 50 users
for await (const userBatch of streamPortfolioData()) {
  // Each batch is processed in parallel
  for (const [userId, portfolio] of Object.entries(userBatch)) {
    const context = buildPerUserContext({ userId, portfolio, ... });
    await calculation.process(context);
  }
}
```

**Why streaming?**
- Portfolio data is sharded across multiple documents
- Loading all users at once would exceed memory limits
- Streaming allows processing millions of users efficiently

#### **Meta Computations: Batch or Shard**
Meta computations have two modes:

1. **Standard Meta** (No price dependency):
   ```javascript
   const context = buildMetaContext({ insights, social, ... });
   await calculation.process(context);
   ```

2. **Price-Dependent Meta** (Batched Shard Processing):
   ```javascript
   // System loads price data in shard batches
   for (const shardRef of priceShardRefs) {
     const shardData = await loadPriceShard(shardRef);
     const context = buildMetaContext({ prices: { history: shardData } });
     await calculation.process(context);
     // Memory is cleared between shards
   }
   ```

---

## Sharding System

### The Problem: Firestore's 1MB Limit

Firestore has a **1MB hard limit** per document. When computation results contain thousands of tickers (e.g., momentum scores for every asset), the document exceeds this limit.

### The Solution: Auto-Sharding

The system **automatically detects** when a result is too large and splits it into a subcollection.

### How Auto-Sharding Works

```javascript
// When saving results:
const result = {
  "AAPL": { score: 0.8, volatility: 0.25 },
  "GOOGL": { score: 0.7, volatility: 0.22 },
  // ... 5,000+ tickers
};

// System calculates size:
const totalSize = calculateFirestoreBytes(result); // ~1.2 MB

// IF size > 900KB (safety threshold):
//   1. Splits data into chunks < 900KB each
//   2. Writes chunks to: /results/{date}/{category}/{calc}/_shards/shard_0
//                        /results/{date}/{category}/{calc}/_shards/shard_1
//   3. Writes pointer:   /results/{date}/{category}/{calc}
//      → { _sharded: true, _shardCount: 2, _completed: true }

// IF size < 900KB:
//   Writes normally: /results/{date}/{category}/{calc}
//   → { "AAPL": {...}, "GOOGL": {...}, _completed: true, _sharded: false }
```

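To make the 900KB threshold concrete, here is the byte accounting that the sizing helper (`calculateFirestoreBytes`, shown in the diff above) applies to a single entry of that result object:

```javascript
// Worked example of the sizing rules (string = UTF-8 bytes + 1, number = 8):
// key "AAPL"                              → 4 + 1 = 5 bytes
// value { score: 0.8, volatility: 0.25 }
//   "score"      → (5 + 1) + 8            = 14 bytes
//   "volatility" → (10 + 1) + 8           = 19 bytes
//   map value total                       = 33 bytes
// entry total: 5 + 33                     = 38 bytes
// 5,000 such entries ≈ 190 KB — comfortably under the limit; richer
// per-ticker payloads (nested maps, history arrays) are what push
// a result past 900 KB and trigger sharding.
```
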
### Reading Sharded Data

The system **transparently reassembles** sharded data when loading dependencies:

```javascript
// When loading a dependency:
const result = await fetchExistingResults(dateStr, ['momentum-score']);

// System checks: Is this document sharded?
if (doc.data()._sharded === true) {
  // 1. Fetch all docs from _shards subcollection
  // 2. Merge them back into a single object
  // 3. Return as if it was never sharded
}

// Your calculation receives complete data, regardless of storage method
```

### Handling Mixed Storage Scenarios

**Question:** What if I need data from 2 days, where Day 1 is sharded and Day 2 is not?

**Answer:** The system handles this automatically:

```javascript
// Your calculation declares:
static getMetadata() {
  return {
    isHistorical: true, // I need yesterday's data
    // ...
  };
}

// System loads BOTH days:
const computed = await fetchExistingResults(todayDate, ['momentum-score']);
// → Auto-detects if sharded, reassembles if needed

const previousComputed = await fetchExistingResults(yesterdayDate, ['momentum-score']);
// → Auto-detects if sharded, reassembles if needed

// Context now has both:
{
  computed: { "momentum-score": { /* today's data, reassembled if sharded */ } },
  previousComputed: { "momentum-score": { /* yesterday's data, reassembled if sharded */ } }
}
```

You never need to know or care whether data is sharded. The system guarantees you receive complete, reassembled data.

---

## Computation Types & Execution

### Standard Computations (`type: 'standard'`)

**Purpose:** Per-user calculations (risk profiles, P&L analysis, behavioral scoring)

**Execution:**
- Runs **once per user** per day
- Receives individual user portfolio and history
- Streams data in batches for memory efficiency

**Example:**
```javascript
class UserRiskProfile {
  static getMetadata() {
    return {
      type: 'standard',
      rootDataDependencies: ['portfolio', 'history'],
      userType: 'speculator'
    };
  }

  async process(context) {
    const { user, math } = context;
    const portfolio = user.portfolio.today;
    const positions = math.extract.getPositions(portfolio, user.type);

    // Calculate risk per user
    this.results[user.id] = { riskScore: /* ... */ };
  }
}
```

**Result Structure:**
```javascript
{
  "user_123": { riskScore: 0.75 },
  "user_456": { riskScore: 0.45 },
  // ... millions of users
}
```

### Meta Computations (`type: 'meta'`)

**Purpose:** Platform-wide calculations (aggregate metrics, market analysis, global trends)

**Execution:**
- Runs **once per day** (not per user)
- Processes all data holistically
- Can access price history for all instruments

**Example:**
```javascript
class MarketMomentum {
  static getMetadata() {
    return {
      type: 'meta',
      rootDataDependencies: ['price', 'insights']
    };
  }

  async process(context) {
    const { prices, insights, math } = context;

    // Calculate momentum for every ticker
    for (const [instId, data] of Object.entries(prices.history)) {
      const ticker = data.ticker;
      const priceData = math.priceExtractor.getHistory(prices, ticker);

      this.results[ticker] = { momentum: /* ... */ };
    }
  }
}
```

**Result Structure:**
```javascript
{
  "AAPL": { momentum: 0.65 },
  "GOOGL": { momentum: 0.82 },
  // ... all tickers
}
```

### Price-Dependent Meta Computations

When a meta computation declares `rootDataDependencies: ['price']`, it enters **batched shard processing mode**:

```javascript
// Instead of loading ALL price data at once (would crash):
for (const shardRef of priceShardRefs) {
  const shardData = await loadPriceShard(shardRef); // ~50-100 instruments per shard

  const context = buildMetaContext({
    prices: { history: shardData } // Only this shard's data
  });

  await calculation.process(context);

  // Results accumulate across shards
  // Memory is cleared between iterations
}
```

**Your calculation receives partial data** and processes it incrementally. The system ensures all shards are eventually processed.

---

## Dependency Management

### Declaring Dependencies

```javascript
static getDependencies() {
  return ['risk-metrics', 'sentiment-score', 'momentum-analysis'];
}
```

This tells the system: "Before you run me, make sure these 3 calculations have completed."

### How Dependencies Are Loaded

When your calculation runs:

1. System fetches results from all declared dependencies
2. Checks if data is sharded → reassembles if needed
3. Injects into `context.computed`:

```javascript
{
  computed: {
    "risk-metrics": { "AAPL": { volatility: 0.25 }, ... },
    "sentiment-score": { "AAPL": { score: 0.8 }, ... },
    "momentum-analysis": { "AAPL": { momentum: 0.65 }, ... }
  }
}
```

### Accessing Dependency Results

```javascript
async process(context) {
  const { computed, math } = context;

  // Access results from dependencies
  const volatility = math.signals.getMetric(
    computed,
    'risk-metrics',
    'AAPL',
    'volatility'
  );

  const sentiment = math.signals.getMetric(
    computed,
    'sentiment-score',
    'AAPL',
    'score'
  );

  // Use them in your calculation
  const combinedScore = volatility * sentiment;
}
```

### Historical Dependencies

If your calculation needs **yesterday's dependency results**:

```javascript
static getMetadata() {
  return {
    isHistorical: true, // ← Enable historical mode
    // ...
  };
}

async process(context) {
  const { computed, previousComputed } = context;

  // Today's risk
  const todayRisk = computed['risk-metrics']['AAPL'].volatility;

  // Yesterday's risk
  const yesterdayRisk = previousComputed['risk-metrics']['AAPL'].volatility;

  // Calculate change
  const riskChange = todayRisk - yesterdayRisk;
}
```

---

## Versioning & Smart Hashing

### The Problem: When to Recompute?

If you fix a bug in a calculation, how does the system know to re-run it for all past dates?

### The Solution: Merkle Tree Dependency Hashing

Every calculation gets a **smart hash** that includes:

1. **Its own code** (SHA-256 of the class definition)
2. **Layer dependencies** (Hashes of math layers it uses)
3. **Calculation dependencies** (Hashes of calculations it depends on)

```javascript
// Example hash composition:
const intrinsicHash = hash(calculation.toString() + layerHashes);
const dependencyHashes = dependencies.map(dep => dep.hash).join('|');
const finalHash = hash(intrinsicHash + '|DEPS:' + dependencyHashes);

// Result: "a3f9c2e1..." (SHA-256)
```

### Cascading Invalidation

If **Calculation A** changes, **Calculation B** (which depends on A) automatically gets a new hash:

```
Risk Metrics (v1) → hash: abc123

Sentiment Score   → hash: def456 (includes abc123)
(depends on Risk)
```

If you update Risk Metrics:

```
Risk Metrics (v2) → hash: xyz789 (NEW!)

Sentiment Score   → hash: ghi012 (NEW! Because dependency changed)
```

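To see the cascade mechanically, here is a self-contained sketch of the hash composition above using Node's built-in `crypto`. The manifest builder's actual code differs; this only demonstrates the Merkle-style propagation:

```javascript
const crypto = require('crypto');
const sha256 = (s) => crypto.createHash('sha256').update(s).digest('hex');

// finalHash = hash(intrinsic + '|DEPS:' + dependency hashes), as above
const smartHash = (code, depHashes) =>
  sha256(sha256(code) + '|DEPS:' + depHashes.join('|'));

const riskV1 = smartHash('class RiskMetrics { /* v1 */ }', []);
const riskV2 = smartHash('class RiskMetrics { /* v2 bugfix */ }', []);

// Sentiment's own code is unchanged, but its hash still moves:
const sentimentBefore = smartHash('class SentimentScore { }', [riskV1]);
const sentimentAfter  = smartHash('class SentimentScore { }', [riskV2]);

console.log(sentimentBefore !== sentimentAfter); // true — invalidation cascades
```
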
### Recomputation Logic

For each date, the system checks:

```javascript
// Stored in Firestore:
computationStatus['2024-12-07'] = {
  'risk-metrics': 'abc123', // Last run hash
  'sentiment-score': 'def456'
};

// Current manifest:
manifest['risk-metrics'].hash = 'xyz789'; // NEW HASH!
manifest['sentiment-score'].hash = 'ghi012';

// Decision:
// - Risk Metrics: Hash mismatch → RERUN
// - Sentiment Score: Hash mismatch → RERUN (cascaded)
```

This ensures **incremental recomputation**: only changed calculations (and their dependents) re-run.

---

## Execution Modes

### Mode 1: Legacy (Orchestrator)

**Single-process execution** for all dates and calculations.

```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-orchestrator
```

- Loads manifest
- Iterates through all dates
- Runs all calculations in Pass 1 sequentially
- Good for: Development, debugging

### Mode 2: Dispatcher + Workers (Production)

**Distributed execution** using Pub/Sub.

#### Step 1: Dispatch Tasks
```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-dispatcher
```

Publishes messages to Pub/Sub:
```json
{
  "action": "RUN_COMPUTATION_DATE",
  "date": "2024-12-07",
  "pass": "1"
}
```

#### Step 2: Workers Consume Tasks
```bash
# Cloud Function triggered by Pub/Sub
# Or: Local consumer for testing
npm run computation-worker
```

Each worker:
1. Receives a date + pass
2. Loads manifest
3. Runs calculations for that date only
4. Updates status document

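A minimal consumer shape for that worker loop might look like the sketch below. This is not the package's actual worker: `loadManifest` and `runComputationDate` are hypothetical names for the steps just listed, and the base64 decoding matches how Pub/Sub delivers payloads to background Cloud Functions.

```javascript
// Hypothetical Pub/Sub-triggered worker sketch; helper names are assumed.
exports.computationWorker = async (message) => {
  // Pub/Sub delivers the JSON payload base64-encoded in message.data
  const task = JSON.parse(Buffer.from(message.data, 'base64').toString('utf8'));
  if (task.action !== 'RUN_COMPUTATION_DATE') return;

  const manifest = await loadManifest();                    // step 2 (assumed helper)
  await runComputationDate(task.date, task.pass, manifest); // step 3 (assumed helper)
  // Step 4 (the status write) happens inside the run; throwing here
  // instead would nack the message, which is what makes failed dates retry.
};
```
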
**Benefits:**
- Parallel execution (100+ workers)
- Fault tolerance (failed dates retry automatically)
- Scales to thousands of dates

### Pass System

Calculations are grouped into **passes** based on dependencies:

```
Pass 1: Base calculations (no dependencies)
  - risk-metrics
  - price-momentum

Pass 2: Depends on Pass 1
  - sentiment-score (needs risk-metrics)
  - trend-analysis (needs price-momentum)

Pass 3: Depends on Pass 2
  - combined-signal (needs sentiment-score + trend-analysis)
```

**You run passes sequentially:**
```bash
COMPUTATION_PASS_TO_RUN=1 npm run computation-dispatcher # Wait for completion
COMPUTATION_PASS_TO_RUN=2 npm run computation-dispatcher # Wait for completion
COMPUTATION_PASS_TO_RUN=3 npm run computation-dispatcher
```

The manifest builder automatically assigns pass numbers via topological sort.

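For intuition, pass assignment can be derived from the `getDependencies()` graph by giving each calculation a pass one greater than the deepest of its dependencies. A sketch of that idea (illustrative only; the actual manifest builder may differ):

```javascript
// Illustrative pass assignment: pass = 1 + max(pass of each dependency).
// `graph` maps calculation name -> array of dependency names.
function assignPasses(graph) {
  const pass = {};
  const visit = (name, trail = new Set()) => {
    if (pass[name]) return pass[name];
    if (trail.has(name)) throw new Error(`Dependency cycle at ${name}`);
    trail.add(name);
    const deps = graph[name] || [];
    pass[name] = deps.length ? 1 + Math.max(...deps.map((d) => visit(d, trail))) : 1;
    return pass[name];
  };
  Object.keys(graph).forEach((n) => visit(n));
  return pass;
}

assignPasses({
  'risk-metrics': [],
  'price-momentum': [],
  'sentiment-score': ['risk-metrics'],
  'trend-analysis': ['price-momentum'],
  'combined-signal': ['sentiment-score', 'trend-analysis'],
});
// → { 'risk-metrics': 1, 'price-momentum': 1, 'sentiment-score': 2,
//     'trend-analysis': 2, 'combined-signal': 3 }
```
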
653
+ ---
654
+
655
+ ## Summary: The Complete Flow
656
+
657
+ ### For a Standard Calculation
658
+
659
+ ```
660
+ 1. Manifest Builder
661
+ ├─ Scans your calculation class
662
+ ├─ Generates smart hash (code + layers + dependencies)
663
+ ├─ Assigns to a pass based on dependency graph
664
+ └─ Validates all dependencies exist
665
+
666
+ 2. Dispatcher/Orchestrator
667
+ ├─ Loads manifest
668
+ ├─ Iterates through all dates
669
+ └─ For each date:
670
+ ├─ Checks if calculation needs to run (hash mismatch?)
671
+ ├─ Checks if root data exists (portfolio, history, etc.)
672
+ └─ Dispatches task (or runs directly)
673
+
674
+ 3. Worker/Executor
675
+ ├─ Receives task for specific date
676
+ ├─ Loads dependency results (auto-reassembles if sharded)
677
+ ├─ Streams portfolio data in batches
678
+ └─ For each user batch:
679
+ ├─ Builds per-user context
680
+ ├─ Injects math layers, mappings, computed dependencies
681
+ ├─ Calls your calculation.process(context)
682
+ └─ Accumulates results
683
+
684
+ 4. Result Committer
685
+ ├─ Calculates total result size
686
+ ├─ IF size > 900KB:
687
+ │ ├─ Splits into chunks
688
+ │ ├─ Writes to _shards subcollection
689
+ │ └─ Writes pointer document
690
+ └─ ELSE:
691
+ └─ Writes single document
692
+
693
+ 5. Status Updater
694
+ └─ Updates computation_status/{date} with new hash
695
+ ```
696
+
697
+ ### For a Meta Calculation
698
+
699
+ Same as above, except:
700
+
701
+ - **Step 3**: Loads all data once (or iterates through price shards)
702
+ - **Context**: Global data, not per-user
703
+ - **Result**: One document per date (e.g., all tickers' momentum scores)
704
+
705
+ ---
706
+
707
+ ## Key Takeaways
708
+
709
+ 1. **Context is Auto-Built**: Declare what you need in metadata; the system handles the rest
710
+ 2. **Sharding is Transparent**: Read and write as if documents have no size limit
711
+ 3. **Dependencies Just Work**: Results are automatically fetched and reassembled
712
+ 4. **Versioning is Smart**: Change code → system knows what to rerun
713
+ 5. **Streaming is Automatic**: Standard computations stream data; you don't manage batches
714
+ 6. **Execution is Flexible**: Run locally for dev, distributed for production
715
+
716
+ ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.216",
+  "version": "1.0.218",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [