bulltrackers-module 1.0.203 → 1.0.204

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,93 @@
1
1
  /**
2
2
  * @fileoverview Computation system sub-pipes and utils.
3
3
  * REFACTORED: Now stateless and receive dependencies where needed.
4
- * DYNAMIC: Categorization logic is removed, replaced by manifest.
5
- * --- MODIFIED: getFirstDateFromSourceData is now getEarliestDataDates
6
- * and queries all data sources to build an availability map. ---
4
+ * FIXED: 'commitBatchInChunks' now respects Firestore 10MB size limit.
7
5
  */
8
- /** --- Computation System Sub-Pipes & Utils (Stateless, Dependency-Injection) --- */
9
6
 
10
7
  const { FieldValue, FieldPath } = require('@google-cloud/firestore');
11
8
 
12
9
  /** Stage 1: Normalize a calculation name to kebab-case */
13
10
  function normalizeName(name) { return name.replace(/_/g, '-'); }
14
11
 
15
- /** Stage 2: Commit a batch of writes in chunks */
12
/**
 * Stage 2: Commit a list of writes to Firestore in size-aware chunks.
 *
 * Each write is applied with `set(ref, data, { merge: true })`. A batch is flushed
 * before adding a write that would push it past either the operation-count cap or
 * the estimated payload cap, to prevent "Request payload size exceeds the limit" errors.
 *
 * @param {Object} config - Optional `batchSizeLimit` sets the per-batch operation cap
 *   (positive integer, clamped to 500). Defaults to 300 when absent or invalid.
 * @param {Object} deps - `{ db, logger, calculationUtils }`; `calculationUtils.withRetry`
 *   wraps every batch commit.
 * @param {Array<{ref: Object, data: Object}>} writes - Documents to write.
 * @param {string} operationName - Label used in log lines and retry names.
 * @returns {Promise<void>} Resolves once every chunk has been committed.
 * @throws Rethrows whatever `withRetry` rejects with for a failed chunk (after logging it).
 */
async function commitBatchInChunks(config, deps, writes, operationName) {
  const { db, logger, calculationUtils } = deps;
  const { withRetry } = calculationUtils;

  if (!writes || !writes.length) {
    logger.log('WARN', `[${operationName}] No writes to commit.`);
    return;
  }

  // Firestore constraints, with safety margins below the hard limits.
  const HARD_MAX_BATCH_OPS = 500;
  const DEFAULT_BATCH_OPS = 300;
  const MAX_BATCH_BYTES = 9 * 1024 * 1024; // 9MB safety limit (max 10MB per request)
  const LARGE_DOC_BYTES = 900 * 1024; // warn when one document nears the 1MB document limit
  const FALLBACK_DOC_BYTES = 100; // assumed size when a payload cannot be measured

  // Honour the caller's configured op cap (as earlier versions did), never above the hard limit.
  const requestedOps = config ? Number(config.batchSizeLimit) : NaN;
  const maxBatchOps = Number.isInteger(requestedOps) && requestedOps > 0
    ? Math.min(requestedOps, HARD_MAX_BATCH_OPS)
    : DEFAULT_BATCH_OPS;

  // Estimate the payload in UTF-8 BYTES. String length counts UTF-16 code units and
  // under-counts multi-byte characters, which would let an oversized batch through.
  const estimateBytes = (data) => {
    if (!data) return FALLBACK_DOC_BYTES;
    try {
      const json = JSON.stringify(data);
      if (typeof json === 'string') return Buffer.byteLength(json, 'utf8');
    } catch (err) {
      // Circular references, BigInt, etc.: keep going with the fallback, but leave a trace.
      logger.log('WARN', `[${operationName}] Could not estimate document size; assuming ${FALLBACK_DOC_BYTES} bytes.`, { error: err.message });
    }
    return FALLBACK_DOC_BYTES;
  };

  let currentBatch = db.batch();
  let currentOpsCount = 0;
  let currentBytesEst = 0;
  let batchIndex = 1;

  // Commit the current batch (if it holds anything) and start a fresh one.
  const commitAndReset = async () => {
    if (currentOpsCount > 0) {
      const batchToCommit = currentBatch;
      try {
        await withRetry(
          () => batchToCommit.commit(),
          `${operationName} (Chunk ${batchIndex})`
        );
        logger.log('INFO', `[${operationName}] Committed chunk ${batchIndex} (${currentOpsCount} ops, ~${(currentBytesEst / 1024 / 1024).toFixed(2)} MB).`);
        batchIndex++;
      } catch (err) {
        logger.log('ERROR', `[${operationName}] Failed to commit chunk ${batchIndex}. Size: ${(currentBytesEst / 1024 / 1024).toFixed(2)} MB.`, { error: err.message });
        throw err;
      }
    }
    currentBatch = db.batch();
    currentOpsCount = 0;
    currentBytesEst = 0;
  };

  for (const write of writes) {
    const docSize = estimateBytes(write.data);

    if (docSize > LARGE_DOC_BYTES) {
      logger.log('WARN', `[${operationName}] Large document detected (~${(docSize / 1024).toFixed(2)} KB). This allows few ops per batch.`);
    }

    // Flush first if this write would overflow the batch by count or by size.
    if ((currentOpsCount + 1 > maxBatchOps) || (currentBytesEst + docSize > MAX_BATCH_BYTES)) {
      await commitAndReset();
    }

    currentBatch.set(write.ref, write.data, { merge: true });
    currentOpsCount++;
    currentBytesEst += docSize;
  }

  // Commit whatever is left in the final, partially filled batch.
  await commitAndReset();
}
30
82
 
31
83
  /** Stage 3: Generate an array of expected date strings between two dates */
32
84
  function getExpectedDateStrings(startDate, endDate) {
33
85
  const dateStrings = [];
34
86
  if (startDate <= endDate) {
35
- const startUTC = new Date(Date.UTC(startDate.getUTCFullYear(), startDate.getUTCMonth(), startDate.getUTCDate()));
36
- const endUTC = new Date(Date.UTC(endDate.getUTCFullYear(), endDate.getUTCMonth(), endDate.getUTCDate()));
37
- for (let d = startUTC; d <= endUTC; d.setUTCDate(d.getUTCDate() + 1)) { dateStrings.push(new Date(d).toISOString().slice(0, 10)); } }
87
+ const startUTC = new Date(Date.UTC(startDate.getUTCFullYear(), startDate.getUTCMonth(), startDate.getUTCDate()));
88
+ const endUTC = new Date(Date.UTC(endDate.getUTCFullYear(), endDate.getUTCMonth(), endDate.getUTCDate()));
89
+ for (let d = startUTC; d <= endUTC; d.setUTCDate(d.getUTCDate() + 1)) { dateStrings.push(new Date(d).toISOString().slice(0, 10)); }
90
+ }
38
91
  return dateStrings;
39
92
  }
40
93
 
@@ -46,10 +99,10 @@ async function getFirstDateFromSimpleCollection(config, deps, collectionName) {
46
99
  const { db, logger, calculationUtils } = deps;
47
100
  const { withRetry } = calculationUtils;
48
101
  try {
49
- if (!collectionName) { logger.log('WARN', `[Core Utils] Collection name not provided for simple date query.`); return null; }
50
- const query = db.collection(collectionName) .where(FieldPath.documentId(), '>=', '2000-01-01') .orderBy(FieldPath.documentId(), 'asc') .limit(1);
51
- const snapshot = await withRetry(() => query.get(), `GetEarliestDoc(${collectionName})`);
52
- if (!snapshot.empty && /^\d{4}-\d{2}-\d{2}$/.test(snapshot.docs[0].id)) { return new Date(snapshot.docs[0].id + 'T00:00:00Z'); }
102
+ if (!collectionName) { logger.log('WARN', `[Core Utils] Collection name not provided for simple date query.`); return null; }
103
+ const query = db.collection(collectionName).where(FieldPath.documentId(), '>=', '2000-01-01').orderBy(FieldPath.documentId(), 'asc').limit(1);
104
+ const snapshot = await withRetry(() => query.get(), `GetEarliestDoc(${collectionName})`);
105
+ if (!snapshot.empty && /^\d{4}-\d{2}-\d{2}$/.test(snapshot.docs[0].id)) { return new Date(snapshot.docs[0].id + 'T00:00:00Z'); }
53
106
  } catch (e) { logger.log('ERROR', `GetFirstDate failed for ${collectionName}`, { errorMessage: e.message }); }
54
107
  return null;
55
108
  }
@@ -59,13 +112,19 @@ async function getFirstDateFromCollection(config, deps, collectionName) {
59
112
  const { db, logger, calculationUtils } = deps;
60
113
  const { withRetry } = calculationUtils;
61
114
  let earliestDate = null;
62
- try { if (!collectionName) { logger.log('WARN', `[Core Utils] Collection name not provided for sharded date query.`); return null; }
63
- const blockDocRefs = await withRetry(() => db.collection(collectionName).listDocuments(), `GetBlocks(${collectionName})`);
64
- if (!blockDocRefs.length) { logger.log('WARN', `No block documents in collection: ${collectionName}`); return null; }
65
- for (const blockDocRef of blockDocRefs) { const snapshotQuery = blockDocRef.collection(config.snapshotsSubcollection) .where(FieldPath.documentId(), '>=', '2000-01-01') .orderBy(FieldPath.documentId(), 'asc') .limit(1);
66
- const snapshotSnap = await withRetry(() => snapshotQuery.get(), `GetEarliestSnapshot(${blockDocRef.path})`);
67
- if (!snapshotSnap.empty && /^\d{4}-\d{2}-\d{2}$/.test(snapshotSnap.docs[0].id)) { const foundDate = new Date(snapshotSnap.docs[0].id + 'T00:00:00Z');
68
- if (!earliestDate || foundDate < earliestDate) earliestDate = foundDate; } } } catch (e) { logger.log('ERROR', `GetFirstDate failed for ${collectionName}`, { errorMessage: e.message }); }
115
+ try {
116
+ if (!collectionName) { logger.log('WARN', `[Core Utils] Collection name not provided for sharded date query.`); return null; }
117
+ const blockDocRefs = await withRetry(() => db.collection(collectionName).listDocuments(), `GetBlocks(${collectionName})`);
118
+ if (!blockDocRefs.length) { logger.log('WARN', `No block documents in collection: ${collectionName}`); return null; }
119
+ for (const blockDocRef of blockDocRefs) {
120
+ const snapshotQuery = blockDocRef.collection(config.snapshotsSubcollection).where(FieldPath.documentId(), '>=', '2000-01-01').orderBy(FieldPath.documentId(), 'asc').limit(1);
121
+ const snapshotSnap = await withRetry(() => snapshotQuery.get(), `GetEarliestSnapshot(${blockDocRef.path})`);
122
+ if (!snapshotSnap.empty && /^\d{4}-\d{2}-\d{2}$/.test(snapshotSnap.docs[0].id)) {
123
+ const foundDate = new Date(snapshotSnap.docs[0].id + 'T00:00:00Z');
124
+ if (!earliestDate || foundDate < earliestDate) earliestDate = foundDate;
125
+ }
126
+ }
127
+ } catch (e) { logger.log('ERROR', `GetFirstDate failed for ${collectionName}`, { errorMessage: e.message }); }
69
128
  return earliestDate;
70
129
  }
71
130
 
@@ -75,15 +134,15 @@ async function getFirstDateFromCollection(config, deps, collectionName) {
75
134
  async function getEarliestDataDates(config, deps) {
76
135
  const { logger } = deps;
77
136
  logger.log('INFO', 'Querying for earliest date from ALL source data collections...');
78
-
79
- const [
80
- investorDate,
81
- speculatorDate,
82
- investorHistoryDate,
83
- speculatorHistoryDate,
84
- insightsDate,
137
+
138
+ const [
139
+ investorDate,
140
+ speculatorDate,
141
+ investorHistoryDate,
142
+ speculatorHistoryDate,
143
+ insightsDate,
85
144
  socialDate,
86
- priceDate
145
+ priceDate
87
146
  ] = await Promise.all([
88
147
  getFirstDateFromCollection(config, deps, config.normalUserPortfolioCollection),
89
148
  getFirstDateFromCollection(config, deps, config.speculatorPortfolioCollection),
@@ -91,90 +150,84 @@ async function getEarliestDataDates(config, deps) {
91
150
  getFirstDateFromCollection(config, deps, config.speculatorHistoryCollection),
92
151
  getFirstDateFromSimpleCollection(config, deps, config.insightsCollectionName),
93
152
  getFirstDateFromSimpleCollection(config, deps, config.socialInsightsCollectionName),
94
- getFirstDateFromPriceCollection(config, deps) //TODO, Why no config.pricecollectionname here, looks ugly.
153
+ getFirstDateFromPriceCollection(config, deps)
95
154
  ]);
96
-
97
- const getMinDate = (...dates) => {
98
- const validDates = dates.filter(Boolean);
99
- if (validDates.length === 0) return null;
100
- return new Date(Math.min(...validDates));
155
+
156
+ const getMinDate = (...dates) => {
157
+ const validDates = dates.filter(Boolean);
158
+ if (validDates.length === 0) return null;
159
+ return new Date(Math.min(...validDates));
101
160
  };
102
-
161
+
103
162
  const earliestPortfolioDate = getMinDate(investorDate, speculatorDate);
104
- const earliestHistoryDate = getMinDate(investorHistoryDate, speculatorHistoryDate);
105
- const earliestInsightsDate = getMinDate(insightsDate);
106
- const earliestSocialDate = getMinDate(socialDate);
107
- const earliestPriceDate = getMinDate(priceDate);
108
- const absoluteEarliest = getMinDate(
109
- earliestPortfolioDate,
110
- earliestHistoryDate,
111
- earliestInsightsDate,
163
+ const earliestHistoryDate = getMinDate(investorHistoryDate, speculatorHistoryDate);
164
+ const earliestInsightsDate = getMinDate(insightsDate);
165
+ const earliestSocialDate = getMinDate(socialDate);
166
+ const earliestPriceDate = getMinDate(priceDate);
167
+ const absoluteEarliest = getMinDate(
168
+ earliestPortfolioDate,
169
+ earliestHistoryDate,
170
+ earliestInsightsDate,
112
171
  earliestSocialDate,
113
- earliestPriceDate
172
+ earliestPriceDate
114
173
  );
115
-
174
+
116
175
  const fallbackDate = new Date(config.earliestComputationDate + 'T00:00:00Z' || '2023-01-01T00:00:00Z');
117
-
118
- const result = {
119
- portfolio: earliestPortfolioDate || new Date('2999-12-31T00:00:00Z'),
120
- history: earliestHistoryDate || new Date('2999-12-31T00:00:00Z'),
121
- insights: earliestInsightsDate || new Date('2999-12-31T00:00:00Z'),
122
- social: earliestSocialDate || new Date('2999-12-31T00:00:00Z'),
123
- price: earliestPriceDate || new Date('2999-12-31T00:00:00Z'),
176
+
177
+ const result = {
178
+ portfolio: earliestPortfolioDate || new Date('2999-12-31T00:00:00Z'),
179
+ history: earliestHistoryDate || new Date('2999-12-31T00:00:00Z'),
180
+ insights: earliestInsightsDate || new Date('2999-12-31T00:00:00Z'),
181
+ social: earliestSocialDate || new Date('2999-12-31T00:00:00Z'),
182
+ price: earliestPriceDate || new Date('2999-12-31T00:00:00Z'),
124
183
  absoluteEarliest: absoluteEarliest || fallbackDate
125
184
  };
126
-
127
- logger.log('INFO', 'Earliest data availability map built:', {
128
- portfolio: result.portfolio.toISOString().slice(0, 10),
129
- history: result.history.toISOString().slice(0, 10),
130
- insights: result.insights.toISOString().slice(0, 10),
131
- social: result.social.toISOString().slice(0, 10),
132
- price: result.price.toISOString().slice(0, 10),
133
- absoluteEarliest: result.absoluteEarliest.toISOString().slice(0, 10)
185
+
186
+ logger.log('INFO', 'Earliest data availability map built:', {
187
+ portfolio: result.portfolio.toISOString().slice(0, 10),
188
+ history: result.history.toISOString().slice(0, 10),
189
+ insights: result.insights.toISOString().slice(0, 10),
190
+ social: result.social.toISOString().slice(0, 10),
191
+ price: result.price.toISOString().slice(0, 10),
192
+ absoluteEarliest: result.absoluteEarliest.toISOString().slice(0, 10)
134
193
  });
135
-
194
+
136
195
  return result;
137
196
  }
138
197
 
139
198
  /**
140
199
  * NEW HELPER: Get the earliest date from price collection
141
- * Price data is sharded differently - each shard contains instrumentId -> {prices: {date: price}}
142
200
  */
143
201
  async function getFirstDateFromPriceCollection(config, deps) {
144
202
  const { db, logger, calculationUtils } = deps;
145
203
  const { withRetry } = calculationUtils;
146
- const collection = config.priceCollection || 'asset_prices'; // TODO This hardcode is right, but we should really be passing the config directly as other data sources do.
147
-
204
+ const collection = config.priceCollection || 'asset_prices';
205
+
148
206
  try {
149
207
  logger.log('TRACE', `[getFirstDateFromPriceCollection] Querying ${collection}...`);
150
-
151
- // Get all shards (limit to first few for performance)
208
+
152
209
  const snapshot = await withRetry(
153
- () => db.collection(collection).limit(10).get(),
210
+ () => db.collection(collection).limit(10).get(),
154
211
  `GetPriceShards(${collection})`
155
212
  );
156
-
213
+
157
214
  if (snapshot.empty) {
158
215
  logger.log('WARN', `No price shards found in ${collection}`);
159
216
  return null;
160
217
  }
161
-
218
+
162
219
  let earliestDate = null;
163
-
164
- // Iterate through shards to find the earliest date across all instruments
220
+
165
221
  snapshot.forEach(doc => {
166
222
  const shardData = doc.data();
167
-
168
- // Each shard has structure: { instrumentId: { ticker, prices: { "YYYY-MM-DD": price } } }
169
223
  for (const instrumentId in shardData) {
170
224
  const instrumentData = shardData[instrumentId];
171
225
  if (!instrumentData.prices) continue;
172
-
173
- // Get all dates for this instrument
226
+
174
227
  const dates = Object.keys(instrumentData.prices)
175
228
  .filter(d => /^\d{4}-\d{2}-\d{2}$/.test(d))
176
229
  .sort();
177
-
230
+
178
231
  if (dates.length > 0) {
179
232
  const firstDate = new Date(dates[0] + 'T00:00:00Z');
180
233
  if (!earliestDate || firstDate < earliestDate) {
@@ -183,13 +236,13 @@ async function getFirstDateFromPriceCollection(config, deps) {
183
236
  }
184
237
  }
185
238
  });
186
-
239
+
187
240
  if (earliestDate) {
188
- logger.log('TRACE', `[getFirstDateFromPriceCollection] Earliest price date: ${earliestDate.toISOString().slice(0, 10)}`); // TODO, WTF IS THIS TS ERROR. Property 'toISOString' does not exist on type 'never'.ts(2339)
241
+ logger.log('TRACE', `[getFirstDateFromPriceCollection] Earliest price date: ${earliestDate.toISOString().slice(0, 10)}`);
189
242
  }
190
-
243
+
191
244
  return earliestDate;
192
-
245
+
193
246
  } catch (e) {
194
247
  logger.log('ERROR', `Failed to get earliest price date from ${collection}`, { errorMessage: e.message });
195
248
  return null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.203",
3
+ "version": "1.0.204",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [