bulltrackers-module 1.0.753 → 1.0.755

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
+ * * FIX: Added SAFE.PARSE_JSON to MERGE statement to handle JSON type mismatch.
  */

  const { Firestore } = require('@google-cloud/firestore');
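
Note on the first changelog entry above: passing a gs:// URI to a helper that expects a local file can make the client try to read it from disk; creating the load job explicitly avoids that. A minimal sketch of the explicit-job pattern with @google-cloud/bigquery follows; the project, bucket, dataset, and table names are placeholders, not taken from this package.

// Sketch only: load a GCS object into BigQuery via an explicit job configuration,
// so the gs:// URI is never interpreted as a local file path.
// Project, bucket, dataset, and table names below are placeholders.
const { BigQuery } = require('@google-cloud/bigquery');

async function loadFromGcs() {
  const bigquery = new BigQuery();
  const [job] = await bigquery.createJob({
    configuration: {
      load: {
        sourceUris: ['gs://example-bucket/results/part-000.json'],
        destinationTable: {
          projectId: 'example-project',
          datasetId: 'example_dataset',
          tableId: 'example_table',
        },
        sourceFormat: 'NEWLINE_DELIMITED_JSON',
        writeDisposition: 'WRITE_APPEND',
      },
    },
  });
  await job.promise(); // Resolves once the load job completes.
}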
@@ -520,6 +521,10 @@ class StorageManager {

  await this._ensureBigQueryTable(targetTable);

+ // FIX: Added SAFE.PARSE_JSON() to dependency_result_hashes
+ // The source (temp table) has this as a STRING (from the JSON file).
+ // The destination (target table) has this as JSON.
+ // We must explicitly parse the string to JSON during the merge.
  const mergeQuery = `
  MERGE INTO ${fullTarget} T
  USING (
@@ -533,7 +538,7 @@ class StorageManager {
  UPDATE SET
  code_hash = S.code_hash,
  result_hash = S.result_hash,
- dependency_result_hashes = S.dependency_result_hashes,
+ dependency_result_hashes = SAFE.PARSE_JSON(S.dependency_result_hashes),
  entity_count = S.entity_count,
  result_data = S.result_data,
  updated_at = S.updated_at
@@ -541,7 +546,7 @@ class StorageManager {
  INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
  dependency_result_hashes, entity_count, result_data, updated_at)
  VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
- S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
+ SAFE.PARSE_JSON(S.dependency_result_hashes), S.entity_count, S.result_data, S.updated_at)
  `;

  await this.bigquery.query({ query: mergeQuery, location: this.config.bigquery.location });
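
For context on the SAFE.PARSE_JSON change above: BigQuery does not implicitly coerce a STRING column into a JSON column in DML, so the staged string has to be parsed explicitly during the MERGE (SAFE.PARSE_JSON returns NULL rather than erroring on malformed input). A standalone sketch of the pattern, with illustrative table and column names rather than this package's schema:

// Sketch only: MERGE a staging table whose dependency_result_hashes column is
// STRING into a target table where the same column is typed JSON.
// Dataset, table, and join columns here are illustrative placeholders.
const { BigQuery } = require('@google-cloud/bigquery');

async function mergeStagingIntoTarget() {
  const bigquery = new BigQuery();
  const mergeQuery = `
    MERGE INTO \`example_dataset.results\` T
    USING \`example_dataset.results_staging\` S
    ON T.date = S.date AND T.entity_id = S.entity_id
    WHEN MATCHED THEN UPDATE SET
      dependency_result_hashes = SAFE.PARSE_JSON(S.dependency_result_hashes),
      updated_at = S.updated_at
    WHEN NOT MATCHED THEN
      INSERT (date, entity_id, dependency_result_hashes, updated_at)
      VALUES (S.date, S.entity_id, SAFE.PARSE_JSON(S.dependency_result_hashes), S.updated_at)
  `;
  await bigquery.query({ query: mergeQuery, location: 'EU' });
}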
@@ -9,7 +9,7 @@
  FUNCTION_NAME="orchestrator-http"
  REGION="europe-west1"
  DATE=$(date +%Y-%m-%d) # Defaults to today
- USER_TYPE="normal" # Options: normal, speculator, popular_investor
+ USER_TYPE="popular_investor" # Options: normal, speculator, popular_investor
  WINDOWS=1 # 1 window = immediate execution (0s delay)

  # --- 1. FETCH URL DYNAMICALLY ---
@@ -1,26 +1,17 @@
  /**
  * @fileoverview BigQuery Batch Manager for Task Engine
- *
- * Batches BigQuery writes and flushes them using MERGE operations.
- * Integrates with FirestoreBatchManager to flush together.
- *
- * UPDATED: Now uses insertRowsWithMerge to prevent duplicate rows
- * when a user is processed twice on the same day.
- *
- * Deduplication keys:
- * - portfolio_snapshots: ['date', 'user_id', 'user_type']
- * - trade_history_snapshots: ['date', 'user_id', 'user_type']
- * - social_post_snapshots: ['date', 'user_id', 'user_type']
+ * * FIXED: Switched to APPEND-ONLY (insertRows) to avoid DML quotas.
+ * PREVIOUSLY: Used MERGE, which hit the 1,500 DML/day limit.
  */

  const {
  ensurePortfolioSnapshotsTable,
  ensureTradeHistorySnapshotsTable,
  ensureSocialPostSnapshotsTable,
- insertRowsWithMerge
+ insertRows // <--- CHANGED: Using direct insert instead of merge
  } = require('../../core/utils/bigquery_utils');

- // Deduplication keys for each table type
+ // Deduplication keys (Kept for reference, though not used in Append mode)
  const PORTFOLIO_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
  const HISTORY_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
  const SOCIAL_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
@@ -43,9 +34,6 @@ class BigQueryBatchManager {
  };
  }

- /**
- * Add portfolio row to buffer
- */
  async addPortfolioRow(row) {
  if (!this.tablesEnsured.portfolio) {
  await ensurePortfolioSnapshotsTable(this.logger);
@@ -54,9 +42,6 @@ class BigQueryBatchManager {
  this.portfolioBuffer.push(row);
  }

- /**
- * Add trade history row to buffer
- */
  async addHistoryRow(row) {
  if (!this.tablesEnsured.history) {
  await ensureTradeHistorySnapshotsTable(this.logger);
@@ -65,9 +50,6 @@ class BigQueryBatchManager {
  this.historyBuffer.push(row);
  }

- /**
- * Add social post row to buffer
- */
  async addSocialRow(row) {
  if (!this.tablesEnsured.social) {
  await ensureSocialPostSnapshotsTable(this.logger);
@@ -77,55 +59,47 @@ class BigQueryBatchManager {
  }

  /**
- * Flush a buffer to BigQuery using MERGE operation
- * Uses insertRowsWithMerge to prevent duplicates when a user is processed twice on same day
+ * Flush a buffer to BigQuery using APPEND (Load Job)
+ * FIXED: Removed MERGE logic to bypass DML quotas.
  */
- async _flushBuffer(buffer, tableId, tableName, dedupKeys) {
+ async _flushBuffer(buffer, tableId, tableName) {
  if (buffer.length === 0) return 0;

- const rows = [...buffer]; // Copy buffer
- buffer.length = 0; // Clear buffer
+ const rows = [...buffer];
+ buffer.length = 0;

  try {
- // Use insertRowsWithMerge for deduplication
- // This prevents duplicate rows if a user is processed twice on the same day
- const rowsInserted = await insertRowsWithMerge(
+ // CHANGED: insertRows uses a LOAD Job with WRITE_APPEND.
+ // This is FREE and has a 100,000 jobs/day limit.
+ await insertRows(
  this.datasetId,
  tableId,
  rows,
- dedupKeys,
  this.logger
  );

  if (this.logger) {
- this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rows.length} ${tableName} rows to BigQuery using MERGE (${rowsInserted} new, ${rows.length - rowsInserted} updated)`);
+ this.logger.log('INFO', `[BigQueryBatch] ✅ Appended ${rows.length} ${tableName} rows to BigQuery (Load Job)`);
  }

  return rows.length;
  } catch (error) {
- // Log error but don't throw - allow Firestore writes to continue
  if (this.logger) {
  this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
  }
- // Put rows back in buffer for retry on next flush
+ // Put rows back in buffer for retry
  buffer.push(...rows);
  return 0;
  }
  }

- /**
- * Flush all buffers to BigQuery
- * Called by FirestoreBatchManager.flushBatches()
- */
  async flushBatches() {
- if (process.env.BIGQUERY_ENABLED === 'false') {
- return; // Skip if BigQuery disabled
- }
+ if (process.env.BIGQUERY_ENABLED === 'false') return;

  const results = await Promise.allSettled([
- this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio', PORTFOLIO_DEDUP_KEYS),
- this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history', HISTORY_DEDUP_KEYS),
- this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social', SOCIAL_DEDUP_KEYS)
+ this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
+ this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
+ this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
  ]);

  const totalFlushed = results
@@ -133,13 +107,10 @@ class BigQueryBatchManager {
  .reduce((sum, r) => sum + r.value, 0);

  if (totalFlushed > 0 && this.logger) {
- this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows to BigQuery`);
+ this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows.`);
  }
  }

- /**
- * Get buffer sizes (for monitoring)
- */
  getBufferSizes() {
  return {
  portfolio: this.portfolioBuffer.length,
@@ -149,4 +120,4 @@ class BigQueryBatchManager {
  }
  }

- module.exports = { BigQueryBatchManager };
+ module.exports = { BigQueryBatchManager };
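
The bigquery_utils.insertRows helper referenced above is not part of this diff, so its exact implementation is unknown; an append-only write that stays off the DML quota is typically done with a load job using WRITE_APPEND. A hypothetical sketch of such a helper (names and behaviour are assumptions, not the package's actual code):

// Sketch only: append rows via a load job (WRITE_APPEND) instead of MERGE,
// so the write does not count against BigQuery's DML statement quota.
// This is a guess at what a helper like insertRows could look like; it is not
// the actual implementation from core/utils/bigquery_utils.
const os = require('os');
const path = require('path');
const fs = require('fs/promises');
const { BigQuery } = require('@google-cloud/bigquery');

async function appendRows(datasetId, tableId, rows, logger) {
  if (!rows.length) return 0;

  // Stage the rows as newline-delimited JSON in a temp file.
  const tmpFile = path.join(os.tmpdir(), `bq-append-${Date.now()}.ndjson`);
  await fs.writeFile(tmpFile, rows.map((r) => JSON.stringify(r)).join('\n'));

  try {
    const bigquery = new BigQuery();
    // table.load() starts a load job and resolves when it completes.
    await bigquery.dataset(datasetId).table(tableId).load(tmpFile, {
      sourceFormat: 'NEWLINE_DELIMITED_JSON',
      writeDisposition: 'WRITE_APPEND',
    });
    if (logger) logger.log('INFO', `Appended ${rows.length} rows to ${datasetId}.${tableId}`);
    return rows.length;
  } finally {
    await fs.unlink(tmpFile).catch(() => {});
  }
}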
@@ -1,7 +1,5 @@
  /** @fileoverview Utility class to manage stateful Firestore write batches.
- * REFACTORED: Added support for Popular Investor and Signed-In User sharding.
- * Mapped new user types to their respective collections.
- * IMPLEMENTS: Round-Robin Sharding for maximum user density per document.
+ * REFACTORED: Increased default batch size to 500 to reduce API calls.
  */

  const { FieldValue } = require('@google-cloud/firestore');
@@ -73,14 +71,16 @@ class FirestoreBatchManager {
  return `cid_map_shard_${Math.floor(parseInt(cid) / 10000) % 10}`;
  }

- async _scheduleFlush() { // Added async
- // LOWER THIS LIMIT: 400 is too high for PI history. Use 50.
- const maxBatch = this.config.TASK_ENGINE_MAX_BATCH_SIZE ? Number(this.config.TASK_ENGINE_MAX_BATCH_SIZE) : 50;
+ async _scheduleFlush() {
+ // OPTIMIZATION: Increased default to 500.
+ // Firestore limit is 500 writes. Since we shard users into buckets,
+ // 500 users results in much fewer than 500 writes (likely <10 writes).
+ const maxBatch = this.config.TASK_ENGINE_MAX_BATCH_SIZE ? Number(this.config.TASK_ENGINE_MAX_BATCH_SIZE) : 500;
  const totalOps = this._estimateBatchSize();

  if (totalOps >= maxBatch) {
  this.logger.log('INFO', `[BATCH] Hit limit (${totalOps} >= ${maxBatch}). Flushing...`);
- await this.flushBatches(); // Added await
+ await this.flushBatches();
  return;
  }
  }
@@ -135,7 +135,7 @@ class FirestoreBatchManager {
  this.usernameMapUpdates[shardId] = {};
  }
  this.usernameMapUpdates[shardId][cidStr] = { username };
- this._scheduleFlush(); // TODO Should we wait this? Would need to be async and update any call to it to await the function call. Not sure best here. Leave for now.
+ this._scheduleFlush();
  }

  /**
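
The 500 default in the _scheduleFlush change above lines up with Firestore's limit of 500 operations per committed WriteBatch. A minimal sketch of committing buffered writes in chunks that respect that limit (the collection name and update shape are illustrative only):

// Sketch only: commit buffered document writes in chunks of at most 500,
// Firestore's per-batch operation limit. Collection name is a placeholder.
const { Firestore } = require('@google-cloud/firestore');

async function commitInChunks(updates) {
  const firestore = new Firestore();
  const MAX_OPS_PER_BATCH = 500;

  for (let i = 0; i < updates.length; i += MAX_OPS_PER_BATCH) {
    const batch = firestore.batch();
    for (const { docId, data } of updates.slice(i, i + MAX_OPS_PER_BATCH)) {
      batch.set(firestore.collection('example_shards').doc(docId), data, { merge: true });
    }
    await batch.commit();
  }
}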
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "bulltrackers-module",
- "version": "1.0.753",
+ "version": "1.0.755",
  "description": "Helper Functions for Bulltrackers.",
  "main": "index.js",
  "files": [