bulltrackers-module 1.0.753 → 1.0.755
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/framework/storage/StorageManager.js +7 -2
- package/functions/task-engine/tests/test-task-engine.sh +1 -1
- package/functions/task-engine/utils/bigquery_batch_manager.js +20 -49
- package/functions/task-engine/utils/firestore_batch_manager.js +8 -8
- package/package.json +1 -1
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
|
|
10
10
|
* * FIX: Improved error logging to catch swallowed BigQuery insert errors.
|
|
11
11
|
* * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
|
|
12
|
+
* * FIX: Added SAFE.PARSE_JSON to MERGE statement to handle JSON type mismatch.
|
|
12
13
|
*/
|
|
13
14
|
|
|
14
15
|
const { Firestore } = require('@google-cloud/firestore');
|
|
@@ -520,6 +521,10 @@ class StorageManager {
|
|
|
520
521
|
|
|
521
522
|
await this._ensureBigQueryTable(targetTable);
|
|
522
523
|
|
|
524
|
+
// FIX: Added SAFE.PARSE_JSON() to dependency_result_hashes
|
|
525
|
+
// The source (temp table) has this as a STRING (from the JSON file).
|
|
526
|
+
// The destination (target table) has this as JSON.
|
|
527
|
+
// We must explicitly parse the string to JSON during the merge.
|
|
523
528
|
const mergeQuery = `
|
|
524
529
|
MERGE INTO ${fullTarget} T
|
|
525
530
|
USING (
|
|
@@ -533,7 +538,7 @@ class StorageManager {
|
|
|
533
538
|
UPDATE SET
|
|
534
539
|
code_hash = S.code_hash,
|
|
535
540
|
result_hash = S.result_hash,
|
|
536
|
-
dependency_result_hashes = S.dependency_result_hashes,
|
|
541
|
+
dependency_result_hashes = SAFE.PARSE_JSON(S.dependency_result_hashes),
|
|
537
542
|
entity_count = S.entity_count,
|
|
538
543
|
result_data = S.result_data,
|
|
539
544
|
updated_at = S.updated_at
|
|
@@ -541,7 +546,7 @@ class StorageManager {
|
|
|
541
546
|
INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
|
|
542
547
|
dependency_result_hashes, entity_count, result_data, updated_at)
|
|
543
548
|
VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
|
|
544
|
-
S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
|
|
549
|
+
SAFE.PARSE_JSON(S.dependency_result_hashes), S.entity_count, S.result_data, S.updated_at)
|
|
545
550
|
`;
|
|
546
551
|
|
|
547
552
|
await this.bigquery.query({ query: mergeQuery, location: this.config.bigquery.location });
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
FUNCTION_NAME="orchestrator-http"
|
|
10
10
|
REGION="europe-west1"
|
|
11
11
|
DATE=$(date +%Y-%m-%d) # Defaults to today
|
|
12
|
-
USER_TYPE="
|
|
12
|
+
USER_TYPE="popular_investor" # Options: normal, speculator, popular_investor
|
|
13
13
|
WINDOWS=1 # 1 window = immediate execution (0s delay)
|
|
14
14
|
|
|
15
15
|
# --- 1. FETCH URL DYNAMICALLY ---
|
|
@@ -1,26 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview BigQuery Batch Manager for Task Engine
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
* Integrates with FirestoreBatchManager to flush together.
|
|
6
|
-
*
|
|
7
|
-
* UPDATED: Now uses insertRowsWithMerge to prevent duplicate rows
|
|
8
|
-
* when a user is processed twice on the same day.
|
|
9
|
-
*
|
|
10
|
-
* Deduplication keys:
|
|
11
|
-
* - portfolio_snapshots: ['date', 'user_id', 'user_type']
|
|
12
|
-
* - trade_history_snapshots: ['date', 'user_id', 'user_type']
|
|
13
|
-
* - social_post_snapshots: ['date', 'user_id', 'user_type']
|
|
3
|
+
* * FIXED: Switched to APPEND-ONLY (insertRows) to avoid DML quotas.
|
|
4
|
+
* PREVIOUSLY: Used MERGE, which hit the 1,500 DML/day limit.
|
|
14
5
|
*/
|
|
15
6
|
|
|
16
7
|
const {
|
|
17
8
|
ensurePortfolioSnapshotsTable,
|
|
18
9
|
ensureTradeHistorySnapshotsTable,
|
|
19
10
|
ensureSocialPostSnapshotsTable,
|
|
20
|
-
|
|
11
|
+
insertRows // <--- CHANGED: Using direct insert instead of merge
|
|
21
12
|
} = require('../../core/utils/bigquery_utils');
|
|
22
13
|
|
|
23
|
-
// Deduplication keys for
|
|
14
|
+
// Deduplication keys (Kept for reference, though not used in Append mode)
|
|
24
15
|
const PORTFOLIO_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
|
|
25
16
|
const HISTORY_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
|
|
26
17
|
const SOCIAL_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
|
|
@@ -43,9 +34,6 @@ class BigQueryBatchManager {
|
|
|
43
34
|
};
|
|
44
35
|
}
|
|
45
36
|
|
|
46
|
-
/**
|
|
47
|
-
* Add portfolio row to buffer
|
|
48
|
-
*/
|
|
49
37
|
async addPortfolioRow(row) {
|
|
50
38
|
if (!this.tablesEnsured.portfolio) {
|
|
51
39
|
await ensurePortfolioSnapshotsTable(this.logger);
|
|
@@ -54,9 +42,6 @@ class BigQueryBatchManager {
|
|
|
54
42
|
this.portfolioBuffer.push(row);
|
|
55
43
|
}
|
|
56
44
|
|
|
57
|
-
/**
|
|
58
|
-
* Add trade history row to buffer
|
|
59
|
-
*/
|
|
60
45
|
async addHistoryRow(row) {
|
|
61
46
|
if (!this.tablesEnsured.history) {
|
|
62
47
|
await ensureTradeHistorySnapshotsTable(this.logger);
|
|
@@ -65,9 +50,6 @@ class BigQueryBatchManager {
|
|
|
65
50
|
this.historyBuffer.push(row);
|
|
66
51
|
}
|
|
67
52
|
|
|
68
|
-
/**
|
|
69
|
-
* Add social post row to buffer
|
|
70
|
-
*/
|
|
71
53
|
async addSocialRow(row) {
|
|
72
54
|
if (!this.tablesEnsured.social) {
|
|
73
55
|
await ensureSocialPostSnapshotsTable(this.logger);
|
|
@@ -77,55 +59,47 @@ class BigQueryBatchManager {
|
|
|
77
59
|
}
|
|
78
60
|
|
|
79
61
|
/**
|
|
80
|
-
* Flush a buffer to BigQuery using
|
|
81
|
-
*
|
|
62
|
+
* Flush a buffer to BigQuery using APPEND (Load Job)
|
|
63
|
+
* FIXED: Removed MERGE logic to bypass DML quotas.
|
|
82
64
|
*/
|
|
83
|
-
async _flushBuffer(buffer, tableId, tableName
|
|
65
|
+
async _flushBuffer(buffer, tableId, tableName) {
|
|
84
66
|
if (buffer.length === 0) return 0;
|
|
85
67
|
|
|
86
|
-
const rows = [...buffer];
|
|
87
|
-
buffer.length = 0;
|
|
68
|
+
const rows = [...buffer];
|
|
69
|
+
buffer.length = 0;
|
|
88
70
|
|
|
89
71
|
try {
|
|
90
|
-
//
|
|
91
|
-
// This
|
|
92
|
-
|
|
72
|
+
// CHANGED: insertRows uses a LOAD Job with WRITE_APPEND.
|
|
73
|
+
// This is FREE and has a 100,000 jobs/day limit.
|
|
74
|
+
await insertRows(
|
|
93
75
|
this.datasetId,
|
|
94
76
|
tableId,
|
|
95
77
|
rows,
|
|
96
|
-
dedupKeys,
|
|
97
78
|
this.logger
|
|
98
79
|
);
|
|
99
80
|
|
|
100
81
|
if (this.logger) {
|
|
101
|
-
this.logger.log('INFO', `[BigQueryBatch] ✅
|
|
82
|
+
this.logger.log('INFO', `[BigQueryBatch] ✅ Appended ${rows.length} ${tableName} rows to BigQuery (Load Job)`);
|
|
102
83
|
}
|
|
103
84
|
|
|
104
85
|
return rows.length;
|
|
105
86
|
} catch (error) {
|
|
106
|
-
// Log error but don't throw - allow Firestore writes to continue
|
|
107
87
|
if (this.logger) {
|
|
108
88
|
this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
|
|
109
89
|
}
|
|
110
|
-
// Put rows back in buffer for retry
|
|
90
|
+
// Put rows back in buffer for retry
|
|
111
91
|
buffer.push(...rows);
|
|
112
92
|
return 0;
|
|
113
93
|
}
|
|
114
94
|
}
|
|
115
95
|
|
|
116
|
-
/**
|
|
117
|
-
* Flush all buffers to BigQuery
|
|
118
|
-
* Called by FirestoreBatchManager.flushBatches()
|
|
119
|
-
*/
|
|
120
96
|
async flushBatches() {
|
|
121
|
-
if (process.env.BIGQUERY_ENABLED === 'false')
|
|
122
|
-
return; // Skip if BigQuery disabled
|
|
123
|
-
}
|
|
97
|
+
if (process.env.BIGQUERY_ENABLED === 'false') return;
|
|
124
98
|
|
|
125
99
|
const results = await Promise.allSettled([
|
|
126
|
-
this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'
|
|
127
|
-
this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'
|
|
128
|
-
this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social'
|
|
100
|
+
this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
|
|
101
|
+
this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
|
|
102
|
+
this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
|
|
129
103
|
]);
|
|
130
104
|
|
|
131
105
|
const totalFlushed = results
|
|
@@ -133,13 +107,10 @@ class BigQueryBatchManager {
|
|
|
133
107
|
.reduce((sum, r) => sum + r.value, 0);
|
|
134
108
|
|
|
135
109
|
if (totalFlushed > 0 && this.logger) {
|
|
136
|
-
this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows
|
|
110
|
+
this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows.`);
|
|
137
111
|
}
|
|
138
112
|
}
|
|
139
113
|
|
|
140
|
-
/**
|
|
141
|
-
* Get buffer sizes (for monitoring)
|
|
142
|
-
*/
|
|
143
114
|
getBufferSizes() {
|
|
144
115
|
return {
|
|
145
116
|
portfolio: this.portfolioBuffer.length,
|
|
@@ -149,4 +120,4 @@ class BigQueryBatchManager {
|
|
|
149
120
|
}
|
|
150
121
|
}
|
|
151
122
|
|
|
152
|
-
module.exports = { BigQueryBatchManager };
|
|
123
|
+
module.exports = { BigQueryBatchManager };
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
/** @fileoverview Utility class to manage stateful Firestore write batches.
|
|
2
|
-
* REFACTORED:
|
|
3
|
-
* Mapped new user types to their respective collections.
|
|
4
|
-
* IMPLEMENTS: Round-Robin Sharding for maximum user density per document.
|
|
2
|
+
* REFACTORED: Increased default batch size to 500 to reduce API calls.
|
|
5
3
|
*/
|
|
6
4
|
|
|
7
5
|
const { FieldValue } = require('@google-cloud/firestore');
|
|
@@ -73,14 +71,16 @@ class FirestoreBatchManager {
|
|
|
73
71
|
return `cid_map_shard_${Math.floor(parseInt(cid) / 10000) % 10}`;
|
|
74
72
|
}
|
|
75
73
|
|
|
76
|
-
async _scheduleFlush() {
|
|
77
|
-
//
|
|
78
|
-
|
|
74
|
+
async _scheduleFlush() {
|
|
75
|
+
// OPTIMIZATION: Increased default to 500.
|
|
76
|
+
// Firestore limit is 500 writes. Since we shard users into buckets,
|
|
77
|
+
// 500 users result in far fewer than 500 writes (likely <10 writes).
|
|
78
|
+
const maxBatch = this.config.TASK_ENGINE_MAX_BATCH_SIZE ? Number(this.config.TASK_ENGINE_MAX_BATCH_SIZE) : 500;
|
|
79
79
|
const totalOps = this._estimateBatchSize();
|
|
80
80
|
|
|
81
81
|
if (totalOps >= maxBatch) {
|
|
82
82
|
this.logger.log('INFO', `[BATCH] Hit limit (${totalOps} >= ${maxBatch}). Flushing...`);
|
|
83
|
-
await this.flushBatches();
|
|
83
|
+
await this.flushBatches();
|
|
84
84
|
return;
|
|
85
85
|
}
|
|
86
86
|
}
|
|
@@ -135,7 +135,7 @@ class FirestoreBatchManager {
|
|
|
135
135
|
this.usernameMapUpdates[shardId] = {};
|
|
136
136
|
}
|
|
137
137
|
this.usernameMapUpdates[shardId][cidStr] = { username };
|
|
138
|
-
this._scheduleFlush();
|
|
138
|
+
this._scheduleFlush();
|
|
139
139
|
}
|
|
140
140
|
|
|
141
141
|
/**
|