bulltrackers-module 1.0.766 → 1.0.769
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +559 -227
- package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
- package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
- package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
- package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
- package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
- package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
- package/functions/computation-system-v2/computations/SignedInUserList.js +51 -0
- package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
- package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
- package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +40 -126
- package/functions/computation-system-v2/core-api.js +17 -9
- package/functions/computation-system-v2/data_schema_reference.MD +108 -0
- package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
- package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
- package/functions/computation-system-v2/devtools/index.js +36 -0
- package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
- package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
- package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
- package/functions/computation-system-v2/devtools/shared/index.js +16 -0
- package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
- package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
- package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
- package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
- package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
- package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
- package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
- package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
- package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
- package/functions/computation-system-v2/framework/data/DataFetcher.js +330 -126
- package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +226 -153
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
- package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
- package/functions/computation-system-v2/framework/storage/StorageManager.js +111 -83
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +161 -66
- package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
- package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
- package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
- package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
- package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
- package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
- package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
- package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
- package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
- package/functions/computation-system-v2/test/analyze-results.js +0 -238
- package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
- package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
- package/functions/computation-system-v2/test/other/test-framework.js +0 -500
- package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
- package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
- package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
- package/functions/computation-system-v2/test/other/test-results.json +0 -31
- package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
- package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
- package/functions/computation-system-v2/test/other/test-storage.js +0 -449
- package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
- package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
- package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Data Fetcher - Executes queries and transforms results
|
|
3
3
|
* * The single point of data access for computations.
|
|
4
|
-
*
|
|
5
|
-
* *
|
|
6
|
-
* * V2.3 FIX: "Insufficient History" bug.
|
|
7
|
-
* - fetchBatched now orders by Entity ID to keep historical rows together.
|
|
8
|
-
* - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
|
|
9
|
-
* * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
|
|
10
|
-
* * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
|
|
11
|
-
* - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
|
|
4
|
+
* * V3.3 FIX: "Driver Priority" & "Identity Crisis" Patch.
|
|
5
|
+
* * V3.5 GUARD: Cost Protections (Dry Run & Limit Checks).
|
|
12
6
|
*/
|
|
13
7
|
|
|
14
8
|
const { BigQuery } = require('@google-cloud/bigquery');
|
|
9
|
+
const crypto = require('crypto');
|
|
10
|
+
const { MaterializedViewManager } = require('./MaterializedViewManager');
|
|
15
11
|
|
|
16
|
-
//
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
const
|
|
12
|
+
// SAFETY CONFIGURATION
|
|
13
|
+
// You can move these to your main config file if preferred
|
|
14
|
+
const DEFAULT_SAFETY_LIMIT_GB = 10; // Max GB per query
|
|
15
|
+
const MAX_LOOKBACK_DAYS = 60;
|
|
16
|
+
const BATCH_GROWTH_WARNING_THRESHOLD = 5;
|
|
20
17
|
|
|
21
18
|
class DataFetcher {
|
|
22
19
|
constructor(config, queryBuilder, logger = null) {
|
|
@@ -26,28 +23,54 @@ class DataFetcher {
|
|
|
26
23
|
this.tables = config.tables || {};
|
|
27
24
|
this.queryBuilder = queryBuilder;
|
|
28
25
|
this.logger = logger;
|
|
29
|
-
|
|
26
|
+
|
|
27
|
+
// Safety Limit from Config or Default
|
|
28
|
+
this.safetyLimitGb = config.safetyLimitGb || DEFAULT_SAFETY_LIMIT_GB;
|
|
29
|
+
|
|
30
30
|
this.client = new BigQuery({ projectId: this.projectId });
|
|
31
|
-
|
|
31
|
+
|
|
32
|
+
// Initialize MV Manager
|
|
33
|
+
this.mvManager = new MaterializedViewManager(this.client, this.logger, config);
|
|
34
|
+
|
|
35
|
+
// Cache Configuration
|
|
36
|
+
this.cacheConfig = config.queryCache || {
|
|
37
|
+
enabled: true,
|
|
38
|
+
ttlMs: 300000, // 5 minutes default
|
|
39
|
+
maxSize: 1000 // Max unique queries to cache
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
// Use Map as LRU cache
|
|
43
|
+
this.cache = new Map();
|
|
44
|
+
|
|
32
45
|
this.stats = {
|
|
33
46
|
queries: 0,
|
|
34
47
|
rowsFetched: 0,
|
|
35
48
|
errors: 0,
|
|
36
|
-
bytesProcessed: 0
|
|
49
|
+
bytesProcessed: 0,
|
|
50
|
+
cacheHits: 0,
|
|
51
|
+
cacheMisses: 0,
|
|
52
|
+
cacheEvictions: 0
|
|
37
53
|
};
|
|
38
54
|
}
|
|
39
|
-
|
|
55
|
+
|
|
40
56
|
/**
|
|
41
57
|
* Fetch data for a computation's requirements.
|
|
42
58
|
*/
|
|
43
59
|
async fetchForComputation(requires, targetDate, entities = null) {
|
|
44
60
|
const results = {};
|
|
45
61
|
const errors = [];
|
|
46
|
-
|
|
47
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
62
|
+
|
|
63
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
48
64
|
try {
|
|
65
|
+
// INTERCEPT METRICS
|
|
66
|
+
if (spec.type === 'metric') {
|
|
67
|
+
results[key] = await this._fetchMetric(spec, targetDate, entities);
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Standard Table Fetch
|
|
49
72
|
const data = await this.fetch({
|
|
50
|
-
table:
|
|
73
|
+
table: key,
|
|
51
74
|
targetDate,
|
|
52
75
|
lookback: spec.lookback || 0,
|
|
53
76
|
mandatory: spec.mandatory || false,
|
|
@@ -55,38 +78,82 @@ class DataFetcher {
|
|
|
55
78
|
fields: spec.fields || null,
|
|
56
79
|
entities: entities
|
|
57
80
|
});
|
|
58
|
-
|
|
59
|
-
results[
|
|
60
|
-
|
|
81
|
+
|
|
82
|
+
results[key] = data;
|
|
83
|
+
|
|
61
84
|
if (spec.mandatory && this._isEmpty(data)) {
|
|
62
|
-
errors.push({ table:
|
|
85
|
+
errors.push({ table: key, reason: 'MANDATORY_MISSING' });
|
|
63
86
|
}
|
|
64
|
-
|
|
87
|
+
|
|
65
88
|
} catch (e) {
|
|
66
89
|
if (spec.mandatory) {
|
|
67
|
-
errors.push({ table:
|
|
90
|
+
errors.push({ table: key, reason: e.message });
|
|
68
91
|
} else {
|
|
69
|
-
this._log('WARN', `Optional
|
|
92
|
+
this._log('WARN', `Optional requirement ${key} failed: ${e.message}`);
|
|
70
93
|
}
|
|
71
|
-
results[
|
|
94
|
+
results[key] = null;
|
|
72
95
|
}
|
|
73
96
|
}));
|
|
74
|
-
|
|
97
|
+
|
|
75
98
|
if (errors.length > 0) {
|
|
76
99
|
const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
|
|
77
100
|
throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
|
|
78
101
|
}
|
|
79
|
-
|
|
102
|
+
|
|
80
103
|
return results;
|
|
81
104
|
}
|
|
82
105
|
|
|
106
|
+
/**
|
|
107
|
+
* Fetch Metric with optional Time Series
|
|
108
|
+
*/
|
|
109
|
+
async _fetchMetric(spec, targetDate, entities) {
|
|
110
|
+
const mvName = await this.mvManager.ensureMetricView(spec.source, spec);
|
|
111
|
+
|
|
112
|
+
const selectClause = spec.series ? 'entity_id, date, value' : 'entity_id, SUM(value) as value';
|
|
113
|
+
const groupByClause = spec.series ? '' : 'GROUP BY entity_id';
|
|
114
|
+
|
|
115
|
+
let sql = `
|
|
116
|
+
SELECT ${selectClause}
|
|
117
|
+
FROM \`${this.projectId}.${this.dataset}.${mvName}\`
|
|
118
|
+
WHERE date BETWEEN DATE_SUB(@targetDate, INTERVAL @lookback DAY) AND @targetDate
|
|
119
|
+
`;
|
|
120
|
+
|
|
121
|
+
const params = { targetDate, lookback: spec.lookback || 0 };
|
|
122
|
+
|
|
123
|
+
if (entities && entities.length > 0) {
|
|
124
|
+
sql += ` AND entity_id IN UNNEST(@entities)`;
|
|
125
|
+
params.entities = entities.map(String);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
sql += ` ${groupByClause}`;
|
|
129
|
+
|
|
130
|
+
const rows = await this._execute({ sql, params, table: mvName });
|
|
131
|
+
const result = {};
|
|
132
|
+
|
|
133
|
+
if (spec.series) {
|
|
134
|
+
rows.forEach(r => {
|
|
135
|
+
const eid = r.entity_id;
|
|
136
|
+
const d = r.date.value || r.date;
|
|
137
|
+
if (!result[eid]) result[eid] = {};
|
|
138
|
+
result[eid][d] = r.value;
|
|
139
|
+
});
|
|
140
|
+
} else {
|
|
141
|
+
rows.forEach(r => { result[r.entity_id] = r.value; });
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
return result;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* STREAMING: Fetch data for computation in batches.
|
|
149
|
+
*/
|
|
83
150
|
/**
|
|
84
151
|
* STREAMING: Fetch data for computation in batches.
|
|
85
152
|
*/
|
|
86
153
|
async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
|
|
87
154
|
const driverTableName = this._getDriverTable(requires);
|
|
88
155
|
const driverConfig = this.tables[driverTableName] || {};
|
|
89
|
-
|
|
156
|
+
|
|
90
157
|
if (!driverTableName) {
|
|
91
158
|
this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
|
|
92
159
|
const fullData = await this.fetchForComputation(requires, targetDate);
|
|
@@ -95,9 +162,12 @@ class DataFetcher {
|
|
|
95
162
|
return;
|
|
96
163
|
}
|
|
97
164
|
|
|
98
|
-
this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
|
|
99
165
|
const driverSpec = requires[driverTableName];
|
|
100
166
|
|
|
167
|
+
// [DEBUG] Explicitly log the filter being applied to the driver
|
|
168
|
+
this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
|
|
169
|
+
this._log('INFO', `Driver Filter: ${JSON.stringify(driverSpec.filter || {})}`);
|
|
170
|
+
|
|
101
171
|
const driverStream = this.fetchBatched({
|
|
102
172
|
table: driverTableName,
|
|
103
173
|
targetDate,
|
|
@@ -108,145 +178,154 @@ class DataFetcher {
|
|
|
108
178
|
}, batchSize);
|
|
109
179
|
|
|
110
180
|
for await (const batch of driverStream) {
|
|
111
|
-
// FIX: Robust ID Extraction
|
|
112
181
|
const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);
|
|
113
|
-
|
|
182
|
+
|
|
114
183
|
if (entityIds.length === 0) {
|
|
115
|
-
this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs
|
|
184
|
+
this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs.`);
|
|
116
185
|
continue;
|
|
117
186
|
}
|
|
118
187
|
|
|
119
188
|
const batchResults = { [driverTableName]: batch };
|
|
120
189
|
const errors = [];
|
|
121
190
|
|
|
122
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
123
|
-
if (
|
|
124
|
-
|
|
125
|
-
// FIX: Identity Crisis Check
|
|
126
|
-
const depConfig = this.tables[tableName] || {};
|
|
127
|
-
const shouldFilterById = depConfig.entityField === driverConfig.entityField;
|
|
191
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
192
|
+
if (key === driverTableName) return;
|
|
128
193
|
|
|
129
194
|
try {
|
|
195
|
+
if (spec.type === 'metric') {
|
|
196
|
+
batchResults[key] = await this._fetchMetric(spec, targetDate, entityIds);
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
const depConfig = this.tables[key] || {};
|
|
201
|
+
const hasEntityField = !!depConfig.entityField;
|
|
202
|
+
|
|
130
203
|
const data = await this.fetch({
|
|
131
|
-
table:
|
|
204
|
+
table: key,
|
|
132
205
|
targetDate,
|
|
133
206
|
lookback: spec.lookback || 0,
|
|
134
207
|
mandatory: spec.mandatory || false,
|
|
135
208
|
filter: spec.filter || {},
|
|
136
209
|
fields: spec.fields || null,
|
|
137
|
-
entities:
|
|
210
|
+
entities: hasEntityField ? entityIds : null
|
|
138
211
|
});
|
|
139
|
-
|
|
140
|
-
batchResults[
|
|
212
|
+
|
|
213
|
+
batchResults[key] = data;
|
|
141
214
|
|
|
142
215
|
if (spec.mandatory && this._isEmpty(data)) {
|
|
143
|
-
this._log('WARN', `Batch warning: Mandatory table ${
|
|
216
|
+
this._log('WARN', `Batch warning: Mandatory table ${key} returned 0 rows.`);
|
|
144
217
|
}
|
|
145
218
|
} catch (e) {
|
|
146
|
-
if (spec.mandatory) errors.push({ table:
|
|
147
|
-
batchResults[
|
|
219
|
+
if (spec.mandatory) errors.push({ table: key, reason: e.message });
|
|
220
|
+
batchResults[key] = null;
|
|
148
221
|
}
|
|
149
222
|
}));
|
|
150
223
|
|
|
151
224
|
if (errors.length > 0) {
|
|
152
|
-
this._log('WARN', `Batch missing mandatory data
|
|
153
|
-
continue;
|
|
225
|
+
this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping.`);
|
|
226
|
+
continue;
|
|
154
227
|
}
|
|
155
228
|
|
|
156
229
|
yield { data: batchResults, entityIds };
|
|
157
230
|
}
|
|
158
231
|
}
|
|
159
|
-
|
|
232
|
+
|
|
160
233
|
async fetch(options) {
|
|
161
234
|
const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
|
|
162
|
-
|
|
163
|
-
// FIX #3: Prevent Runaway Costs
|
|
235
|
+
|
|
164
236
|
if (lookback > MAX_LOOKBACK_DAYS) {
|
|
165
|
-
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit
|
|
237
|
+
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
|
|
166
238
|
}
|
|
167
239
|
|
|
168
240
|
const tableConfig = this.tables[table] || {};
|
|
169
241
|
const { dateField, entityField, dataField } = tableConfig;
|
|
170
|
-
|
|
242
|
+
const physicalTable = tableConfig.tableName || table;
|
|
243
|
+
|
|
171
244
|
const query = await this.queryBuilder.build({
|
|
172
|
-
table
|
|
245
|
+
table: physicalTable,
|
|
246
|
+
select: fields,
|
|
247
|
+
where: filter,
|
|
248
|
+
dateField,
|
|
249
|
+
targetDate,
|
|
250
|
+
lookback,
|
|
251
|
+
entityField,
|
|
252
|
+
entities,
|
|
173
253
|
orderBy: dateField || entityField
|
|
174
254
|
});
|
|
175
|
-
|
|
255
|
+
|
|
176
256
|
const rows = await this._execute(query);
|
|
177
|
-
|
|
257
|
+
|
|
178
258
|
if (!rows || rows.length === 0) return null;
|
|
179
|
-
|
|
259
|
+
|
|
180
260
|
return this._transform(rows, { lookback, dateField, entityField, dataField });
|
|
181
261
|
}
|
|
182
262
|
|
|
183
263
|
async *fetchBatched(options, batchSize = 1000) {
|
|
184
264
|
const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
|
|
185
265
|
|
|
186
|
-
// FIX #3: Prevent Runaway Costs
|
|
187
266
|
if (lookback > MAX_LOOKBACK_DAYS) {
|
|
188
|
-
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit
|
|
267
|
+
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
|
|
189
268
|
}
|
|
190
269
|
|
|
191
270
|
const tableConfig = this.tables[table] || {};
|
|
192
271
|
const { dateField, entityField, dataField } = tableConfig;
|
|
272
|
+
const physicalTable = tableConfig.tableName || table;
|
|
193
273
|
|
|
194
|
-
// FIX #1: Prioritize ordering by Entity to keep historical rows together
|
|
195
274
|
const query = await this.queryBuilder.build({
|
|
196
|
-
table
|
|
197
|
-
|
|
275
|
+
table: physicalTable,
|
|
276
|
+
select: fields,
|
|
277
|
+
where: filter,
|
|
278
|
+
dateField,
|
|
279
|
+
targetDate,
|
|
280
|
+
lookback,
|
|
281
|
+
entityField,
|
|
282
|
+
entities,
|
|
283
|
+
orderBy: entityField || dateField
|
|
198
284
|
});
|
|
199
285
|
|
|
200
286
|
const rowStream = this._executeStream(query);
|
|
201
|
-
|
|
287
|
+
|
|
202
288
|
let batch = [];
|
|
203
289
|
let currentEntity = null;
|
|
204
|
-
let batchHasWarned = false;
|
|
290
|
+
let batchHasWarned = false;
|
|
205
291
|
|
|
206
292
|
for await (const row of rowStream) {
|
|
207
|
-
// FIX #2: Entity-Atomic Batching
|
|
208
|
-
// If we have an entity field, verify we don't split an entity across batches
|
|
209
293
|
if (entityField) {
|
|
294
|
+
|
|
210
295
|
const rowEntity = String(row[entityField]);
|
|
211
|
-
|
|
212
|
-
// Check if we should yield
|
|
213
|
-
// Condition: Batch is full AND we are on a NEW entity
|
|
296
|
+
|
|
214
297
|
if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
|
|
215
298
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
216
299
|
batch = [];
|
|
217
300
|
batchHasWarned = false;
|
|
218
301
|
}
|
|
219
|
-
|
|
220
|
-
// SAFETY VALVE (Fix #6 Alternative):
|
|
221
|
-
// If batch grows huge (Super Entity) and we CANNOT split (same entity), warn the admin.
|
|
302
|
+
|
|
222
303
|
if (batch.length > batchSize * BATCH_GROWTH_WARNING_THRESHOLD && !batchHasWarned) {
|
|
223
|
-
this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows
|
|
224
|
-
`This exceeds batch size ${batchSize} by ${BATCH_GROWTH_WARNING_THRESHOLD}x. ` +
|
|
225
|
-
`Risk of OOM or Timeouts. Consider filtering this entity.`);
|
|
304
|
+
this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows.`);
|
|
226
305
|
batchHasWarned = true;
|
|
227
306
|
}
|
|
228
|
-
|
|
307
|
+
|
|
229
308
|
currentEntity = rowEntity;
|
|
230
309
|
} else {
|
|
231
|
-
// Fallback for non-entity tables (strict count)
|
|
232
310
|
if (batch.length >= batchSize) {
|
|
233
311
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
234
312
|
batch = [];
|
|
235
313
|
}
|
|
236
314
|
}
|
|
237
|
-
|
|
238
315
|
batch.push(row);
|
|
239
316
|
}
|
|
240
|
-
|
|
317
|
+
|
|
241
318
|
if (batch.length > 0) {
|
|
242
319
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
243
320
|
}
|
|
244
321
|
}
|
|
245
|
-
|
|
322
|
+
|
|
246
323
|
async hasData(table, targetDate) {
|
|
247
324
|
const tableConfig = this.tables[table] || {};
|
|
248
325
|
const { dateField } = tableConfig;
|
|
249
|
-
const
|
|
326
|
+
const physicalTable = tableConfig.tableName || table;
|
|
327
|
+
|
|
328
|
+
const query = await this.queryBuilder.buildExistsQuery(physicalTable, dateField, targetDate);
|
|
250
329
|
try {
|
|
251
330
|
const rows = await this._execute(query);
|
|
252
331
|
return rows && rows.length > 0;
|
|
@@ -255,47 +334,115 @@ class DataFetcher {
|
|
|
255
334
|
return false;
|
|
256
335
|
}
|
|
257
336
|
}
|
|
258
|
-
|
|
337
|
+
|
|
338
|
+
// ... checkAvailability, getStats, resetStats, clearCache (unchanged) ...
|
|
259
339
|
async checkAvailability(requires, targetDate) {
|
|
260
340
|
const available = [];
|
|
261
341
|
const missing = [];
|
|
262
|
-
|
|
263
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
342
|
+
|
|
343
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
344
|
+
const tableName = spec.type === 'metric' ? spec.source : key;
|
|
264
345
|
const hasData = await this.hasData(tableName, targetDate);
|
|
265
346
|
if (hasData) {
|
|
266
|
-
available.push(
|
|
347
|
+
available.push(key);
|
|
267
348
|
} else if (spec.mandatory) {
|
|
268
|
-
missing.push(
|
|
349
|
+
missing.push(key);
|
|
269
350
|
} else {
|
|
270
|
-
available.push(
|
|
351
|
+
available.push(key);
|
|
271
352
|
}
|
|
272
353
|
}));
|
|
273
|
-
|
|
354
|
+
|
|
274
355
|
return { canRun: missing.length === 0, available, missing };
|
|
275
356
|
}
|
|
276
|
-
|
|
357
|
+
|
|
277
358
|
getStats() { return { ...this.stats }; }
|
|
278
|
-
|
|
279
|
-
|
|
359
|
+
|
|
360
|
+
resetStats() {
|
|
361
|
+
this.stats = {
|
|
362
|
+
queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0,
|
|
363
|
+
cacheHits: 0, cacheMisses: 0, cacheEvictions: 0
|
|
364
|
+
};
|
|
365
|
+
this.cache.clear();
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
clearCache() {
|
|
369
|
+
this.cache.clear();
|
|
370
|
+
this._log('DEBUG', 'Query cache cleared');
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
// =========================================================================
|
|
374
|
+
// PRIVATE METHODS
|
|
375
|
+
// =========================================================================
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Executes a query with Cost Guard protection.
|
|
379
|
+
*/
|
|
280
380
|
async _execute(query) {
|
|
381
|
+
// Caching Logic
|
|
382
|
+
if (this.cacheConfig.enabled) {
|
|
383
|
+
const cacheKey = this._generateCacheKey(query);
|
|
384
|
+
const cached = this.cache.get(cacheKey);
|
|
385
|
+
if (cached) {
|
|
386
|
+
if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
|
|
387
|
+
this.stats.cacheHits++;
|
|
388
|
+
this.cache.delete(cacheKey);
|
|
389
|
+
this.cache.set(cacheKey, cached);
|
|
390
|
+
return cached.rows;
|
|
391
|
+
} else {
|
|
392
|
+
this.cache.delete(cacheKey);
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
this.stats.cacheMisses++;
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// --- COST GUARD: DRY RUN ---
|
|
399
|
+
await this._enforceCostSafety(query);
|
|
400
|
+
// ---------------------------
|
|
401
|
+
|
|
402
|
+
// =========================================================
|
|
403
|
+
// 🛑 DEBUGGING: PRINT EXACT SQL
|
|
404
|
+
// =========================================================
|
|
405
|
+
console.log('\n--- 🔍 EXECUTING SQL -----------------------------------');
|
|
406
|
+
console.log(query.sql);
|
|
407
|
+
console.log('PARAMS:', JSON.stringify(query.params));
|
|
408
|
+
console.log('--------------------------------------------------------\n');
|
|
409
|
+
// =========================================================
|
|
410
|
+
|
|
281
411
|
this.stats.queries++;
|
|
412
|
+
|
|
282
413
|
try {
|
|
283
414
|
const [job] = await this.client.createQueryJob({
|
|
284
415
|
query: query.sql, params: query.params, location: this.location
|
|
285
416
|
});
|
|
286
417
|
const [rows] = await job.getQueryResults();
|
|
287
418
|
const [metadata] = await job.getMetadata();
|
|
419
|
+
|
|
288
420
|
this.stats.rowsFetched += rows.length;
|
|
289
421
|
this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
|
|
290
|
-
|
|
422
|
+
|
|
423
|
+
const normalizedRows = rows.map(r => this._normalizeRow(r));
|
|
424
|
+
|
|
425
|
+
if (this.cacheConfig.enabled) {
|
|
426
|
+
this._addToCache(query, normalizedRows);
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
return normalizedRows;
|
|
291
430
|
} catch (e) {
|
|
292
431
|
this.stats.errors++;
|
|
293
|
-
this._log('ERROR', `Query failed: ${e.message}`);
|
|
432
|
+
this._log('ERROR', `Query failed: ${e.message}\nSQL: ${query.sql}`);
|
|
294
433
|
throw e;
|
|
295
434
|
}
|
|
296
435
|
}
|
|
297
436
|
|
|
298
437
|
async *_executeStream(query) {
|
|
438
|
+
await this._enforceCostSafety(query);
|
|
439
|
+
|
|
440
|
+
// [CRITICAL FIX] Added SQL Logging to Stream Execution
|
|
441
|
+
console.log('\n--- 🔍 EXECUTING DRIVER STREAM SQL ---------------------');
|
|
442
|
+
console.log(query.sql);
|
|
443
|
+
console.log('PARAMS:', JSON.stringify(query.params));
|
|
444
|
+
console.log('--------------------------------------------------------\n');
|
|
445
|
+
|
|
299
446
|
this.stats.queries++;
|
|
300
447
|
try {
|
|
301
448
|
const [job] = await this.client.createQueryJob({
|
|
@@ -304,37 +451,99 @@ class DataFetcher {
|
|
|
304
451
|
const stream = job.getQueryResultsStream();
|
|
305
452
|
for await (const row of stream) {
|
|
306
453
|
this.stats.rowsFetched++;
|
|
307
|
-
yield row;
|
|
454
|
+
yield this._normalizeRow(row);
|
|
308
455
|
}
|
|
309
456
|
} catch (e) {
|
|
310
457
|
this.stats.errors++;
|
|
311
|
-
this._log('ERROR', `Stream Query failed: ${e.message}`);
|
|
458
|
+
this._log('ERROR', `Stream Query failed: ${e.message}\nSQL: ${query.sql}`);
|
|
312
459
|
throw e;
|
|
313
460
|
}
|
|
314
461
|
}
|
|
315
|
-
|
|
462
|
+
|
|
316
463
|
/**
|
|
317
|
-
*
|
|
318
|
-
* FIX: PRIORITIZE ENTITY FIELD.
|
|
319
|
-
* If entityField exists, we MUST return { [id]: [rows] } so IDs can be extracted.
|
|
464
|
+
* NEW: Runs a Dry Run to estimate cost and blocks expensive queries.
|
|
320
465
|
*/
|
|
466
|
+
async _enforceCostSafety(query) {
|
|
467
|
+
try {
|
|
468
|
+
const [dryRunJob] = await this.client.createQueryJob({
|
|
469
|
+
query: query.sql,
|
|
470
|
+
params: query.params,
|
|
471
|
+
location: this.location,
|
|
472
|
+
dryRun: true // Costs $0, returns stats only
|
|
473
|
+
});
|
|
474
|
+
|
|
475
|
+
const bytes = parseInt(dryRunJob.metadata.statistics.totalBytesProcessed, 10);
|
|
476
|
+
const gb = bytes / (1024 * 1024 * 1024);
|
|
477
|
+
|
|
478
|
+
if (gb > this.safetyLimitGb) {
|
|
479
|
+
const errorMsg = `[DataFetcher] 🛑 COST VIOLATION: Query blocked! Estimated size: ${gb.toFixed(2)} GB (Limit: ${this.safetyLimitGb} GB). ` +
|
|
480
|
+
`Check your partition filters or clustering. Table: ${query.table}`;
|
|
481
|
+
|
|
482
|
+
this._log('ERROR', errorMsg);
|
|
483
|
+
// We log the offending SQL for debugging
|
|
484
|
+
this._log('ERROR', `BLOCKED SQL START:\n${query.sql.substring(0, 500)}...\nEND SQL`);
|
|
485
|
+
|
|
486
|
+
throw new Error(errorMsg);
|
|
487
|
+
}
|
|
488
|
+
} catch (e) {
|
|
489
|
+
// If the dry run fails (e.g. invalid SQL), we let the real execution fail it naturally,
|
|
490
|
+
// unless it was our cost violation error.
|
|
491
|
+
if (e.message.includes('COST VIOLATION')) throw e;
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
_normalizeRow(row) {
|
|
496
|
+
const normalized = { ...row };
|
|
497
|
+
for (const [key, value] of Object.entries(normalized)) {
|
|
498
|
+
if (typeof value === 'string') {
|
|
499
|
+
const trimmed = value.trim();
|
|
500
|
+
if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
|
|
501
|
+
normalized[key] = this._safeRecursiveParse(value);
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
return normalized;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
_safeRecursiveParse(input) {
|
|
509
|
+
if (!input) return null;
|
|
510
|
+
if (typeof input === 'object') return input;
|
|
511
|
+
try {
|
|
512
|
+
const parsed = JSON.parse(input);
|
|
513
|
+
if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
|
|
514
|
+
return parsed;
|
|
515
|
+
} catch (e) {
|
|
516
|
+
return input;
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
_generateCacheKey(query) {
|
|
521
|
+
const str = query.sql + JSON.stringify(query.params || {});
|
|
522
|
+
return crypto.createHash('md5').update(str).digest('hex');
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
_addToCache(query, rows) {
|
|
526
|
+
const key = this._generateCacheKey(query);
|
|
527
|
+
if (this.cache.size >= this.cacheConfig.maxSize) {
|
|
528
|
+
const oldestKey = this.cache.keys().next().value;
|
|
529
|
+
this.cache.delete(oldestKey);
|
|
530
|
+
this.stats.cacheEvictions++;
|
|
531
|
+
}
|
|
532
|
+
this.cache.set(key, { rows: rows, timestamp: Date.now() });
|
|
533
|
+
}
|
|
534
|
+
|
|
321
535
|
_transform(rows, config) {
|
|
322
536
|
const { lookback, dateField, entityField, dataField } = config;
|
|
323
537
|
const rowArray = Array.isArray(rows) ? rows : [rows];
|
|
324
538
|
|
|
325
|
-
// FIX: Primary Grouping = Entity
|
|
326
539
|
if (entityField) {
|
|
327
540
|
const byEntity = {};
|
|
328
541
|
for (const row of rowArray) {
|
|
329
542
|
const entityKey = String(row[entityField]);
|
|
330
543
|
if (!byEntity[entityKey]) {
|
|
331
|
-
// If simple fetch (no history), value is single object
|
|
332
|
-
// If history fetch (lookback), value is Array of rows
|
|
333
544
|
byEntity[entityKey] = lookback > 0 ? [] : null;
|
|
334
545
|
}
|
|
335
|
-
|
|
336
546
|
const value = dataField ? row[dataField] : row;
|
|
337
|
-
|
|
338
547
|
if (Array.isArray(byEntity[entityKey])) {
|
|
339
548
|
byEntity[entityKey].push(value);
|
|
340
549
|
} else {
|
|
@@ -344,7 +553,6 @@ class DataFetcher {
|
|
|
344
553
|
return byEntity;
|
|
345
554
|
}
|
|
346
555
|
|
|
347
|
-
// Fallback: Date Grouping (Only if no Entity ID)
|
|
348
556
|
if (lookback > 0 && dateField) {
|
|
349
557
|
const byDate = {};
|
|
350
558
|
for (const row of rowArray) {
|
|
@@ -355,12 +563,19 @@ class DataFetcher {
|
|
|
355
563
|
}
|
|
356
564
|
return byDate;
|
|
357
565
|
}
|
|
358
|
-
|
|
359
566
|
return rowArray;
|
|
360
567
|
}
|
|
361
568
|
|
|
362
569
|
_getDriverTable(requires) {
|
|
570
|
+
// PASS 1: Prioritize tables with a DATE field
|
|
571
|
+
for (const [name, spec] of Object.entries(requires)) {
|
|
572
|
+
if (spec.type === 'metric') continue;
|
|
573
|
+
const config = this.tables[name];
|
|
574
|
+
if (config && config.entityField && config.dateField) return name;
|
|
575
|
+
}
|
|
576
|
+
// PASS 2: Fallback to any entity table
|
|
363
577
|
for (const [name, spec] of Object.entries(requires)) {
|
|
578
|
+
if (spec.type === 'metric') continue;
|
|
364
579
|
const config = this.tables[name];
|
|
365
580
|
if (config && config.entityField) return name;
|
|
366
581
|
}
|
|
@@ -371,25 +586,14 @@ class DataFetcher {
|
|
|
371
586
|
const config = this.tables[tableName] || {};
|
|
372
587
|
const field = config.entityField;
|
|
373
588
|
|
|
374
|
-
// Case 1: Transformed Object { "id1": data, "id2": data }
|
|
375
589
|
if (field && batchData && !Array.isArray(batchData)) {
|
|
376
590
|
return Object.keys(batchData);
|
|
377
591
|
}
|
|
378
|
-
|
|
379
|
-
// Case 2: Array of Rows (Only if _transform didn't group by entity)
|
|
380
592
|
if (Array.isArray(batchData) && field) {
|
|
381
593
|
const ids = [];
|
|
382
|
-
let undefinedCount = 0;
|
|
383
594
|
for (const r of batchData) {
|
|
384
595
|
const val = r[field];
|
|
385
|
-
if (val
|
|
386
|
-
undefinedCount++;
|
|
387
|
-
} else {
|
|
388
|
-
ids.push(String(val));
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
if (undefinedCount > 0) {
|
|
392
|
-
this._log('ERROR', `CRITICAL CONFIG ERROR: Found ${undefinedCount} rows in '${tableName}' where entityField '${field}' was UNDEFINED.`);
|
|
596
|
+
if (val !== undefined) ids.push(String(val));
|
|
393
597
|
}
|
|
394
598
|
return ids;
|
|
395
599
|
}
|
|
@@ -399,28 +603,28 @@ class DataFetcher {
|
|
|
399
603
|
_extractAllEntityIds(fullData) {
|
|
400
604
|
const ids = new Set();
|
|
401
605
|
Object.values(fullData || {}).forEach(tableData => {
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
606
|
+
if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
|
|
607
|
+
Object.keys(tableData).forEach(k => ids.add(k));
|
|
608
|
+
}
|
|
405
609
|
});
|
|
406
610
|
return Array.from(ids);
|
|
407
611
|
}
|
|
408
|
-
|
|
612
|
+
|
|
409
613
|
_formatDate(dateValue) {
|
|
410
614
|
if (!dateValue) return null;
|
|
411
615
|
if (typeof dateValue === 'string') return dateValue;
|
|
412
|
-
if (dateValue.value) return dateValue.value;
|
|
616
|
+
if (dateValue.value) return dateValue.value;
|
|
413
617
|
if (dateValue instanceof Date) return dateValue.toISOString().slice(0, 10);
|
|
414
618
|
return String(dateValue);
|
|
415
619
|
}
|
|
416
|
-
|
|
620
|
+
|
|
417
621
|
_isEmpty(data) {
|
|
418
622
|
if (data == null) return true;
|
|
419
623
|
if (Array.isArray(data)) return data.length === 0;
|
|
420
624
|
if (typeof data === 'object') return Object.keys(data).length === 0;
|
|
421
625
|
return false;
|
|
422
626
|
}
|
|
423
|
-
|
|
627
|
+
|
|
424
628
|
_log(level, message) {
|
|
425
629
|
if (this.logger && typeof this.logger.log === 'function') {
|
|
426
630
|
this.logger.log(level, `[DataFetcher] ${message}`);
|