bulltrackers-module 1.0.768 → 1.0.770
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +557 -337
- package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
- package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
- package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
- package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
- package/functions/computation-system-v2/computations/RiskScoreIncrease.js +13 -13
- package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
- package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
- package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +30 -128
- package/functions/computation-system-v2/core-api.js +17 -9
- package/functions/computation-system-v2/data_schema_reference.MD +108 -0
- package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
- package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
- package/functions/computation-system-v2/devtools/index.js +36 -0
- package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
- package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
- package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
- package/functions/computation-system-v2/devtools/shared/index.js +16 -0
- package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
- package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
- package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
- package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
- package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
- package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
- package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
- package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +250 -184
- package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +215 -129
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
- package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
- package/functions/computation-system-v2/framework/storage/StorageManager.js +105 -67
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +12 -6
- package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
- package/functions/computation-system-v2/handlers/scheduler.js +172 -203
- package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
- package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
- package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
- package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
- package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
- package/package.json +1 -1
|
@@ -1,27 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Data Fetcher - Executes queries and transforms results
|
|
3
3
|
* * The single point of data access for computations.
|
|
4
|
-
*
|
|
5
|
-
* *
|
|
6
|
-
* * V2.3 FIX: "Insufficient History" bug.
|
|
7
|
-
* - fetchBatched now orders by Entity ID to keep historical rows together.
|
|
8
|
-
* - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
|
|
9
|
-
* * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
|
|
10
|
-
* * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
|
|
11
|
-
* - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
|
|
12
|
-
* * V2.6 UPDATE: Query Result Caching.
|
|
13
|
-
* - Implemented in-memory LRU cache to prevent redundant BigQuery costs for reference data.
|
|
14
|
-
* * V2.7 FIX: Double-Encoded JSON Normalization.
|
|
15
|
-
* - Automatically detects and recursively parses JSON strings (e.g. posts_data) to prevent downstream parsing errors.
|
|
4
|
+
* * V3.3 FIX: "Driver Priority" & "Identity Crisis" Patch.
|
|
5
|
+
* * V3.5 GUARD: Cost Protections (Dry Run & Limit Checks).
|
|
16
6
|
*/
|
|
17
7
|
|
|
18
8
|
const { BigQuery } = require('@google-cloud/bigquery');
|
|
19
9
|
const crypto = require('crypto');
|
|
10
|
+
const { MaterializedViewManager } = require('./MaterializedViewManager');
|
|
20
11
|
|
|
21
|
-
//
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
12
|
+
// SAFETY CONFIGURATION
|
|
13
|
+
// You can move these to your main config file if preferred
|
|
14
|
+
const DEFAULT_SAFETY_LIMIT_GB = 10; // Max GB per query
|
|
15
|
+
const MAX_LOOKBACK_DAYS = 60;
|
|
16
|
+
const BATCH_GROWTH_WARNING_THRESHOLD = 5;
|
|
25
17
|
|
|
26
18
|
class DataFetcher {
|
|
27
19
|
constructor(config, queryBuilder, logger = null) {
|
|
@@ -31,19 +23,25 @@ class DataFetcher {
|
|
|
31
23
|
this.tables = config.tables || {};
|
|
32
24
|
this.queryBuilder = queryBuilder;
|
|
33
25
|
this.logger = logger;
|
|
34
|
-
|
|
26
|
+
|
|
27
|
+
// Safety Limit from Config or Default
|
|
28
|
+
this.safetyLimitGb = config.safetyLimitGb || DEFAULT_SAFETY_LIMIT_GB;
|
|
29
|
+
|
|
35
30
|
this.client = new BigQuery({ projectId: this.projectId });
|
|
36
|
-
|
|
37
|
-
//
|
|
31
|
+
|
|
32
|
+
// Initialize MV Manager
|
|
33
|
+
this.mvManager = new MaterializedViewManager(this.client, this.logger, config);
|
|
34
|
+
|
|
35
|
+
// Cache Configuration
|
|
38
36
|
this.cacheConfig = config.queryCache || {
|
|
39
37
|
enabled: true,
|
|
40
38
|
ttlMs: 300000, // 5 minutes default
|
|
41
39
|
maxSize: 1000 // Max unique queries to cache
|
|
42
40
|
};
|
|
43
|
-
|
|
44
|
-
// Use Map as LRU cache
|
|
41
|
+
|
|
42
|
+
// Use Map as LRU cache
|
|
45
43
|
this.cache = new Map();
|
|
46
|
-
|
|
44
|
+
|
|
47
45
|
this.stats = {
|
|
48
46
|
queries: 0,
|
|
49
47
|
rowsFetched: 0,
|
|
@@ -54,18 +52,25 @@ class DataFetcher {
|
|
|
54
52
|
cacheEvictions: 0
|
|
55
53
|
};
|
|
56
54
|
}
|
|
57
|
-
|
|
55
|
+
|
|
58
56
|
/**
|
|
59
57
|
* Fetch data for a computation's requirements.
|
|
60
58
|
*/
|
|
61
59
|
async fetchForComputation(requires, targetDate, entities = null) {
|
|
62
60
|
const results = {};
|
|
63
61
|
const errors = [];
|
|
64
|
-
|
|
65
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
62
|
+
|
|
63
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
66
64
|
try {
|
|
65
|
+
// INTERCEPT METRICS
|
|
66
|
+
if (spec.type === 'metric') {
|
|
67
|
+
results[key] = await this._fetchMetric(spec, targetDate, entities);
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Standard Table Fetch
|
|
67
72
|
const data = await this.fetch({
|
|
68
|
-
table:
|
|
73
|
+
table: key,
|
|
69
74
|
targetDate,
|
|
70
75
|
lookback: spec.lookback || 0,
|
|
71
76
|
mandatory: spec.mandatory || false,
|
|
@@ -73,38 +78,82 @@ class DataFetcher {
|
|
|
73
78
|
fields: spec.fields || null,
|
|
74
79
|
entities: entities
|
|
75
80
|
});
|
|
76
|
-
|
|
77
|
-
results[
|
|
78
|
-
|
|
81
|
+
|
|
82
|
+
results[key] = data;
|
|
83
|
+
|
|
79
84
|
if (spec.mandatory && this._isEmpty(data)) {
|
|
80
|
-
errors.push({ table:
|
|
85
|
+
errors.push({ table: key, reason: 'MANDATORY_MISSING' });
|
|
81
86
|
}
|
|
82
|
-
|
|
87
|
+
|
|
83
88
|
} catch (e) {
|
|
84
89
|
if (spec.mandatory) {
|
|
85
|
-
errors.push({ table:
|
|
90
|
+
errors.push({ table: key, reason: e.message });
|
|
86
91
|
} else {
|
|
87
|
-
this._log('WARN', `Optional
|
|
92
|
+
this._log('WARN', `Optional requirement ${key} failed: ${e.message}`);
|
|
88
93
|
}
|
|
89
|
-
results[
|
|
94
|
+
results[key] = null;
|
|
90
95
|
}
|
|
91
96
|
}));
|
|
92
|
-
|
|
97
|
+
|
|
93
98
|
if (errors.length > 0) {
|
|
94
99
|
const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
|
|
95
100
|
throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
|
|
96
101
|
}
|
|
97
|
-
|
|
102
|
+
|
|
98
103
|
return results;
|
|
99
104
|
}
|
|
100
105
|
|
|
106
|
+
/**
|
|
107
|
+
* Fetch Metric with optional Time Series
|
|
108
|
+
*/
|
|
109
|
+
async _fetchMetric(spec, targetDate, entities) {
|
|
110
|
+
const mvName = await this.mvManager.ensureMetricView(spec.source, spec);
|
|
111
|
+
|
|
112
|
+
const selectClause = spec.series ? 'entity_id, date, value' : 'entity_id, SUM(value) as value';
|
|
113
|
+
const groupByClause = spec.series ? '' : 'GROUP BY entity_id';
|
|
114
|
+
|
|
115
|
+
let sql = `
|
|
116
|
+
SELECT ${selectClause}
|
|
117
|
+
FROM \`${this.projectId}.${this.dataset}.${mvName}\`
|
|
118
|
+
WHERE date BETWEEN DATE_SUB(@targetDate, INTERVAL @lookback DAY) AND @targetDate
|
|
119
|
+
`;
|
|
120
|
+
|
|
121
|
+
const params = { targetDate, lookback: spec.lookback || 0 };
|
|
122
|
+
|
|
123
|
+
if (entities && entities.length > 0) {
|
|
124
|
+
sql += ` AND entity_id IN UNNEST(@entities)`;
|
|
125
|
+
params.entities = entities.map(String);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
sql += ` ${groupByClause}`;
|
|
129
|
+
|
|
130
|
+
const rows = await this._execute({ sql, params, table: mvName });
|
|
131
|
+
const result = {};
|
|
132
|
+
|
|
133
|
+
if (spec.series) {
|
|
134
|
+
rows.forEach(r => {
|
|
135
|
+
const eid = r.entity_id;
|
|
136
|
+
const d = r.date.value || r.date;
|
|
137
|
+
if (!result[eid]) result[eid] = {};
|
|
138
|
+
result[eid][d] = r.value;
|
|
139
|
+
});
|
|
140
|
+
} else {
|
|
141
|
+
rows.forEach(r => { result[r.entity_id] = r.value; });
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
return result;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
 * STREAMING: Fetch data for a computation in batches.
 */
|
|
104
153
|
async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
|
|
105
154
|
const driverTableName = this._getDriverTable(requires);
|
|
106
155
|
const driverConfig = this.tables[driverTableName] || {};
|
|
107
|
-
|
|
156
|
+
|
|
108
157
|
if (!driverTableName) {
|
|
109
158
|
this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
|
|
110
159
|
const fullData = await this.fetchForComputation(requires, targetDate);
|
|
@@ -113,9 +162,12 @@ class DataFetcher {
|
|
|
113
162
|
return;
|
|
114
163
|
}
|
|
115
164
|
|
|
116
|
-
this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
|
|
117
165
|
const driverSpec = requires[driverTableName];
|
|
118
166
|
|
|
167
|
+
// [DEBUG] Explicitly log the filter being applied to the driver
|
|
168
|
+
this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
|
|
169
|
+
this._log('INFO', `Driver Filter: ${JSON.stringify(driverSpec.filter || {})}`);
|
|
170
|
+
|
|
119
171
|
const driverStream = this.fetchBatched({
|
|
120
172
|
table: driverTableName,
|
|
121
173
|
targetDate,
|
|
@@ -126,145 +178,154 @@ class DataFetcher {
|
|
|
126
178
|
}, batchSize);
|
|
127
179
|
|
|
128
180
|
for await (const batch of driverStream) {
|
|
129
|
-
// FIX: Robust ID Extraction
|
|
130
181
|
const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);
|
|
131
|
-
|
|
182
|
+
|
|
132
183
|
if (entityIds.length === 0) {
|
|
133
|
-
this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs
|
|
184
|
+
this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs.`);
|
|
134
185
|
continue;
|
|
135
186
|
}
|
|
136
187
|
|
|
137
188
|
const batchResults = { [driverTableName]: batch };
|
|
138
189
|
const errors = [];
|
|
139
190
|
|
|
140
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
141
|
-
if (
|
|
142
|
-
|
|
143
|
-
// FIX: Identity Crisis Check
|
|
144
|
-
const depConfig = this.tables[tableName] || {};
|
|
145
|
-
const shouldFilterById = depConfig.entityField === driverConfig.entityField;
|
|
191
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
192
|
+
if (key === driverTableName) return;
|
|
146
193
|
|
|
147
194
|
try {
|
|
195
|
+
if (spec.type === 'metric') {
|
|
196
|
+
batchResults[key] = await this._fetchMetric(spec, targetDate, entityIds);
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
const depConfig = this.tables[key] || {};
|
|
201
|
+
const hasEntityField = !!depConfig.entityField;
|
|
202
|
+
|
|
148
203
|
const data = await this.fetch({
|
|
149
|
-
table:
|
|
204
|
+
table: key,
|
|
150
205
|
targetDate,
|
|
151
206
|
lookback: spec.lookback || 0,
|
|
152
207
|
mandatory: spec.mandatory || false,
|
|
153
208
|
filter: spec.filter || {},
|
|
154
209
|
fields: spec.fields || null,
|
|
155
|
-
entities:
|
|
210
|
+
entities: hasEntityField ? entityIds : null
|
|
156
211
|
});
|
|
157
|
-
|
|
158
|
-
batchResults[
|
|
212
|
+
|
|
213
|
+
batchResults[key] = data;
|
|
159
214
|
|
|
160
215
|
if (spec.mandatory && this._isEmpty(data)) {
|
|
161
|
-
this._log('WARN', `Batch warning: Mandatory table ${
|
|
216
|
+
this._log('WARN', `Batch warning: Mandatory table ${key} returned 0 rows.`);
|
|
162
217
|
}
|
|
163
218
|
} catch (e) {
|
|
164
|
-
if (spec.mandatory) errors.push({ table:
|
|
165
|
-
batchResults[
|
|
219
|
+
if (spec.mandatory) errors.push({ table: key, reason: e.message });
|
|
220
|
+
batchResults[key] = null;
|
|
166
221
|
}
|
|
167
222
|
}));
|
|
168
223
|
|
|
169
224
|
if (errors.length > 0) {
|
|
170
|
-
this._log('WARN', `Batch missing mandatory data
|
|
171
|
-
continue;
|
|
225
|
+
this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping.`);
|
|
226
|
+
continue;
|
|
172
227
|
}
|
|
173
228
|
|
|
174
229
|
yield { data: batchResults, entityIds };
|
|
175
230
|
}
|
|
176
231
|
}
|
|
177
|
-
|
|
232
|
+
|
|
178
233
|
async fetch(options) {
|
|
179
234
|
const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
|
|
180
|
-
|
|
181
|
-
// FIX #3: Prevent Runaway Costs
|
|
235
|
+
|
|
182
236
|
if (lookback > MAX_LOOKBACK_DAYS) {
|
|
183
|
-
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit
|
|
237
|
+
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
|
|
184
238
|
}
|
|
185
239
|
|
|
186
240
|
const tableConfig = this.tables[table] || {};
|
|
187
241
|
const { dateField, entityField, dataField } = tableConfig;
|
|
188
|
-
|
|
242
|
+
const physicalTable = tableConfig.tableName || table;
|
|
243
|
+
|
|
189
244
|
const query = await this.queryBuilder.build({
|
|
190
|
-
table
|
|
245
|
+
table: physicalTable,
|
|
246
|
+
select: fields,
|
|
247
|
+
where: filter,
|
|
248
|
+
dateField,
|
|
249
|
+
targetDate,
|
|
250
|
+
lookback,
|
|
251
|
+
entityField,
|
|
252
|
+
entities,
|
|
191
253
|
orderBy: dateField || entityField
|
|
192
254
|
});
|
|
193
|
-
|
|
255
|
+
|
|
194
256
|
const rows = await this._execute(query);
|
|
195
|
-
|
|
257
|
+
|
|
196
258
|
if (!rows || rows.length === 0) return null;
|
|
197
|
-
|
|
259
|
+
|
|
198
260
|
return this._transform(rows, { lookback, dateField, entityField, dataField });
|
|
199
261
|
}
|
|
200
262
|
|
|
201
263
|
async *fetchBatched(options, batchSize = 1000) {
|
|
202
264
|
const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
|
|
203
265
|
|
|
204
|
-
// FIX #3: Prevent Runaway Costs
|
|
205
266
|
if (lookback > MAX_LOOKBACK_DAYS) {
|
|
206
|
-
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit
|
|
267
|
+
throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
|
|
207
268
|
}
|
|
208
269
|
|
|
209
270
|
const tableConfig = this.tables[table] || {};
|
|
210
271
|
const { dateField, entityField, dataField } = tableConfig;
|
|
272
|
+
const physicalTable = tableConfig.tableName || table;
|
|
211
273
|
|
|
212
|
-
// FIX #1: Prioritize ordering by Entity to keep historical rows together
|
|
213
274
|
const query = await this.queryBuilder.build({
|
|
214
|
-
table
|
|
215
|
-
|
|
275
|
+
table: physicalTable,
|
|
276
|
+
select: fields,
|
|
277
|
+
where: filter,
|
|
278
|
+
dateField,
|
|
279
|
+
targetDate,
|
|
280
|
+
lookback,
|
|
281
|
+
entityField,
|
|
282
|
+
entities,
|
|
283
|
+
orderBy: entityField || dateField
|
|
216
284
|
});
|
|
217
285
|
|
|
218
286
|
const rowStream = this._executeStream(query);
|
|
219
|
-
|
|
287
|
+
|
|
220
288
|
let batch = [];
|
|
221
289
|
let currentEntity = null;
|
|
222
|
-
let batchHasWarned = false;
|
|
290
|
+
let batchHasWarned = false;
|
|
223
291
|
|
|
224
292
|
for await (const row of rowStream) {
|
|
225
|
-
// FIX #2: Entity-Atomic Batching
|
|
226
|
-
// If we have an entity field, verify we don't split an entity across batches
|
|
227
293
|
if (entityField) {
|
|
294
|
+
|
|
228
295
|
const rowEntity = String(row[entityField]);
|
|
229
|
-
|
|
230
|
-
// Check if we should yield
|
|
231
|
-
// Condition: Batch is full AND we are on a NEW entity
|
|
296
|
+
|
|
232
297
|
if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
|
|
233
298
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
234
299
|
batch = [];
|
|
235
300
|
batchHasWarned = false;
|
|
236
301
|
}
|
|
237
|
-
|
|
238
|
-
// SAFETY VALVE (Fix #6 Alternative):
|
|
239
|
-
// If batch grows huge (Super Entity) and we CANNOT split (same entity), warn the admin.
|
|
302
|
+
|
|
240
303
|
if (batch.length > batchSize * BATCH_GROWTH_WARNING_THRESHOLD && !batchHasWarned) {
|
|
241
|
-
this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows
|
|
242
|
-
`This exceeds batch size ${batchSize} by ${BATCH_GROWTH_WARNING_THRESHOLD}x. ` +
|
|
243
|
-
`Risk of OOM or Timeouts. Consider filtering this entity.`);
|
|
304
|
+
this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows.`);
|
|
244
305
|
batchHasWarned = true;
|
|
245
306
|
}
|
|
246
|
-
|
|
307
|
+
|
|
247
308
|
currentEntity = rowEntity;
|
|
248
309
|
} else {
|
|
249
|
-
// Fallback for non-entity tables (strict count)
|
|
250
310
|
if (batch.length >= batchSize) {
|
|
251
311
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
252
312
|
batch = [];
|
|
253
313
|
}
|
|
254
314
|
}
|
|
255
|
-
|
|
256
315
|
batch.push(row);
|
|
257
316
|
}
|
|
258
|
-
|
|
317
|
+
|
|
259
318
|
if (batch.length > 0) {
|
|
260
319
|
yield this._transform(batch, { lookback, dateField, entityField, dataField });
|
|
261
320
|
}
|
|
262
321
|
}
|
|
263
|
-
|
|
322
|
+
|
|
264
323
|
async hasData(table, targetDate) {
|
|
265
324
|
const tableConfig = this.tables[table] || {};
|
|
266
325
|
const { dateField } = tableConfig;
|
|
267
|
-
const
|
|
326
|
+
const physicalTable = tableConfig.tableName || table;
|
|
327
|
+
|
|
328
|
+
const query = await this.queryBuilder.buildExistsQuery(physicalTable, dateField, targetDate);
|
|
268
329
|
try {
|
|
269
330
|
const rows = await this._execute(query);
|
|
270
331
|
return rows && rows.length > 0;
|
|
@@ -273,38 +334,34 @@ class DataFetcher {
|
|
|
273
334
|
return false;
|
|
274
335
|
}
|
|
275
336
|
}
|
|
276
|
-
|
|
337
|
+
|
|
338
|
+
// ... checkAvailability, getStats, resetStats, clearCache (unchanged) ...
|
|
277
339
|
async checkAvailability(requires, targetDate) {
|
|
278
340
|
const available = [];
|
|
279
341
|
const missing = [];
|
|
280
|
-
|
|
281
|
-
await Promise.all(Object.entries(requires).map(async ([
|
|
342
|
+
|
|
343
|
+
await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
|
|
344
|
+
const tableName = spec.type === 'metric' ? spec.source : key;
|
|
282
345
|
const hasData = await this.hasData(tableName, targetDate);
|
|
283
346
|
if (hasData) {
|
|
284
|
-
available.push(
|
|
347
|
+
available.push(key);
|
|
285
348
|
} else if (spec.mandatory) {
|
|
286
|
-
missing.push(
|
|
349
|
+
missing.push(key);
|
|
287
350
|
} else {
|
|
288
|
-
available.push(
|
|
351
|
+
available.push(key);
|
|
289
352
|
}
|
|
290
353
|
}));
|
|
291
|
-
|
|
354
|
+
|
|
292
355
|
return { canRun: missing.length === 0, available, missing };
|
|
293
356
|
}
|
|
294
357
|
|
|
295
|
-
|
|
296
358
|
getStats() { return { ...this.stats }; }
|
|
297
|
-
|
|
298
|
-
resetStats() {
|
|
299
|
-
this.stats = {
|
|
300
|
-
queries: 0,
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
bytesProcessed: 0,
|
|
304
|
-
cacheHits: 0,
|
|
305
|
-
cacheMisses: 0,
|
|
306
|
-
cacheEvictions: 0
|
|
307
|
-
};
|
|
359
|
+
|
|
360
|
+
resetStats() {
|
|
361
|
+
this.stats = {
|
|
362
|
+
queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0,
|
|
363
|
+
cacheHits: 0, cacheMisses: 0, cacheEvictions: 0
|
|
364
|
+
};
|
|
308
365
|
this.cache.clear();
|
|
309
366
|
}
|
|
310
367
|
|
|
@@ -312,63 +369,80 @@ class DataFetcher {
|
|
|
312
369
|
this.cache.clear();
|
|
313
370
|
this._log('DEBUG', 'Query cache cleared');
|
|
314
371
|
}
|
|
315
|
-
|
|
372
|
+
|
|
316
373
|
// =========================================================================
|
|
317
374
|
// PRIVATE METHODS
|
|
318
375
|
// =========================================================================
|
|
319
|
-
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Executes a query with Cost Guard protection.
|
|
379
|
+
*/
|
|
320
380
|
async _execute(query) {
|
|
321
|
-
//
|
|
381
|
+
// Caching Logic
|
|
322
382
|
if (this.cacheConfig.enabled) {
|
|
323
383
|
const cacheKey = this._generateCacheKey(query);
|
|
324
384
|
const cached = this.cache.get(cacheKey);
|
|
325
|
-
|
|
326
385
|
if (cached) {
|
|
327
386
|
if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
|
|
328
387
|
this.stats.cacheHits++;
|
|
329
|
-
// Refresh LRU position (delete and re-set moves to end)
|
|
330
388
|
this.cache.delete(cacheKey);
|
|
331
389
|
this.cache.set(cacheKey, cached);
|
|
332
|
-
// Return cached rows immediately - no BigQuery cost
|
|
333
390
|
return cached.rows;
|
|
334
391
|
} else {
|
|
335
|
-
this.cache.delete(cacheKey);
|
|
392
|
+
this.cache.delete(cacheKey);
|
|
336
393
|
}
|
|
337
394
|
}
|
|
338
395
|
this.stats.cacheMisses++;
|
|
339
396
|
}
|
|
340
397
|
|
|
398
|
+
// --- COST GUARD: DRY RUN ---
|
|
399
|
+
await this._enforceCostSafety(query);
|
|
400
|
+
// ---------------------------
|
|
401
|
+
|
|
402
|
+
// =========================================================
|
|
403
|
+
// 🛑 DEBUGGING: PRINT EXACT SQL
|
|
404
|
+
// =========================================================
|
|
405
|
+
console.log('\n--- 🔍 EXECUTING SQL -----------------------------------');
|
|
406
|
+
console.log(query.sql);
|
|
407
|
+
console.log('PARAMS:', JSON.stringify(query.params));
|
|
408
|
+
console.log('--------------------------------------------------------\n');
|
|
409
|
+
// =========================================================
|
|
410
|
+
|
|
341
411
|
this.stats.queries++;
|
|
342
|
-
|
|
412
|
+
|
|
343
413
|
try {
|
|
344
414
|
const [job] = await this.client.createQueryJob({
|
|
345
415
|
query: query.sql, params: query.params, location: this.location
|
|
346
416
|
});
|
|
347
417
|
const [rows] = await job.getQueryResults();
|
|
348
418
|
const [metadata] = await job.getMetadata();
|
|
349
|
-
|
|
419
|
+
|
|
350
420
|
this.stats.rowsFetched += rows.length;
|
|
351
421
|
this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
|
|
352
|
-
|
|
353
|
-
// FIX V2.7: Normalize Rows (Recursive JSON Parse) BEFORE caching
|
|
422
|
+
|
|
354
423
|
const normalizedRows = rows.map(r => this._normalizeRow(r));
|
|
355
|
-
|
|
356
|
-
// Store in cache if enabled
|
|
424
|
+
|
|
357
425
|
if (this.cacheConfig.enabled) {
|
|
358
426
|
this._addToCache(query, normalizedRows);
|
|
359
427
|
}
|
|
360
|
-
|
|
428
|
+
|
|
361
429
|
return normalizedRows;
|
|
362
430
|
} catch (e) {
|
|
363
431
|
this.stats.errors++;
|
|
364
|
-
this._log('ERROR', `Query failed: ${e.message}`);
|
|
432
|
+
this._log('ERROR', `Query failed: ${e.message}\nSQL: ${query.sql}`);
|
|
365
433
|
throw e;
|
|
366
434
|
}
|
|
367
435
|
}
|
|
368
436
|
|
|
369
437
|
async *_executeStream(query) {
|
|
370
|
-
|
|
371
|
-
|
|
438
|
+
await this._enforceCostSafety(query);
|
|
439
|
+
|
|
440
|
+
// [CRITICAL FIX] Added SQL Logging to Stream Execution
|
|
441
|
+
console.log('\n--- 🔍 EXECUTING DRIVER STREAM SQL ---------------------');
|
|
442
|
+
console.log(query.sql);
|
|
443
|
+
console.log('PARAMS:', JSON.stringify(query.params));
|
|
444
|
+
console.log('--------------------------------------------------------\n');
|
|
445
|
+
|
|
372
446
|
this.stats.queries++;
|
|
373
447
|
try {
|
|
374
448
|
const [job] = await this.client.createQueryJob({
|
|
@@ -377,25 +451,52 @@ class DataFetcher {
|
|
|
377
451
|
const stream = job.getQueryResultsStream();
|
|
378
452
|
for await (const row of stream) {
|
|
379
453
|
this.stats.rowsFetched++;
|
|
380
|
-
// FIX V2.7: Normalize Rows (Recursive JSON Parse)
|
|
381
454
|
yield this._normalizeRow(row);
|
|
382
455
|
}
|
|
383
456
|
} catch (e) {
|
|
384
457
|
this.stats.errors++;
|
|
385
|
-
this._log('ERROR', `Stream Query failed: ${e.message}`);
|
|
458
|
+
this._log('ERROR', `Stream Query failed: ${e.message}\nSQL: ${query.sql}`);
|
|
386
459
|
throw e;
|
|
387
460
|
}
|
|
388
461
|
}
|
|
389
462
|
|
|
390
463
|
/**
|
|
391
|
-
*
|
|
464
|
+
* NEW: Runs a Dry Run to estimate cost and blocks expensive queries.
|
|
392
465
|
*/
|
|
466
|
+
async _enforceCostSafety(query) {
|
|
467
|
+
try {
|
|
468
|
+
const [dryRunJob] = await this.client.createQueryJob({
|
|
469
|
+
query: query.sql,
|
|
470
|
+
params: query.params,
|
|
471
|
+
location: this.location,
|
|
472
|
+
dryRun: true // Costs $0, returns stats only
|
|
473
|
+
});
|
|
474
|
+
|
|
475
|
+
const bytes = parseInt(dryRunJob.metadata.statistics.totalBytesProcessed, 10);
|
|
476
|
+
const gb = bytes / (1024 * 1024 * 1024);
|
|
477
|
+
|
|
478
|
+
if (gb > this.safetyLimitGb) {
|
|
479
|
+
const errorMsg = `[DataFetcher] 🛑 COST VIOLATION: Query blocked! Estimated size: ${gb.toFixed(2)} GB (Limit: ${this.safetyLimitGb} GB). ` +
|
|
480
|
+
`Check your partition filters or clustering. Table: ${query.table}`;
|
|
481
|
+
|
|
482
|
+
this._log('ERROR', errorMsg);
|
|
483
|
+
// We log the offending SQL for debugging
|
|
484
|
+
this._log('ERROR', `BLOCKED SQL START:\n${query.sql.substring(0, 500)}...\nEND SQL`);
|
|
485
|
+
|
|
486
|
+
throw new Error(errorMsg);
|
|
487
|
+
}
|
|
488
|
+
} catch (e) {
|
|
489
|
+
// If the dry run fails (e.g. invalid SQL), we let the real execution fail it naturally,
|
|
490
|
+
// unless it was our cost violation error.
|
|
491
|
+
if (e.message.includes('COST VIOLATION')) throw e;
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
393
495
|
_normalizeRow(row) {
|
|
394
496
|
const normalized = { ...row };
|
|
395
497
|
for (const [key, value] of Object.entries(normalized)) {
|
|
396
498
|
if (typeof value === 'string') {
|
|
397
499
|
const trimmed = value.trim();
|
|
398
|
-
// FIX: Check for " (Double Encoded JSON) in addition to { and [
|
|
399
500
|
if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
|
|
400
501
|
normalized[key] = this._safeRecursiveParse(value);
|
|
401
502
|
}
|
|
@@ -404,75 +505,45 @@ class DataFetcher {
|
|
|
404
505
|
return normalized;
|
|
405
506
|
}
|
|
406
507
|
|
|
407
|
-
|
|
408
|
-
* V2.7 FIX: Helper to safely recursively parse JSON.
|
|
409
|
-
* Handles: Double-Encoded JSON Strings (parsed recursively)
|
|
410
|
-
*/
|
|
411
|
-
_safeRecursiveParse(input) {
|
|
508
|
+
_safeRecursiveParse(input) {
|
|
412
509
|
if (!input) return null;
|
|
413
510
|
if (typeof input === 'object') return input;
|
|
414
511
|
try {
|
|
415
512
|
const parsed = JSON.parse(input);
|
|
416
|
-
// Recursion for double-encoded strings
|
|
417
513
|
if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
|
|
418
514
|
return parsed;
|
|
419
515
|
} catch (e) {
|
|
420
|
-
return input;
|
|
516
|
+
return input;
|
|
421
517
|
}
|
|
422
518
|
}
|
|
423
519
|
|
|
424
|
-
/**
|
|
425
|
-
* V2.6: Generate a unique cache key for a query
|
|
426
|
-
*/
|
|
427
520
|
_generateCacheKey(query) {
|
|
428
|
-
// Hash the SQL + Params to ensure uniqueness
|
|
429
521
|
const str = query.sql + JSON.stringify(query.params || {});
|
|
430
522
|
return crypto.createHash('md5').update(str).digest('hex');
|
|
431
523
|
}
|
|
432
524
|
|
|
433
|
-
/**
|
|
434
|
-
* V2.6: Add to cache with LRU eviction
|
|
435
|
-
*/
|
|
436
525
|
_addToCache(query, rows) {
|
|
437
|
-
// Generate key
|
|
438
526
|
const key = this._generateCacheKey(query);
|
|
439
|
-
|
|
440
|
-
// Eviction Logic
|
|
441
527
|
if (this.cache.size >= this.cacheConfig.maxSize) {
|
|
442
|
-
// Map iterator yields in insertion order. First item is oldest.
|
|
443
528
|
const oldestKey = this.cache.keys().next().value;
|
|
444
529
|
this.cache.delete(oldestKey);
|
|
445
530
|
this.stats.cacheEvictions++;
|
|
446
531
|
}
|
|
447
|
-
|
|
448
|
-
this.cache.set(key, {
|
|
449
|
-
rows: rows,
|
|
450
|
-
timestamp: Date.now()
|
|
451
|
-
});
|
|
532
|
+
this.cache.set(key, { rows: rows, timestamp: Date.now() });
|
|
452
533
|
}
|
|
453
|
-
|
|
454
|
-
/**
|
|
455
|
-
* Transforms raw rows into a structured object.
|
|
456
|
-
* FIX: PRIORITIZE ENTITY FIELD.
|
|
457
|
-
* If entityField exists, we MUST return { [id]: [rows] } so IDs can be extracted.
|
|
458
|
-
*/
|
|
534
|
+
|
|
459
535
|
_transform(rows, config) {
|
|
460
536
|
const { lookback, dateField, entityField, dataField } = config;
|
|
461
537
|
const rowArray = Array.isArray(rows) ? rows : [rows];
|
|
462
538
|
|
|
463
|
-
// FIX: Primary Grouping = Entity
|
|
464
539
|
if (entityField) {
|
|
465
540
|
const byEntity = {};
|
|
466
541
|
for (const row of rowArray) {
|
|
467
542
|
const entityKey = String(row[entityField]);
|
|
468
543
|
if (!byEntity[entityKey]) {
|
|
469
|
-
// If simple fetch (no history), value is single object
|
|
470
|
-
// If history fetch (lookback), value is Array of rows
|
|
471
544
|
byEntity[entityKey] = lookback > 0 ? [] : null;
|
|
472
545
|
}
|
|
473
|
-
|
|
474
546
|
const value = dataField ? row[dataField] : row;
|
|
475
|
-
|
|
476
547
|
if (Array.isArray(byEntity[entityKey])) {
|
|
477
548
|
byEntity[entityKey].push(value);
|
|
478
549
|
} else {
|
|
@@ -482,7 +553,6 @@ class DataFetcher {
|
|
|
482
553
|
return byEntity;
|
|
483
554
|
}
|
|
484
555
|
|
|
485
|
-
// Fallback: Date Grouping (Only if no Entity ID)
|
|
486
556
|
if (lookback > 0 && dateField) {
|
|
487
557
|
const byDate = {};
|
|
488
558
|
for (const row of rowArray) {
|
|
@@ -493,12 +563,19 @@ class DataFetcher {
|
|
|
493
563
|
}
|
|
494
564
|
return byDate;
|
|
495
565
|
}
|
|
496
|
-
|
|
497
566
|
return rowArray;
|
|
498
567
|
}
|
|
499
568
|
|
|
500
569
|
_getDriverTable(requires) {
|
|
570
|
+
// PASS 1: Prioritize tables with a DATE field
|
|
571
|
+
for (const [name, spec] of Object.entries(requires)) {
|
|
572
|
+
if (spec.type === 'metric') continue;
|
|
573
|
+
const config = this.tables[name];
|
|
574
|
+
if (config && config.entityField && config.dateField) return name;
|
|
575
|
+
}
|
|
576
|
+
// PASS 2: Fallback to any entity table
|
|
501
577
|
for (const [name, spec] of Object.entries(requires)) {
|
|
578
|
+
if (spec.type === 'metric') continue;
|
|
502
579
|
const config = this.tables[name];
|
|
503
580
|
if (config && config.entityField) return name;
|
|
504
581
|
}
|
|
@@ -509,25 +586,14 @@ class DataFetcher {
|
|
|
509
586
|
const config = this.tables[tableName] || {};
|
|
510
587
|
const field = config.entityField;
|
|
511
588
|
|
|
512
|
-
// Case 1: Transformed Object { "id1": data, "id2": data }
|
|
513
589
|
if (field && batchData && !Array.isArray(batchData)) {
|
|
514
590
|
return Object.keys(batchData);
|
|
515
591
|
}
|
|
516
|
-
|
|
517
|
-
// Case 2: Array of Rows (Only if _transform didn't group by entity)
|
|
518
592
|
if (Array.isArray(batchData) && field) {
|
|
519
593
|
const ids = [];
|
|
520
|
-
let undefinedCount = 0;
|
|
521
594
|
for (const r of batchData) {
|
|
522
595
|
const val = r[field];
|
|
523
|
-
if (val
|
|
524
|
-
undefinedCount++;
|
|
525
|
-
} else {
|
|
526
|
-
ids.push(String(val));
|
|
527
|
-
}
|
|
528
|
-
}
|
|
529
|
-
if (undefinedCount > 0) {
|
|
530
|
-
this._log('ERROR', `CRITICAL CONFIG ERROR: Found ${undefinedCount} rows in '${tableName}' where entityField '${field}' was UNDEFINED.`);
|
|
596
|
+
if (val !== undefined) ids.push(String(val));
|
|
531
597
|
}
|
|
532
598
|
return ids;
|
|
533
599
|
}
|
|
@@ -537,28 +603,28 @@ class DataFetcher {
|
|
|
537
603
|
_extractAllEntityIds(fullData) {
|
|
538
604
|
const ids = new Set();
|
|
539
605
|
Object.values(fullData || {}).forEach(tableData => {
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
606
|
+
if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
|
|
607
|
+
Object.keys(tableData).forEach(k => ids.add(k));
|
|
608
|
+
}
|
|
543
609
|
});
|
|
544
610
|
return Array.from(ids);
|
|
545
611
|
}
|
|
546
|
-
|
|
612
|
+
|
|
547
613
|
_formatDate(dateValue) {
|
|
548
614
|
if (!dateValue) return null;
|
|
549
615
|
if (typeof dateValue === 'string') return dateValue;
|
|
550
|
-
if (dateValue.value) return dateValue.value;
|
|
616
|
+
if (dateValue.value) return dateValue.value;
|
|
551
617
|
if (dateValue instanceof Date) return dateValue.toISOString().slice(0, 10);
|
|
552
618
|
return String(dateValue);
|
|
553
619
|
}
|
|
554
|
-
|
|
620
|
+
|
|
555
621
|
_isEmpty(data) {
|
|
556
622
|
if (data == null) return true;
|
|
557
623
|
if (Array.isArray(data)) return data.length === 0;
|
|
558
624
|
if (typeof data === 'object') return Object.keys(data).length === 0;
|
|
559
625
|
return false;
|
|
560
626
|
}
|
|
561
|
-
|
|
627
|
+
|
|
562
628
|
_log(level, message) {
|
|
563
629
|
if (this.logger && typeof this.logger.log === 'function') {
|
|
564
630
|
this.logger.log(level, `[DataFetcher] ${message}`);
|