bulltrackers-module 1.0.768 → 1.0.769

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
  2. package/functions/computation-system-v2/computations/BehavioralAnomaly.js +557 -337
  3. package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
  4. package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
  5. package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
  6. package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
  7. package/functions/computation-system-v2/computations/SignedInUserList.js +51 -0
  8. package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
  9. package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
  10. package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
  11. package/functions/computation-system-v2/config/bulltrackers.config.js +30 -128
  12. package/functions/computation-system-v2/core-api.js +17 -9
  13. package/functions/computation-system-v2/data_schema_reference.MD +108 -0
  14. package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
  15. package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
  16. package/functions/computation-system-v2/devtools/index.js +36 -0
  17. package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
  18. package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
  19. package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
  20. package/functions/computation-system-v2/devtools/shared/index.js +16 -0
  21. package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
  22. package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
  23. package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
  24. package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
  25. package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
  26. package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
  27. package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
  28. package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
  29. package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
  30. package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
  31. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
  32. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
  33. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
  34. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
  35. package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
  36. package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
  37. package/functions/computation-system-v2/framework/data/DataFetcher.js +250 -184
  38. package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
  39. package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
  40. package/functions/computation-system-v2/framework/execution/Orchestrator.js +215 -129
  41. package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
  42. package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
  43. package/functions/computation-system-v2/framework/storage/StorageManager.js +105 -67
  44. package/functions/computation-system-v2/framework/testing/ComputationTester.js +12 -6
  45. package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
  46. package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
  47. package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
  48. package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
  49. package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
  50. package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
  51. package/package.json +1 -1
@@ -1,27 +1,19 @@
1
1
  /**
2
2
  * @fileoverview Data Fetcher - Executes queries and transforms results
3
3
  * * The single point of data access for computations.
4
- * Uses QueryBuilder for validation, executes against BigQuery, transforms results.
5
- * * V2.2 FIX: "Identity Crisis" & "Date as ID" bugs.
6
- * * V2.3 FIX: "Insufficient History" bug.
7
- * - fetchBatched now orders by Entity ID to keep historical rows together.
8
- * - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
9
- * * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
10
- * * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
11
- * - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
12
- * * V2.6 UPDATE: Query Result Caching.
13
- * - Implemented in-memory LRU cache to prevent redundant BigQuery costs for reference data.
14
- * * V2.7 FIX: Double-Encoded JSON Normalization.
15
- * - Automatically detects and recursively parses JSON strings (e.g. posts_data) to prevent downstream parsing errors.
4
+ * * V3.3 FIX: "Driver Priority" & "Identity Crisis" Patch.
5
+ * * V3.5 GUARD: Cost Protections (Dry Run & Limit Checks).
16
6
  */
17
7
 
18
8
  const { BigQuery } = require('@google-cloud/bigquery');
19
9
  const crypto = require('crypto');
10
+ const { MaterializedViewManager } = require('./MaterializedViewManager');
20
11
 
21
- // FIX #3: Hard limit to prevent cost spirals
22
- const MAX_LOOKBACK_DAYS = 30;
23
- // FIX #6 (Alternative): Warn if an entity is massive (e.g. > 5x batch size)
24
- const BATCH_GROWTH_WARNING_THRESHOLD = 5;
12
+ // SAFETY CONFIGURATION
13
+ // You can move these to your main config file if preferred
14
+ const DEFAULT_SAFETY_LIMIT_GB = 10; // Max GB per query
15
+ const MAX_LOOKBACK_DAYS = 60;
16
+ const BATCH_GROWTH_WARNING_THRESHOLD = 5;
25
17
 
26
18
  class DataFetcher {
27
19
  constructor(config, queryBuilder, logger = null) {
@@ -31,19 +23,25 @@ class DataFetcher {
31
23
  this.tables = config.tables || {};
32
24
  this.queryBuilder = queryBuilder;
33
25
  this.logger = logger;
34
-
26
+
27
+ // Safety Limit from Config or Default
28
+ this.safetyLimitGb = config.safetyLimitGb || DEFAULT_SAFETY_LIMIT_GB;
29
+
35
30
  this.client = new BigQuery({ projectId: this.projectId });
36
-
37
- // Cache Configuration (V2.6)
31
+
32
+ // Initialize MV Manager
33
+ this.mvManager = new MaterializedViewManager(this.client, this.logger, config);
34
+
35
+ // Cache Configuration
38
36
  this.cacheConfig = config.queryCache || {
39
37
  enabled: true,
40
38
  ttlMs: 300000, // 5 minutes default
41
39
  maxSize: 1000 // Max unique queries to cache
42
40
  };
43
-
44
- // Use Map as LRU cache (insertion order preserved)
41
+
42
+ // Use Map as LRU cache
45
43
  this.cache = new Map();
46
-
44
+
47
45
  this.stats = {
48
46
  queries: 0,
49
47
  rowsFetched: 0,
@@ -54,18 +52,25 @@ class DataFetcher {
54
52
  cacheEvictions: 0
55
53
  };
56
54
  }
57
-
55
+
58
56
  /**
59
57
  * Fetch data for a computation's requirements.
60
58
  */
61
59
  async fetchForComputation(requires, targetDate, entities = null) {
62
60
  const results = {};
63
61
  const errors = [];
64
-
65
- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
62
+
63
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
66
64
  try {
65
+ // INTERCEPT METRICS
66
+ if (spec.type === 'metric') {
67
+ results[key] = await this._fetchMetric(spec, targetDate, entities);
68
+ return;
69
+ }
70
+
71
+ // Standard Table Fetch
67
72
  const data = await this.fetch({
68
- table: tableName,
73
+ table: key,
69
74
  targetDate,
70
75
  lookback: spec.lookback || 0,
71
76
  mandatory: spec.mandatory || false,
@@ -73,38 +78,82 @@ class DataFetcher {
73
78
  fields: spec.fields || null,
74
79
  entities: entities
75
80
  });
76
-
77
- results[tableName] = data;
78
-
81
+
82
+ results[key] = data;
83
+
79
84
  if (spec.mandatory && this._isEmpty(data)) {
80
- errors.push({ table: tableName, reason: 'MANDATORY_MISSING' });
85
+ errors.push({ table: key, reason: 'MANDATORY_MISSING' });
81
86
  }
82
-
87
+
83
88
  } catch (e) {
84
89
  if (spec.mandatory) {
85
- errors.push({ table: tableName, reason: e.message });
90
+ errors.push({ table: key, reason: e.message });
86
91
  } else {
87
- this._log('WARN', `Optional table ${tableName} failed: ${e.message}`);
92
+ this._log('WARN', `Optional requirement ${key} failed: ${e.message}`);
88
93
  }
89
- results[tableName] = null;
94
+ results[key] = null;
90
95
  }
91
96
  }));
92
-
97
+
93
98
  if (errors.length > 0) {
94
99
  const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
95
100
  throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
96
101
  }
97
-
102
+
98
103
  return results;
99
104
  }
100
105
 
106
+ /**
107
+ * Fetch Metric with optional Time Series
108
+ */
109
+ async _fetchMetric(spec, targetDate, entities) {
110
+ const mvName = await this.mvManager.ensureMetricView(spec.source, spec);
111
+
112
+ const selectClause = spec.series ? 'entity_id, date, value' : 'entity_id, SUM(value) as value';
113
+ const groupByClause = spec.series ? '' : 'GROUP BY entity_id';
114
+
115
+ let sql = `
116
+ SELECT ${selectClause}
117
+ FROM \`${this.projectId}.${this.dataset}.${mvName}\`
118
+ WHERE date BETWEEN DATE_SUB(@targetDate, INTERVAL @lookback DAY) AND @targetDate
119
+ `;
120
+
121
+ const params = { targetDate, lookback: spec.lookback || 0 };
122
+
123
+ if (entities && entities.length > 0) {
124
+ sql += ` AND entity_id IN UNNEST(@entities)`;
125
+ params.entities = entities.map(String);
126
+ }
127
+
128
+ sql += ` ${groupByClause}`;
129
+
130
+ const rows = await this._execute({ sql, params, table: mvName });
131
+ const result = {};
132
+
133
+ if (spec.series) {
134
+ rows.forEach(r => {
135
+ const eid = r.entity_id;
136
+ const d = r.date.value || r.date;
137
+ if (!result[eid]) result[eid] = {};
138
+ result[eid][d] = r.value;
139
+ });
140
+ } else {
141
+ rows.forEach(r => { result[r.entity_id] = r.value; });
142
+ }
143
+
144
+ return result;
145
+ }
146
+
147
+ /**
148
+ * STREAMING: Fetch data for computation in batches.
149
+ */
101
150
  /**
102
151
  * STREAMING: Fetch data for computation in batches.
103
152
  */
104
153
  async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
105
154
  const driverTableName = this._getDriverTable(requires);
106
155
  const driverConfig = this.tables[driverTableName] || {};
107
-
156
+
108
157
  if (!driverTableName) {
109
158
  this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
110
159
  const fullData = await this.fetchForComputation(requires, targetDate);
@@ -113,9 +162,12 @@ class DataFetcher {
113
162
  return;
114
163
  }
115
164
 
116
- this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
117
165
  const driverSpec = requires[driverTableName];
118
166
 
167
+ // [DEBUG] Explicitly log the filter being applied to the driver
168
+ this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
169
+ this._log('INFO', `Driver Filter: ${JSON.stringify(driverSpec.filter || {})}`);
170
+
119
171
  const driverStream = this.fetchBatched({
120
172
  table: driverTableName,
121
173
  targetDate,
@@ -126,145 +178,154 @@ class DataFetcher {
126
178
  }, batchSize);
127
179
 
128
180
  for await (const batch of driverStream) {
129
- // FIX: Robust ID Extraction
130
181
  const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);
131
-
182
+
132
183
  if (entityIds.length === 0) {
133
- this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs. Check config.entityField matches BigQuery column.`);
184
+ this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs.`);
134
185
  continue;
135
186
  }
136
187
 
137
188
  const batchResults = { [driverTableName]: batch };
138
189
  const errors = [];
139
190
 
140
- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
141
- if (tableName === driverTableName) return;
142
-
143
- // FIX: Identity Crisis Check
144
- const depConfig = this.tables[tableName] || {};
145
- const shouldFilterById = depConfig.entityField === driverConfig.entityField;
191
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
192
+ if (key === driverTableName) return;
146
193
 
147
194
  try {
195
+ if (spec.type === 'metric') {
196
+ batchResults[key] = await this._fetchMetric(spec, targetDate, entityIds);
197
+ return;
198
+ }
199
+
200
+ const depConfig = this.tables[key] || {};
201
+ const hasEntityField = !!depConfig.entityField;
202
+
148
203
  const data = await this.fetch({
149
- table: tableName,
204
+ table: key,
150
205
  targetDate,
151
206
  lookback: spec.lookback || 0,
152
207
  mandatory: spec.mandatory || false,
153
208
  filter: spec.filter || {},
154
209
  fields: spec.fields || null,
155
- entities: shouldFilterById ? entityIds : null
210
+ entities: hasEntityField ? entityIds : null
156
211
  });
157
-
158
- batchResults[tableName] = data;
212
+
213
+ batchResults[key] = data;
159
214
 
160
215
  if (spec.mandatory && this._isEmpty(data)) {
161
- this._log('WARN', `Batch warning: Mandatory table ${tableName} returned 0 rows. (Filtered by ID: ${shouldFilterById})`);
216
+ this._log('WARN', `Batch warning: Mandatory table ${key} returned 0 rows.`);
162
217
  }
163
218
  } catch (e) {
164
- if (spec.mandatory) errors.push({ table: tableName, reason: e.message });
165
- batchResults[tableName] = null;
219
+ if (spec.mandatory) errors.push({ table: key, reason: e.message });
220
+ batchResults[key] = null;
166
221
  }
167
222
  }));
168
223
 
169
224
  if (errors.length > 0) {
170
- this._log('WARN', `Batch missing mandatory data due to errors: ${errors.map(e => e.table).join(', ')}. Skipping batch.`);
171
- continue;
225
+ this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping.`);
226
+ continue;
172
227
  }
173
228
 
174
229
  yield { data: batchResults, entityIds };
175
230
  }
176
231
  }
177
-
232
+
178
233
  async fetch(options) {
179
234
  const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
180
-
181
- // FIX #3: Prevent Runaway Costs
235
+
182
236
  if (lookback > MAX_LOOKBACK_DAYS) {
183
- throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
237
+ throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
184
238
  }
185
239
 
186
240
  const tableConfig = this.tables[table] || {};
187
241
  const { dateField, entityField, dataField } = tableConfig;
188
-
242
+ const physicalTable = tableConfig.tableName || table;
243
+
189
244
  const query = await this.queryBuilder.build({
190
- table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
245
+ table: physicalTable,
246
+ select: fields,
247
+ where: filter,
248
+ dateField,
249
+ targetDate,
250
+ lookback,
251
+ entityField,
252
+ entities,
191
253
  orderBy: dateField || entityField
192
254
  });
193
-
255
+
194
256
  const rows = await this._execute(query);
195
-
257
+
196
258
  if (!rows || rows.length === 0) return null;
197
-
259
+
198
260
  return this._transform(rows, { lookback, dateField, entityField, dataField });
199
261
  }
200
262
 
201
263
  async *fetchBatched(options, batchSize = 1000) {
202
264
  const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
203
265
 
204
- // FIX #3: Prevent Runaway Costs
205
266
  if (lookback > MAX_LOOKBACK_DAYS) {
206
- throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
267
+ throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
207
268
  }
208
269
 
209
270
  const tableConfig = this.tables[table] || {};
210
271
  const { dateField, entityField, dataField } = tableConfig;
272
+ const physicalTable = tableConfig.tableName || table;
211
273
 
212
- // FIX #1: Prioritize ordering by Entity to keep historical rows together
213
274
  const query = await this.queryBuilder.build({
214
- table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
215
- orderBy: entityField || dateField
275
+ table: physicalTable,
276
+ select: fields,
277
+ where: filter,
278
+ dateField,
279
+ targetDate,
280
+ lookback,
281
+ entityField,
282
+ entities,
283
+ orderBy: entityField || dateField
216
284
  });
217
285
 
218
286
  const rowStream = this._executeStream(query);
219
-
287
+
220
288
  let batch = [];
221
289
  let currentEntity = null;
222
- let batchHasWarned = false; // Flag to prevent log spam for a single massive batch
290
+ let batchHasWarned = false;
223
291
 
224
292
  for await (const row of rowStream) {
225
- // FIX #2: Entity-Atomic Batching
226
- // If we have an entity field, verify we don't split an entity across batches
227
293
  if (entityField) {
294
+
228
295
  const rowEntity = String(row[entityField]);
229
-
230
- // Check if we should yield
231
- // Condition: Batch is full AND we are on a NEW entity
296
+
232
297
  if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
233
298
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
234
299
  batch = [];
235
300
  batchHasWarned = false;
236
301
  }
237
-
238
- // SAFETY VALVE (Fix #6 Alternative):
239
- // If batch grows huge (Super Entity) and we CANNOT split (same entity), warn the admin.
302
+
240
303
  if (batch.length > batchSize * BATCH_GROWTH_WARNING_THRESHOLD && !batchHasWarned) {
241
- this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows. ` +
242
- `This exceeds batch size ${batchSize} by ${BATCH_GROWTH_WARNING_THRESHOLD}x. ` +
243
- `Risk of OOM or Timeouts. Consider filtering this entity.`);
304
+ this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows.`);
244
305
  batchHasWarned = true;
245
306
  }
246
-
307
+
247
308
  currentEntity = rowEntity;
248
309
  } else {
249
- // Fallback for non-entity tables (strict count)
250
310
  if (batch.length >= batchSize) {
251
311
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
252
312
  batch = [];
253
313
  }
254
314
  }
255
-
256
315
  batch.push(row);
257
316
  }
258
-
317
+
259
318
  if (batch.length > 0) {
260
319
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
261
320
  }
262
321
  }
263
-
322
+
264
323
  async hasData(table, targetDate) {
265
324
  const tableConfig = this.tables[table] || {};
266
325
  const { dateField } = tableConfig;
267
- const query = await this.queryBuilder.buildExistsQuery(table, dateField, targetDate);
326
+ const physicalTable = tableConfig.tableName || table;
327
+
328
+ const query = await this.queryBuilder.buildExistsQuery(physicalTable, dateField, targetDate);
268
329
  try {
269
330
  const rows = await this._execute(query);
270
331
  return rows && rows.length > 0;
@@ -273,38 +334,34 @@ class DataFetcher {
273
334
  return false;
274
335
  }
275
336
  }
276
-
337
+
338
+ // ... checkAvailability, getStats, resetStats, clearCache (unchanged) ...
277
339
  async checkAvailability(requires, targetDate) {
278
340
  const available = [];
279
341
  const missing = [];
280
-
281
- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
342
+
343
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
344
+ const tableName = spec.type === 'metric' ? spec.source : key;
282
345
  const hasData = await this.hasData(tableName, targetDate);
283
346
  if (hasData) {
284
- available.push(tableName);
347
+ available.push(key);
285
348
  } else if (spec.mandatory) {
286
- missing.push(tableName);
349
+ missing.push(key);
287
350
  } else {
288
- available.push(tableName);
351
+ available.push(key);
289
352
  }
290
353
  }));
291
-
354
+
292
355
  return { canRun: missing.length === 0, available, missing };
293
356
  }
294
357
 
295
-
296
358
  getStats() { return { ...this.stats }; }
297
-
298
- resetStats() {
299
- this.stats = {
300
- queries: 0,
301
- rowsFetched: 0,
302
- errors: 0,
303
- bytesProcessed: 0,
304
- cacheHits: 0,
305
- cacheMisses: 0,
306
- cacheEvictions: 0
307
- };
359
+
360
+ resetStats() {
361
+ this.stats = {
362
+ queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0,
363
+ cacheHits: 0, cacheMisses: 0, cacheEvictions: 0
364
+ };
308
365
  this.cache.clear();
309
366
  }
310
367
 
@@ -312,63 +369,80 @@ class DataFetcher {
312
369
  this.cache.clear();
313
370
  this._log('DEBUG', 'Query cache cleared');
314
371
  }
315
-
372
+
316
373
  // =========================================================================
317
374
  // PRIVATE METHODS
318
375
  // =========================================================================
319
-
376
+
377
+ /**
378
+ * Executes a query with Cost Guard protection.
379
+ */
320
380
  async _execute(query) {
321
- // V2.6: Query Caching
381
+ // Caching Logic
322
382
  if (this.cacheConfig.enabled) {
323
383
  const cacheKey = this._generateCacheKey(query);
324
384
  const cached = this.cache.get(cacheKey);
325
-
326
385
  if (cached) {
327
386
  if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
328
387
  this.stats.cacheHits++;
329
- // Refresh LRU position (delete and re-set moves to end)
330
388
  this.cache.delete(cacheKey);
331
389
  this.cache.set(cacheKey, cached);
332
- // Return cached rows immediately - no BigQuery cost
333
390
  return cached.rows;
334
391
  } else {
335
- this.cache.delete(cacheKey); // Expired
392
+ this.cache.delete(cacheKey);
336
393
  }
337
394
  }
338
395
  this.stats.cacheMisses++;
339
396
  }
340
397
 
398
+ // --- COST GUARD: DRY RUN ---
399
+ await this._enforceCostSafety(query);
400
+ // ---------------------------
401
+
402
+ // =========================================================
403
+ // 🛑 DEBUGGING: PRINT EXACT SQL
404
+ // =========================================================
405
+ console.log('\n--- 🔍 EXECUTING SQL -----------------------------------');
406
+ console.log(query.sql);
407
+ console.log('PARAMS:', JSON.stringify(query.params));
408
+ console.log('--------------------------------------------------------\n');
409
+ // =========================================================
410
+
341
411
  this.stats.queries++;
342
-
412
+
343
413
  try {
344
414
  const [job] = await this.client.createQueryJob({
345
415
  query: query.sql, params: query.params, location: this.location
346
416
  });
347
417
  const [rows] = await job.getQueryResults();
348
418
  const [metadata] = await job.getMetadata();
349
-
419
+
350
420
  this.stats.rowsFetched += rows.length;
351
421
  this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
352
-
353
- // FIX V2.7: Normalize Rows (Recursive JSON Parse) BEFORE caching
422
+
354
423
  const normalizedRows = rows.map(r => this._normalizeRow(r));
355
-
356
- // Store in cache if enabled
424
+
357
425
  if (this.cacheConfig.enabled) {
358
426
  this._addToCache(query, normalizedRows);
359
427
  }
360
-
428
+
361
429
  return normalizedRows;
362
430
  } catch (e) {
363
431
  this.stats.errors++;
364
- this._log('ERROR', `Query failed: ${e.message}`);
432
+ this._log('ERROR', `Query failed: ${e.message}\nSQL: ${query.sql}`);
365
433
  throw e;
366
434
  }
367
435
  }
368
436
 
369
437
  async *_executeStream(query) {
370
- // NOTE: We do NOT cache streams. They are typically massive datasets (batch processing)
371
- // and caching them in memory would cause OOM.
438
+ await this._enforceCostSafety(query);
439
+
440
+ // [CRITICAL FIX] Added SQL Logging to Stream Execution
441
+ console.log('\n--- 🔍 EXECUTING DRIVER STREAM SQL ---------------------');
442
+ console.log(query.sql);
443
+ console.log('PARAMS:', JSON.stringify(query.params));
444
+ console.log('--------------------------------------------------------\n');
445
+
372
446
  this.stats.queries++;
373
447
  try {
374
448
  const [job] = await this.client.createQueryJob({
@@ -377,25 +451,52 @@ class DataFetcher {
377
451
  const stream = job.getQueryResultsStream();
378
452
  for await (const row of stream) {
379
453
  this.stats.rowsFetched++;
380
- // FIX V2.7: Normalize Rows (Recursive JSON Parse)
381
454
  yield this._normalizeRow(row);
382
455
  }
383
456
  } catch (e) {
384
457
  this.stats.errors++;
385
- this._log('ERROR', `Stream Query failed: ${e.message}`);
458
+ this._log('ERROR', `Stream Query failed: ${e.message}\nSQL: ${query.sql}`);
386
459
  throw e;
387
460
  }
388
461
  }
389
462
 
390
463
  /**
391
- * V2.8 FIX: JSON Detection Logic
464
+ * NEW: Runs a Dry Run to estimate cost and blocks expensive queries.
392
465
  */
466
+ async _enforceCostSafety(query) {
467
+ try {
468
+ const [dryRunJob] = await this.client.createQueryJob({
469
+ query: query.sql,
470
+ params: query.params,
471
+ location: this.location,
472
+ dryRun: true // Costs $0, returns stats only
473
+ });
474
+
475
+ const bytes = parseInt(dryRunJob.metadata.statistics.totalBytesProcessed, 10);
476
+ const gb = bytes / (1024 * 1024 * 1024);
477
+
478
+ if (gb > this.safetyLimitGb) {
479
+ const errorMsg = `[DataFetcher] 🛑 COST VIOLATION: Query blocked! Estimated size: ${gb.toFixed(2)} GB (Limit: ${this.safetyLimitGb} GB). ` +
480
+ `Check your partition filters or clustering. Table: ${query.table}`;
481
+
482
+ this._log('ERROR', errorMsg);
483
+ // We log the offending SQL for debugging
484
+ this._log('ERROR', `BLOCKED SQL START:\n${query.sql.substring(0, 500)}...\nEND SQL`);
485
+
486
+ throw new Error(errorMsg);
487
+ }
488
+ } catch (e) {
489
+ // If the dry run fails (e.g. invalid SQL), we let the real execution fail it naturally,
490
+ // unless it was our cost violation error.
491
+ if (e.message.includes('COST VIOLATION')) throw e;
492
+ }
493
+ }
494
+
393
495
  _normalizeRow(row) {
394
496
  const normalized = { ...row };
395
497
  for (const [key, value] of Object.entries(normalized)) {
396
498
  if (typeof value === 'string') {
397
499
  const trimmed = value.trim();
398
- // FIX: Check for " (Double Encoded JSON) in addition to { and [
399
500
  if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
400
501
  normalized[key] = this._safeRecursiveParse(value);
401
502
  }
@@ -404,75 +505,45 @@ class DataFetcher {
404
505
  return normalized;
405
506
  }
406
507
 
407
- /**
408
- * V2.7 FIX: Helper to safely recursively parse JSON.
409
- * Handles: Double-Encoded JSON Strings (parsed recursively)
410
- */
411
- _safeRecursiveParse(input) {
508
+ _safeRecursiveParse(input) {
412
509
  if (!input) return null;
413
510
  if (typeof input === 'object') return input;
414
511
  try {
415
512
  const parsed = JSON.parse(input);
416
- // Recursion for double-encoded strings
417
513
  if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
418
514
  return parsed;
419
515
  } catch (e) {
420
- return input; // Not JSON, return original
516
+ return input;
421
517
  }
422
518
  }
423
519
 
424
- /**
425
- * V2.6: Generate a unique cache key for a query
426
- */
427
520
  _generateCacheKey(query) {
428
- // Hash the SQL + Params to ensure uniqueness
429
521
  const str = query.sql + JSON.stringify(query.params || {});
430
522
  return crypto.createHash('md5').update(str).digest('hex');
431
523
  }
432
524
 
433
- /**
434
- * V2.6: Add to cache with LRU eviction
435
- */
436
525
  _addToCache(query, rows) {
437
- // Generate key
438
526
  const key = this._generateCacheKey(query);
439
-
440
- // Eviction Logic
441
527
  if (this.cache.size >= this.cacheConfig.maxSize) {
442
- // Map iterator yields in insertion order. First item is oldest.
443
528
  const oldestKey = this.cache.keys().next().value;
444
529
  this.cache.delete(oldestKey);
445
530
  this.stats.cacheEvictions++;
446
531
  }
447
-
448
- this.cache.set(key, {
449
- rows: rows,
450
- timestamp: Date.now()
451
- });
532
+ this.cache.set(key, { rows: rows, timestamp: Date.now() });
452
533
  }
453
-
454
- /**
455
- * Transforms raw rows into a structured object.
456
- * FIX: PRIORITIZE ENTITY FIELD.
457
- * If entityField exists, we MUST return { [id]: [rows] } so IDs can be extracted.
458
- */
534
+
459
535
  _transform(rows, config) {
460
536
  const { lookback, dateField, entityField, dataField } = config;
461
537
  const rowArray = Array.isArray(rows) ? rows : [rows];
462
538
 
463
- // FIX: Primary Grouping = Entity
464
539
  if (entityField) {
465
540
  const byEntity = {};
466
541
  for (const row of rowArray) {
467
542
  const entityKey = String(row[entityField]);
468
543
  if (!byEntity[entityKey]) {
469
- // If simple fetch (no history), value is single object
470
- // If history fetch (lookback), value is Array of rows
471
544
  byEntity[entityKey] = lookback > 0 ? [] : null;
472
545
  }
473
-
474
546
  const value = dataField ? row[dataField] : row;
475
-
476
547
  if (Array.isArray(byEntity[entityKey])) {
477
548
  byEntity[entityKey].push(value);
478
549
  } else {
@@ -482,7 +553,6 @@ class DataFetcher {
482
553
  return byEntity;
483
554
  }
484
555
 
485
- // Fallback: Date Grouping (Only if no Entity ID)
486
556
  if (lookback > 0 && dateField) {
487
557
  const byDate = {};
488
558
  for (const row of rowArray) {
@@ -493,12 +563,19 @@ class DataFetcher {
493
563
  }
494
564
  return byDate;
495
565
  }
496
-
497
566
  return rowArray;
498
567
  }
499
568
 
500
569
  _getDriverTable(requires) {
570
+ // PASS 1: Prioritize tables with a DATE field
571
+ for (const [name, spec] of Object.entries(requires)) {
572
+ if (spec.type === 'metric') continue;
573
+ const config = this.tables[name];
574
+ if (config && config.entityField && config.dateField) return name;
575
+ }
576
+ // PASS 2: Fallback to any entity table
501
577
  for (const [name, spec] of Object.entries(requires)) {
578
+ if (spec.type === 'metric') continue;
502
579
  const config = this.tables[name];
503
580
  if (config && config.entityField) return name;
504
581
  }
@@ -509,25 +586,14 @@ class DataFetcher {
509
586
  const config = this.tables[tableName] || {};
510
587
  const field = config.entityField;
511
588
 
512
- // Case 1: Transformed Object { "id1": data, "id2": data }
513
589
  if (field && batchData && !Array.isArray(batchData)) {
514
590
  return Object.keys(batchData);
515
591
  }
516
-
517
- // Case 2: Array of Rows (Only if _transform didn't group by entity)
518
592
  if (Array.isArray(batchData) && field) {
519
593
  const ids = [];
520
- let undefinedCount = 0;
521
594
  for (const r of batchData) {
522
595
  const val = r[field];
523
- if (val === undefined) {
524
- undefinedCount++;
525
- } else {
526
- ids.push(String(val));
527
- }
528
- }
529
- if (undefinedCount > 0) {
530
- this._log('ERROR', `CRITICAL CONFIG ERROR: Found ${undefinedCount} rows in '${tableName}' where entityField '${field}' was UNDEFINED.`);
596
+ if (val !== undefined) ids.push(String(val));
531
597
  }
532
598
  return ids;
533
599
  }
@@ -537,28 +603,28 @@ class DataFetcher {
537
603
  _extractAllEntityIds(fullData) {
538
604
  const ids = new Set();
539
605
  Object.values(fullData || {}).forEach(tableData => {
540
- if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
541
- Object.keys(tableData).forEach(k => ids.add(k));
542
- }
606
+ if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
607
+ Object.keys(tableData).forEach(k => ids.add(k));
608
+ }
543
609
  });
544
610
  return Array.from(ids);
545
611
  }
546
-
612
+
547
613
  _formatDate(dateValue) {
548
614
  if (!dateValue) return null;
549
615
  if (typeof dateValue === 'string') return dateValue;
550
- if (dateValue.value) return dateValue.value;
616
+ if (dateValue.value) return dateValue.value;
551
617
  if (dateValue instanceof Date) return dateValue.toISOString().slice(0, 10);
552
618
  return String(dateValue);
553
619
  }
554
-
620
+
555
621
  _isEmpty(data) {
556
622
  if (data == null) return true;
557
623
  if (Array.isArray(data)) return data.length === 0;
558
624
  if (typeof data === 'object') return Object.keys(data).length === 0;
559
625
  return false;
560
626
  }
561
-
627
+
562
628
  _log(level, message) {
563
629
  if (this.logger && typeof this.logger.log === 'function') {
564
630
  this.logger.log(level, `[DataFetcher] ${message}`);