bulltrackers-module 1.0.766 → 1.0.769

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
  2. package/functions/computation-system-v2/computations/BehavioralAnomaly.js +559 -227
  3. package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
  4. package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
  5. package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
  6. package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
  7. package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
  8. package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
  9. package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
  10. package/functions/computation-system-v2/computations/SignedInUserList.js +51 -0
  11. package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
  12. package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
  13. package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
  14. package/functions/computation-system-v2/config/bulltrackers.config.js +40 -126
  15. package/functions/computation-system-v2/core-api.js +17 -9
  16. package/functions/computation-system-v2/data_schema_reference.MD +108 -0
  17. package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
  18. package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
  19. package/functions/computation-system-v2/devtools/index.js +36 -0
  20. package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
  21. package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
  22. package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
  23. package/functions/computation-system-v2/devtools/shared/index.js +16 -0
  24. package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
  25. package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
  26. package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
  27. package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
  28. package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
  29. package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
  30. package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
  31. package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
  32. package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
  33. package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
  34. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
  35. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
  36. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
  37. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
  38. package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
  39. package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
  40. package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
  41. package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
  42. package/functions/computation-system-v2/framework/data/DataFetcher.js +330 -126
  43. package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
  44. package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
  45. package/functions/computation-system-v2/framework/execution/Orchestrator.js +226 -153
  46. package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
  47. package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
  48. package/functions/computation-system-v2/framework/storage/StorageManager.js +111 -83
  49. package/functions/computation-system-v2/framework/testing/ComputationTester.js +161 -66
  50. package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
  51. package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
  52. package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
  53. package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
  54. package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
  55. package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
  56. package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
  57. package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
  58. package/package.json +1 -1
  59. package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
  60. package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
  61. package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
  62. package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
  63. package/functions/computation-system-v2/test/analyze-results.js +0 -238
  64. package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
  65. package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
  66. package/functions/computation-system-v2/test/other/test-framework.js +0 -500
  67. package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
  68. package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
  69. package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
  70. package/functions/computation-system-v2/test/other/test-results.json +0 -31
  71. package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
  72. package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
  73. package/functions/computation-system-v2/test/other/test-storage.js +0 -449
  74. package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
  75. package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
  76. package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
package/functions/computation-system-v2/framework/data/DataFetcher.js (+330 -126)
@@ -1,22 +1,19 @@
  /**
  * @fileoverview Data Fetcher - Executes queries and transforms results
  * * The single point of data access for computations.
- * Uses QueryBuilder for validation, executes against BigQuery, transforms results.
- * * V2.2 FIX: "Identity Crisis" & "Date as ID" bugs.
- * * V2.3 FIX: "Insufficient History" bug.
- * - fetchBatched now orders by Entity ID to keep historical rows together.
- * - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
- * * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
- * * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
- * - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
+ * * V3.3 FIX: "Driver Priority" & "Identity Crisis" Patch.
+ * * V3.5 GUARD: Cost Protections (Dry Run & Limit Checks).
  */

  const { BigQuery } = require('@google-cloud/bigquery');
+ const crypto = require('crypto');
+ const { MaterializedViewManager } = require('./MaterializedViewManager');

- // FIX #3: Hard limit to prevent cost spirals
- const MAX_LOOKBACK_DAYS = 30;
- // FIX #6 (Alternative): Warn if an entity is massive (e.g. > 5x batch size)
- const BATCH_GROWTH_WARNING_THRESHOLD = 5;
+ // SAFETY CONFIGURATION
+ // You can move these to your main config file if preferred
+ const DEFAULT_SAFETY_LIMIT_GB = 10; // Max GB per query
+ const MAX_LOOKBACK_DAYS = 60;
+ const BATCH_GROWTH_WARNING_THRESHOLD = 5;

  class DataFetcher {
  constructor(config, queryBuilder, logger = null) {
@@ -26,28 +23,54 @@ class DataFetcher {
  this.tables = config.tables || {};
  this.queryBuilder = queryBuilder;
  this.logger = logger;
-
+
+ // Safety Limit from Config or Default
+ this.safetyLimitGb = config.safetyLimitGb || DEFAULT_SAFETY_LIMIT_GB;
+
  this.client = new BigQuery({ projectId: this.projectId });
-
+
+ // Initialize MV Manager
+ this.mvManager = new MaterializedViewManager(this.client, this.logger, config);
+
+ // Cache Configuration
+ this.cacheConfig = config.queryCache || {
+ enabled: true,
+ ttlMs: 300000, // 5 minutes default
+ maxSize: 1000 // Max unique queries to cache
+ };
+
+ // Use Map as LRU cache
+ this.cache = new Map();
+
  this.stats = {
  queries: 0,
  rowsFetched: 0,
  errors: 0,
- bytesProcessed: 0
+ bytesProcessed: 0,
+ cacheHits: 0,
+ cacheMisses: 0,
+ cacheEvictions: 0
  };
  }
-
+
  /**
  * Fetch data for a computation's requirements.
  */
  async fetchForComputation(requires, targetDate, entities = null) {
  const results = {};
  const errors = [];
-
- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
+
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
  try {
+ // INTERCEPT METRICS
+ if (spec.type === 'metric') {
+ results[key] = await this._fetchMetric(spec, targetDate, entities);
+ return;
+ }
+
+ // Standard Table Fetch
  const data = await this.fetch({
- table: tableName,
+ table: key,
  targetDate,
  lookback: spec.lookback || 0,
  mandatory: spec.mandatory || false,
@@ -55,38 +78,82 @@ class DataFetcher {
  fields: spec.fields || null,
  entities: entities
  });
-
- results[tableName] = data;
-
+
+ results[key] = data;
+
  if (spec.mandatory && this._isEmpty(data)) {
- errors.push({ table: tableName, reason: 'MANDATORY_MISSING' });
+ errors.push({ table: key, reason: 'MANDATORY_MISSING' });
  }
-
+
  } catch (e) {
  if (spec.mandatory) {
- errors.push({ table: tableName, reason: e.message });
+ errors.push({ table: key, reason: e.message });
  } else {
- this._log('WARN', `Optional table ${tableName} failed: ${e.message}`);
+ this._log('WARN', `Optional requirement ${key} failed: ${e.message}`);
  }
- results[tableName] = null;
+ results[key] = null;
  }
  }));
-
+
  if (errors.length > 0) {
  const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
  throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
  }
-
+
  return results;
  }

+ /**
+ * Fetch Metric with optional Time Series
+ */
+ async _fetchMetric(spec, targetDate, entities) {
+ const mvName = await this.mvManager.ensureMetricView(spec.source, spec);
+
+ const selectClause = spec.series ? 'entity_id, date, value' : 'entity_id, SUM(value) as value';
+ const groupByClause = spec.series ? '' : 'GROUP BY entity_id';
+
+ let sql = `
+ SELECT ${selectClause}
+ FROM \`${this.projectId}.${this.dataset}.${mvName}\`
+ WHERE date BETWEEN DATE_SUB(@targetDate, INTERVAL @lookback DAY) AND @targetDate
+ `;
+
+ const params = { targetDate, lookback: spec.lookback || 0 };
+
+ if (entities && entities.length > 0) {
+ sql += ` AND entity_id IN UNNEST(@entities)`;
+ params.entities = entities.map(String);
+ }
+
+ sql += ` ${groupByClause}`;
+
+ const rows = await this._execute({ sql, params, table: mvName });
+ const result = {};
+
+ if (spec.series) {
+ rows.forEach(r => {
+ const eid = r.entity_id;
+ const d = r.date.value || r.date;
+ if (!result[eid]) result[eid] = {};
+ result[eid][d] = r.value;
+ });
+ } else {
+ rows.forEach(r => { result[r.entity_id] = r.value; });
+ }
+
+ return result;
+ }
+
+ /**
+ * STREAMING: Fetch data for computation in batches.
+ */
  /**
  * STREAMING: Fetch data for computation in batches.
  */
  async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
  const driverTableName = this._getDriverTable(requires);
  const driverConfig = this.tables[driverTableName] || {};
-
+
  if (!driverTableName) {
  this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
  const fullData = await this.fetchForComputation(requires, targetDate);
@@ -95,9 +162,12 @@ class DataFetcher {
  return;
  }

- this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
  const driverSpec = requires[driverTableName];

+ // [DEBUG] Explicitly log the filter being applied to the driver
+ this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
+ this._log('INFO', `Driver Filter: ${JSON.stringify(driverSpec.filter || {})}`);
+
  const driverStream = this.fetchBatched({
  table: driverTableName,
  targetDate,
@@ -108,145 +178,154 @@ class DataFetcher {
  }, batchSize);

  for await (const batch of driverStream) {
- // FIX: Robust ID Extraction
  const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);
-
+
  if (entityIds.length === 0) {
- this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs. Check config.entityField matches BigQuery column.`);
+ this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs.`);
  continue;
  }

  const batchResults = { [driverTableName]: batch };
  const errors = [];

- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
- if (tableName === driverTableName) return;
-
- // FIX: Identity Crisis Check
- const depConfig = this.tables[tableName] || {};
- const shouldFilterById = depConfig.entityField === driverConfig.entityField;
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
+ if (key === driverTableName) return;

  try {
+ if (spec.type === 'metric') {
+ batchResults[key] = await this._fetchMetric(spec, targetDate, entityIds);
+ return;
+ }
+
+ const depConfig = this.tables[key] || {};
+ const hasEntityField = !!depConfig.entityField;
+
  const data = await this.fetch({
- table: tableName,
+ table: key,
  targetDate,
  lookback: spec.lookback || 0,
  mandatory: spec.mandatory || false,
  filter: spec.filter || {},
  fields: spec.fields || null,
- entities: shouldFilterById ? entityIds : null
+ entities: hasEntityField ? entityIds : null
  });
-
- batchResults[tableName] = data;
+
+ batchResults[key] = data;

  if (spec.mandatory && this._isEmpty(data)) {
- this._log('WARN', `Batch warning: Mandatory table ${tableName} returned 0 rows. (Filtered by ID: ${shouldFilterById})`);
+ this._log('WARN', `Batch warning: Mandatory table ${key} returned 0 rows.`);
  }
  } catch (e) {
- if (spec.mandatory) errors.push({ table: tableName, reason: e.message });
- batchResults[tableName] = null;
+ if (spec.mandatory) errors.push({ table: key, reason: e.message });
+ batchResults[key] = null;
  }
  }));

  if (errors.length > 0) {
- this._log('WARN', `Batch missing mandatory data due to errors: ${errors.map(e => e.table).join(', ')}. Skipping batch.`);
- continue;
+ this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping.`);
+ continue;
  }

  yield { data: batchResults, entityIds };
  }
  }
-
+
  async fetch(options) {
  const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
-
- // FIX #3: Prevent Runaway Costs
+
  if (lookback > MAX_LOOKBACK_DAYS) {
- throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
+ throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
  }

  const tableConfig = this.tables[table] || {};
  const { dateField, entityField, dataField } = tableConfig;
-
+ const physicalTable = tableConfig.tableName || table;
+
  const query = await this.queryBuilder.build({
- table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
+ table: physicalTable,
+ select: fields,
+ where: filter,
+ dateField,
+ targetDate,
+ lookback,
+ entityField,
+ entities,
  orderBy: dateField || entityField
  });
-
+
  const rows = await this._execute(query);
-
+
  if (!rows || rows.length === 0) return null;
-
+
  return this._transform(rows, { lookback, dateField, entityField, dataField });
  }

  async *fetchBatched(options, batchSize = 1000) {
  const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;

- // FIX #3: Prevent Runaway Costs
  if (lookback > MAX_LOOKBACK_DAYS) {
- throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit of ${MAX_LOOKBACK_DAYS}. Table: ${table}`);
+ throw new Error(`[DataFetcher] COST GUARD: Lookback of ${lookback} days exceeds limit.`);
  }

  const tableConfig = this.tables[table] || {};
  const { dateField, entityField, dataField } = tableConfig;
+ const physicalTable = tableConfig.tableName || table;

- // FIX #1: Prioritize ordering by Entity to keep historical rows together
  const query = await this.queryBuilder.build({
- table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
- orderBy: entityField || dateField
+ table: physicalTable,
+ select: fields,
+ where: filter,
+ dateField,
+ targetDate,
+ lookback,
+ entityField,
+ entities,
+ orderBy: entityField || dateField
  });

  const rowStream = this._executeStream(query);
-
+
  let batch = [];
  let currentEntity = null;
- let batchHasWarned = false; // Flag to prevent log spam for a single massive batch
+ let batchHasWarned = false;

  for await (const row of rowStream) {
- // FIX #2: Entity-Atomic Batching
- // If we have an entity field, verify we don't split an entity across batches
  if (entityField) {
+
  const rowEntity = String(row[entityField]);
-
- // Check if we should yield
- // Condition: Batch is full AND we are on a NEW entity
+
  if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
  batch = [];
  batchHasWarned = false;
  }
-
- // SAFETY VALVE (Fix #6 Alternative):
- // If batch grows huge (Super Entity) and we CANNOT split (same entity), warn the admin.
+
  if (batch.length > batchSize * BATCH_GROWTH_WARNING_THRESHOLD && !batchHasWarned) {
- this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows. ` +
- `This exceeds batch size ${batchSize} by ${BATCH_GROWTH_WARNING_THRESHOLD}x. ` +
- `Risk of OOM or Timeouts. Consider filtering this entity.`);
+ this._log('WARN', `SUPER ENTITY DETECTED: Entity '${currentEntity}' in table '${table}' has >${batch.length} rows.`);
  batchHasWarned = true;
  }
-
+
  currentEntity = rowEntity;
  } else {
- // Fallback for non-entity tables (strict count)
  if (batch.length >= batchSize) {
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
  batch = [];
  }
  }
-
  batch.push(row);
  }
-
+
  if (batch.length > 0) {
  yield this._transform(batch, { lookback, dateField, entityField, dataField });
  }
  }
-
+
  async hasData(table, targetDate) {
  const tableConfig = this.tables[table] || {};
  const { dateField } = tableConfig;
- const query = await this.queryBuilder.buildExistsQuery(table, dateField, targetDate);
+ const physicalTable = tableConfig.tableName || table;
+
+ const query = await this.queryBuilder.buildExistsQuery(physicalTable, dateField, targetDate);
  try {
  const rows = await this._execute(query);
  return rows && rows.length > 0;
@@ -255,47 +334,115 @@ class DataFetcher {
  return false;
  }
  }
-
+
+ // ... checkAvailability, getStats, resetStats, clearCache (unchanged) ...
  async checkAvailability(requires, targetDate) {
  const available = [];
  const missing = [];
-
- await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
+
+ await Promise.all(Object.entries(requires).map(async ([key, spec]) => {
+ const tableName = spec.type === 'metric' ? spec.source : key;
  const hasData = await this.hasData(tableName, targetDate);
  if (hasData) {
- available.push(tableName);
+ available.push(key);
  } else if (spec.mandatory) {
- missing.push(tableName);
+ missing.push(key);
  } else {
- available.push(tableName);
+ available.push(key);
  }
  }));
-
+
  return { canRun: missing.length === 0, available, missing };
  }
-
+
  getStats() { return { ...this.stats }; }
- resetStats() { this.stats = { queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0 }; }
-
+
+ resetStats() {
+ this.stats = {
+ queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0,
+ cacheHits: 0, cacheMisses: 0, cacheEvictions: 0
+ };
+ this.cache.clear();
+ }
+
+ clearCache() {
+ this.cache.clear();
+ this._log('DEBUG', 'Query cache cleared');
+ }
+
+ // =========================================================================
+ // PRIVATE METHODS
+ // =========================================================================
+
+ /**
+ * Executes a query with Cost Guard protection.
+ */
  async _execute(query) {
+ // Caching Logic
+ if (this.cacheConfig.enabled) {
+ const cacheKey = this._generateCacheKey(query);
+ const cached = this.cache.get(cacheKey);
+ if (cached) {
+ if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
+ this.stats.cacheHits++;
+ this.cache.delete(cacheKey);
+ this.cache.set(cacheKey, cached);
+ return cached.rows;
+ } else {
+ this.cache.delete(cacheKey);
+ }
+ }
+ this.stats.cacheMisses++;
+ }
+
+ // --- COST GUARD: DRY RUN ---
+ await this._enforceCostSafety(query);
+ // ---------------------------
+
+ // =========================================================
+ // 🛑 DEBUGGING: PRINT EXACT SQL
+ // =========================================================
+ console.log('\n--- 🔍 EXECUTING SQL -----------------------------------');
+ console.log(query.sql);
+ console.log('PARAMS:', JSON.stringify(query.params));
+ console.log('--------------------------------------------------------\n');
+ // =========================================================
+
  this.stats.queries++;
+
  try {
  const [job] = await this.client.createQueryJob({
  query: query.sql, params: query.params, location: this.location
  });
  const [rows] = await job.getQueryResults();
  const [metadata] = await job.getMetadata();
+
  this.stats.rowsFetched += rows.length;
  this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
- return rows;
+
+ const normalizedRows = rows.map(r => this._normalizeRow(r));
+
+ if (this.cacheConfig.enabled) {
+ this._addToCache(query, normalizedRows);
+ }
+
+ return normalizedRows;
  } catch (e) {
  this.stats.errors++;
- this._log('ERROR', `Query failed: ${e.message}`);
+ this._log('ERROR', `Query failed: ${e.message}\nSQL: ${query.sql}`);
  throw e;
  }
  }

  async *_executeStream(query) {
+ await this._enforceCostSafety(query);
+
+ // [CRITICAL FIX] Added SQL Logging to Stream Execution
+ console.log('\n--- 🔍 EXECUTING DRIVER STREAM SQL ---------------------');
+ console.log(query.sql);
+ console.log('PARAMS:', JSON.stringify(query.params));
+ console.log('--------------------------------------------------------\n');
+
  this.stats.queries++;
  try {
  const [job] = await this.client.createQueryJob({
@@ -304,37 +451,99 @@ class DataFetcher {
  const stream = job.getQueryResultsStream();
  for await (const row of stream) {
  this.stats.rowsFetched++;
- yield row;
+ yield this._normalizeRow(row);
  }
  } catch (e) {
  this.stats.errors++;
- this._log('ERROR', `Stream Query failed: ${e.message}`);
+ this._log('ERROR', `Stream Query failed: ${e.message}\nSQL: ${query.sql}`);
  throw e;
  }
  }
-
+
  /**
- * Transforms raw rows into a structured object.
- * FIX: PRIORITIZE ENTITY FIELD.
- * If entityField exists, we MUST return { [id]: [rows] } so IDs can be extracted.
+ * NEW: Runs a Dry Run to estimate cost and blocks expensive queries.
  */
+ async _enforceCostSafety(query) {
+ try {
+ const [dryRunJob] = await this.client.createQueryJob({
+ query: query.sql,
+ params: query.params,
+ location: this.location,
+ dryRun: true // Costs $0, returns stats only
+ });
+
+ const bytes = parseInt(dryRunJob.metadata.statistics.totalBytesProcessed, 10);
+ const gb = bytes / (1024 * 1024 * 1024);
+
+ if (gb > this.safetyLimitGb) {
+ const errorMsg = `[DataFetcher] 🛑 COST VIOLATION: Query blocked! Estimated size: ${gb.toFixed(2)} GB (Limit: ${this.safetyLimitGb} GB). ` +
+ `Check your partition filters or clustering. Table: ${query.table}`;
+
+ this._log('ERROR', errorMsg);
+ // We log the offending SQL for debugging
+ this._log('ERROR', `BLOCKED SQL START:\n${query.sql.substring(0, 500)}...\nEND SQL`);
+
+ throw new Error(errorMsg);
+ }
+ } catch (e) {
+ // If the dry run fails (e.g. invalid SQL), we let the real execution fail it naturally,
+ // unless it was our cost violation error.
+ if (e.message.includes('COST VIOLATION')) throw e;
+ }
+ }
+
+ _normalizeRow(row) {
+ const normalized = { ...row };
+ for (const [key, value] of Object.entries(normalized)) {
+ if (typeof value === 'string') {
+ const trimmed = value.trim();
+ if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
+ normalized[key] = this._safeRecursiveParse(value);
+ }
+ }
+ }
+ return normalized;
+ }
+
+ _safeRecursiveParse(input) {
+ if (!input) return null;
+ if (typeof input === 'object') return input;
+ try {
+ const parsed = JSON.parse(input);
+ if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
+ return parsed;
+ } catch (e) {
+ return input;
+ }
+ }
+
+ _generateCacheKey(query) {
+ const str = query.sql + JSON.stringify(query.params || {});
+ return crypto.createHash('md5').update(str).digest('hex');
+ }
+
+ _addToCache(query, rows) {
+ const key = this._generateCacheKey(query);
+ if (this.cache.size >= this.cacheConfig.maxSize) {
+ const oldestKey = this.cache.keys().next().value;
+ this.cache.delete(oldestKey);
+ this.stats.cacheEvictions++;
+ }
+ this.cache.set(key, { rows: rows, timestamp: Date.now() });
+ }
+
  _transform(rows, config) {
  const { lookback, dateField, entityField, dataField } = config;
  const rowArray = Array.isArray(rows) ? rows : [rows];

- // FIX: Primary Grouping = Entity
  if (entityField) {
  const byEntity = {};
  for (const row of rowArray) {
  const entityKey = String(row[entityField]);
  if (!byEntity[entityKey]) {
- // If simple fetch (no history), value is single object
- // If history fetch (lookback), value is Array of rows
  byEntity[entityKey] = lookback > 0 ? [] : null;
  }
-
  const value = dataField ? row[dataField] : row;
-
  if (Array.isArray(byEntity[entityKey])) {
  byEntity[entityKey].push(value);
  } else {
@@ -344,7 +553,6 @@ class DataFetcher {
  return byEntity;
  }

- // Fallback: Date Grouping (Only if no Entity ID)
  if (lookback > 0 && dateField) {
  const byDate = {};
  for (const row of rowArray) {
@@ -355,12 +563,19 @@ class DataFetcher {
  }
  return byDate;
  }
-
  return rowArray;
  }

  _getDriverTable(requires) {
+ // PASS 1: Prioritize tables with a DATE field
+ for (const [name, spec] of Object.entries(requires)) {
+ if (spec.type === 'metric') continue;
+ const config = this.tables[name];
+ if (config && config.entityField && config.dateField) return name;
+ }
+ // PASS 2: Fallback to any entity table
  for (const [name, spec] of Object.entries(requires)) {
+ if (spec.type === 'metric') continue;
  const config = this.tables[name];
  if (config && config.entityField) return name;
  }
@@ -371,25 +586,14 @@ class DataFetcher {
  const config = this.tables[tableName] || {};
  const field = config.entityField;

- // Case 1: Transformed Object { "id1": data, "id2": data }
  if (field && batchData && !Array.isArray(batchData)) {
  return Object.keys(batchData);
  }
-
- // Case 2: Array of Rows (Only if _transform didn't group by entity)
  if (Array.isArray(batchData) && field) {
  const ids = [];
- let undefinedCount = 0;
  for (const r of batchData) {
  const val = r[field];
- if (val === undefined) {
- undefinedCount++;
- } else {
- ids.push(String(val));
- }
- }
- if (undefinedCount > 0) {
- this._log('ERROR', `CRITICAL CONFIG ERROR: Found ${undefinedCount} rows in '${tableName}' where entityField '${field}' was UNDEFINED.`);
+ if (val !== undefined) ids.push(String(val));
  }
  return ids;
  }
@@ -399,28 +603,28 @@ class DataFetcher {
  _extractAllEntityIds(fullData) {
  const ids = new Set();
  Object.values(fullData || {}).forEach(tableData => {
- if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
- Object.keys(tableData).forEach(k => ids.add(k));
- }
+ if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
+ Object.keys(tableData).forEach(k => ids.add(k));
+ }
  });
  return Array.from(ids);
  }
-
+
  _formatDate(dateValue) {
  if (!dateValue) return null;
  if (typeof dateValue === 'string') return dateValue;
- if (dateValue.value) return dateValue.value;
+ if (dateValue.value) return dateValue.value;
  if (dateValue instanceof Date) return dateValue.toISOString().slice(0, 10);
  return String(dateValue);
  }
-
+
  _isEmpty(data) {
  if (data == null) return true;
  if (Array.isArray(data)) return data.length === 0;
  if (typeof data === 'object') return Object.keys(data).length === 0;
  return false;
  }
-
+
  _log(level, message) {
  if (this.logger && typeof this.logger.log === 'function') {
  this.logger.log(level, `[DataFetcher] ${message}`);
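
Notes on the DataFetcher changes above. The constructor now reads a queryCache block and a safetyLimitGb value from the config, and queries resolve physical table names through an optional per-table tableName. A minimal sketch of a config that would exercise these options; the project, dataset, and table names are hypothetical, only the key names (safetyLimitGb, queryCache, tableName, entityField, dateField, dataField) come from the diff:

// Minimal sketch: wiring the new cache and cost-guard options (illustrative names only).
const config = {
  projectId: 'my-gcp-project',            // hypothetical
  dataset: 'analytics',                   // hypothetical
  location: 'US',                         // hypothetical
  safetyLimitGb: 5,                       // overrides DEFAULT_SAFETY_LIMIT_GB (10)
  queryCache: { enabled: true, ttlMs: 300000, maxSize: 500 },
  tables: {
    portfolio_history: {                  // hypothetical logical name used in `requires`
      tableName: 'pi_portfolio_history',  // physical BigQuery table; falls back to the key if omitted
      entityField: 'user_id',
      dateField: 'snapshot_date',
      dataField: 'portfolio'
    }
  }
};

const fetcher = new DataFetcher(config, queryBuilder, logger); // queryBuilder and logger as used elsewhere in the framework

Because the cache key is an MD5 of the SQL plus parameters and a hit is re-inserted into the Map, the entry returned by cache.keys().next().value is the least recently used one, which is what _addToCache evicts when maxSize is reached.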
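
fetchForComputation and fetchComputationBatched now intercept requirement entries with type: 'metric' and route them through _fetchMetric and the MaterializedViewManager. A sketch of such a requirement map and the result shapes, based only on the fields the code reads (type, source, series, lookback, mandatory); the metric and table names are hypothetical:

// Hypothetical requirement map passed to fetchForComputation(requires, targetDate).
const requires = {
  // Plain table requirement: the key is a logical table name from config.tables.
  portfolio_history: { mandatory: true, lookback: 7 },

  // Metric requirement: served from the materialized view created for `source`.
  aum_30d: {
    type: 'metric',
    source: 'daily_asset_aum',  // hypothetical source table behind the view
    lookback: 30,
    series: true                // true  -> { [entityId]: { 'YYYY-MM-DD': value } }
                                // false -> { [entityId]: value } (SUM over the lookback window)
  }
};

With series: false the view is aggregated with SUM(value) per entity over the lookback window; with series: true each (entity, date) pair in the window is returned.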
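
The new _normalizeRow/_safeRecursiveParse pair unwraps JSON that was stored as a string, sometimes double-encoded, in BigQuery columns. A small illustration of the recursion using plain JSON.parse semantics (illustration only, not part of the package):

// How _safeRecursiveParse handles double-encoded JSON.
const doubleEncoded = JSON.stringify(JSON.stringify({ assets: ['AAPL'], cash: 0.2 }));
// doubleEncoded === '"{\\"assets\\":[\\"AAPL\\"],\\"cash\\":0.2}"'

// The first JSON.parse yields a string, so the method recurses and parses again:
// _safeRecursiveParse(doubleEncoded) -> { assets: ['AAPL'], cash: 0.2 }

// Non-JSON strings fall through the catch block and are returned unchanged:
// _safeRecursiveParse('plain text') -> 'plain text'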