bulltrackers-module 1.0.733 → 1.0.734
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/README.md +152 -0
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +720 -0
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +176 -0
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +294 -0
- package/functions/computation-system-v2/computations/TestComputation.js +46 -0
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +172 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +317 -0
- package/functions/computation-system-v2/framework/core/Computation.js +73 -0
- package/functions/computation-system-v2/framework/core/Manifest.js +223 -0
- package/functions/computation-system-v2/framework/core/RuleInjector.js +53 -0
- package/functions/computation-system-v2/framework/core/Rules.js +231 -0
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +163 -0
- package/functions/computation-system-v2/framework/cost/CostTracker.js +154 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +399 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +232 -0
- package/functions/computation-system-v2/framework/data/SchemaRegistry.js +287 -0
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +498 -0
- package/functions/computation-system-v2/framework/execution/TaskRunner.js +35 -0
- package/functions/computation-system-v2/framework/execution/middleware/CostTrackerMiddleware.js +32 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +32 -0
- package/functions/computation-system-v2/framework/execution/middleware/Middleware.js +14 -0
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +47 -0
- package/functions/computation-system-v2/framework/index.js +45 -0
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +147 -0
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +80 -0
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +66 -0
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +327 -0
- package/functions/computation-system-v2/framework/storage/StateRepository.js +286 -0
- package/functions/computation-system-v2/framework/storage/StorageManager.js +469 -0
- package/functions/computation-system-v2/framework/storage/index.js +9 -0
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +86 -0
- package/functions/computation-system-v2/framework/utils/Graph.js +205 -0
- package/functions/computation-system-v2/handlers/dispatcher.js +109 -0
- package/functions/computation-system-v2/handlers/index.js +23 -0
- package/functions/computation-system-v2/handlers/onDemand.js +289 -0
- package/functions/computation-system-v2/handlers/scheduler.js +327 -0
- package/functions/computation-system-v2/index.js +163 -0
- package/functions/computation-system-v2/rules/index.js +49 -0
- package/functions/computation-system-v2/rules/instruments.js +465 -0
- package/functions/computation-system-v2/rules/metrics.js +304 -0
- package/functions/computation-system-v2/rules/portfolio.js +534 -0
- package/functions/computation-system-v2/rules/rankings.js +655 -0
- package/functions/computation-system-v2/rules/social.js +562 -0
- package/functions/computation-system-v2/rules/trades.js +545 -0
- package/functions/computation-system-v2/scripts/migrate-sectors.js +73 -0
- package/functions/computation-system-v2/test/test-dispatcher.js +317 -0
- package/functions/computation-system-v2/test/test-framework.js +500 -0
- package/functions/computation-system-v2/test/test-real-execution.js +166 -0
- package/functions/computation-system-v2/test/test-real-integration.js +194 -0
- package/functions/computation-system-v2/test/test-refactor-e2e.js +131 -0
- package/functions/computation-system-v2/test/test-results.json +31 -0
- package/functions/computation-system-v2/test/test-risk-metrics-computation.js +329 -0
- package/functions/computation-system-v2/test/test-scheduler.js +204 -0
- package/functions/computation-system-v2/test/test-storage.js +449 -0
- package/functions/orchestrator/index.js +18 -26
- package/package.json +3 -2
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Data Fetcher - Executes queries and transforms results
|
|
3
|
+
* * The single point of data access for computations.
|
|
4
|
+
* Uses QueryBuilder for validation, executes against BigQuery, transforms results.
|
|
5
|
+
* * V2.1 UPDATE: Added Streaming/Batching support to fix "Memory God" issues.
|
|
6
|
+
* Now supports O(1) memory usage for large datasets via fetchBatched.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const { BigQuery } = require('@google-cloud/bigquery');
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @typedef {Object} FetchOptions
|
|
13
|
+
* @property {string} table - Table name
|
|
14
|
+
* @property {string} targetDate - Target date (YYYY-MM-DD)
|
|
15
|
+
* @property {number} [lookback=0] - Days to look back
|
|
16
|
+
* @property {boolean} [mandatory=false] - If true, throws on no data
|
|
17
|
+
* @property {Object} [filter] - Additional WHERE conditions
|
|
18
|
+
* @property {string[]} [fields] - Specific fields to select
|
|
19
|
+
* @property {string[]} [entities] - Specific entity IDs to fetch
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
class DataFetcher {
  /**
   * @param {Object} config - Configuration object
   * @param {string} config.projectId - GCP project ID
   * @param {string} config.dataset - BigQuery dataset name
   * @param {string} [config.location='US'] - BigQuery location
   * @param {Object} config.tables - Table configuration map; each entry may
   *   declare { dateField, entityField, dataField } which drive date/entity
   *   filtering and result transformation.
   * @param {QueryBuilder} queryBuilder - Query builder instance
   * @param {Object} [logger] - Logger instance exposing log(level, message)
   */
  constructor(config, queryBuilder, logger = null) {
    this.projectId = config.projectId;
    this.dataset = config.dataset;
    this.location = config.location || 'US';
    this.tables = config.tables || {};
    this.queryBuilder = queryBuilder;
    this.logger = logger;

    this.client = new BigQuery({ projectId: this.projectId });

    // Cumulative query statistics; see getStats()/resetStats().
    this.stats = {
      queries: 0,
      rowsFetched: 0,
      errors: 0,
      bytesProcessed: 0
    };
  }

  /**
   * Fetch data for a computation's requirements.
   * All required tables are fetched in parallel. A failed or empty optional
   * table yields null; a failed or empty mandatory table aborts the fetch.
   * @param {Object} requires - Computation's requires declaration
   * @param {string} targetDate - Target date (YYYY-MM-DD)
   * @param {string[]} [entities] - Optional entity IDs to filter
   * @returns {Promise<Object>} Data keyed by table name
   * @throws {Error} If any mandatory table is missing or fails to load
   */
  async fetchForComputation(requires, targetDate, entities = null) {
    const results = {};
    const errors = [];

    // Fetch all tables in parallel
    await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
      try {
        const data = await this.fetch({
          table: tableName,
          targetDate,
          lookback: spec.lookback || 0,
          mandatory: spec.mandatory || false,
          filter: spec.filter || {},
          fields: spec.fields || null,
          entities: entities
        });

        results[tableName] = data;

        if (spec.mandatory && this._isEmpty(data)) {
          errors.push({ table: tableName, reason: 'MANDATORY_MISSING' });
        }

      } catch (e) {
        if (spec.mandatory) {
          errors.push({ table: tableName, reason: e.message });
        } else {
          this._log('WARN', `Optional table ${tableName} failed: ${e.message}`);
        }
        results[tableName] = null;
      }
    }));

    // Throw if mandatory data is missing
    if (errors.length > 0) {
      const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
      throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
    }

    return results;
  }

  /**
   * STREAMING: Fetch data for computation in batches.
   * Uses a "Driver Table" (one with entityField) to stream entities,
   * then fetches dependencies for just that batch. Falls back to a single
   * full fetch when no entity-keyed table is declared.
   * @param {Object} requires - Computation requires
   * @param {string} targetDate - Target Date
   * @param {number} batchSize - Rows per batch
   * @returns {AsyncGenerator<{data: Object, entityIds: string[]}>}
   */
  async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
    // 1. Identify Driver Table (The one we iterate over)
    const driverTableName = this._getDriverTable(requires);

    if (!driverTableName) {
      this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
      const fullData = await this.fetchForComputation(requires, targetDate);
      yield { data: fullData, entityIds: null };
      return;
    }

    this._log('INFO', `Starting batched fetch driven by table: ${driverTableName}`);
    const driverSpec = requires[driverTableName];

    // 2. Stream Driver Table
    const driverStream = this.fetchBatched({
      table: driverTableName,
      targetDate,
      lookback: driverSpec.lookback || 0,
      filter: driverSpec.filter || {},
      fields: driverSpec.fields,
      mandatory: driverSpec.mandatory
    }, batchSize);

    // 3. Process Streams
    for await (const batch of driverStream) {
      // Extract Entity IDs from this batch.
      // The batch is already transformed (entity-keyed, date-keyed, or array).
      const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);

      if (entityIds.length === 0) continue;

      const batchResults = { [driverTableName]: batch };
      const errors = [];

      // 4. Fetch Dependencies for this SPECIFIC batch of entities
      await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
        if (tableName === driverTableName) return; // Already have this

        try {
          const data = await this.fetch({
            table: tableName,
            targetDate,
            lookback: spec.lookback || 0,
            mandatory: spec.mandatory || false,
            filter: spec.filter || {},
            fields: spec.fields || null,
            entities: entityIds // <--- CRITICAL: Filter by current batch
          });

          batchResults[tableName] = data;

          if (spec.mandatory && this._isEmpty(data)) {
            errors.push({ table: tableName, reason: 'MANDATORY_MISSING_IN_BATCH' });
          }
        } catch (e) {
          if (spec.mandatory) errors.push({ table: tableName, reason: e.message });
          batchResults[tableName] = null;
        }
      }));

      // A batch with missing mandatory data is skipped, not fatal: later
      // batches may still be complete.
      if (errors.length > 0) {
        this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping batch.`);
        continue;
      }

      yield { data: batchResults, entityIds };
    }
  }

  /**
   * Fetch data from a single table (Full Load).
   * @param {FetchOptions} options - Fetch options
   * @returns {Promise<Object|Array|null>} Transformed data, or null if no rows
   */
  async fetch(options) {
    const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;

    const tableConfig = this.tables[table] || {};
    const { dateField, entityField, dataField } = tableConfig;

    const query = await this.queryBuilder.build({
      table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
      orderBy: dateField || entityField
    });

    const rows = await this._execute(query);

    if (!rows || rows.length === 0) return null;

    return this._transform(rows, { lookback, dateField, entityField, dataField });
  }

  /**
   * STREAMING: Fetch data from a single table in batches.
   * Each yielded batch is transformed the same way fetch() transforms a
   * full result set.
   * @param {FetchOptions} options
   * @param {number} batchSize
   * @returns {AsyncGenerator<Object|Array>}
   */
  async *fetchBatched(options, batchSize = 1000) {
    const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
    const tableConfig = this.tables[table] || {};
    const { dateField, entityField, dataField } = tableConfig;

    const query = await this.queryBuilder.build({
      table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
      orderBy: dateField || entityField
    });

    // Use Stream Executor
    const rowStream = this._executeStream(query);

    let batch = [];
    for await (const row of rowStream) {
      batch.push(row);
      if (batch.length >= batchSize) {
        yield this._transform(batch, { lookback, dateField, entityField, dataField });
        batch = [];
      }
    }

    // Flush the final partial batch.
    if (batch.length > 0) {
      yield this._transform(batch, { lookback, dateField, entityField, dataField });
    }
  }

  /**
   * Check if data exists for a table on a given date.
   * Returns false (never throws) on query failure.
   * @param {string} table - Table name
   * @param {string} targetDate - Target date (YYYY-MM-DD)
   * @returns {Promise<boolean>}
   */
  async hasData(table, targetDate) {
    const tableConfig = this.tables[table] || {};
    const { dateField } = tableConfig;
    const query = await this.queryBuilder.buildExistsQuery(table, dateField, targetDate);
    try {
      const rows = await this._execute(query);
      return rows && rows.length > 0;
    } catch (e) {
      this._log('WARN', `hasData check failed for ${table}: ${e.message}`);
      return false;
    }
  }

  /**
   * Check availability for multiple tables.
   * A non-mandatory table without data still counts as "available" because
   * the computation can run without it.
   * @param {Object} requires - Computation requires declaration
   * @param {string} targetDate - Target date
   * @returns {Promise<{canRun: boolean, available: string[], missing: string[]}>}
   */
  async checkAvailability(requires, targetDate) {
    const available = [];
    const missing = [];

    await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
      const hasData = await this.hasData(tableName, targetDate);
      if (hasData) {
        available.push(tableName);
      } else if (spec.mandatory) {
        missing.push(tableName);
      } else {
        available.push(tableName);
      }
    }));

    return { canRun: missing.length === 0, available, missing };
  }

  /** @returns {Object} A snapshot copy of the cumulative query stats. */
  getStats() { return { ...this.stats }; }

  /** Reset all cumulative query stats to zero. */
  resetStats() { this.stats = { queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0 }; }

  // =========================================================================
  // PRIVATE METHODS
  // =========================================================================

  /**
   * Execute a built query and return all rows at once.
   * Updates stats (queries, rowsFetched, bytesProcessed, errors).
   */
  async _execute(query) {
    this.stats.queries++;
    try {
      this._log('DEBUG', `Executing (Full): ${query.sql.substring(0, 100)}...`);
      const [job] = await this.client.createQueryJob({
        query: query.sql, params: query.params, location: this.location
      });
      const [rows] = await job.getQueryResults();

      const [metadata] = await job.getMetadata();
      const bytesProcessed = parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
      this.stats.rowsFetched += rows.length;
      this.stats.bytesProcessed += bytesProcessed;

      return rows;
    } catch (e) {
      this.stats.errors++;
      this._log('ERROR', `Query failed: ${e.message}`);
      throw e;
    }
  }

  /**
   * Stream executor using BigQuery result streams; yields one row at a time
   * so callers never hold the full result set in memory.
   */
  async *_executeStream(query) {
    this.stats.queries++;
    try {
      this._log('DEBUG', `Executing (Stream): ${query.sql.substring(0, 100)}...`);
      const [job] = await this.client.createQueryJob({
        query: query.sql, params: query.params, location: this.location
      });

      const stream = job.getQueryResultsStream();

      for await (const row of stream) {
        this.stats.rowsFetched++;
        yield row;
      }
    } catch (e) {
      this.stats.errors++;
      this._log('ERROR', `Stream Query failed: ${e.message}`);
      throw e;
    }
  }

  /**
   * Shape raw rows for consumption:
   *  - lookback > 0 with a dateField -> { 'YYYY-MM-DD': row[] }
   *  - entityField declared          -> { entityId: row } (last row wins)
   *  - otherwise                     -> row[]
   * When dataField is set, rows are replaced by that single column's value.
   */
  _transform(rows, config) {
    const { lookback, dateField, entityField, dataField } = config;

    // Ensure rows is array
    const rowArray = Array.isArray(rows) ? rows : [rows];

    // For lookback > 0, return date-keyed object
    if (lookback > 0 && dateField) {
      const byDate = {};
      for (const row of rowArray) {
        const dateKey = this._formatDate(row[dateField]);
        if (!byDate[dateKey]) byDate[dateKey] = [];
        const value = dataField ? row[dataField] : row;
        byDate[dateKey].push(value);
      }
      return byDate;
    }

    // For entity-keyed tables, return entity-keyed object
    if (entityField) {
      const byEntity = {};
      for (const row of rowArray) {
        const entityKey = String(row[entityField]);
        byEntity[entityKey] = dataField ? row[dataField] : row;
      }
      return byEntity;
    }

    // Default: return array
    return rowArray;
  }

  /**
   * Find the first required table that is entity-keyed; it drives batching.
   * @returns {string|null} Table name, or null if none is entity-keyed
   */
  _getDriverTable(requires) {
    for (const name of Object.keys(requires)) {
      const config = this.tables[name];
      if (config && config.entityField) return name;
    }
    return null;
  }

  /**
   * Extract the entity IDs present in a transformed batch.
   * Handles all three shapes produced by _transform:
   *  - entity-keyed object -> keys ARE the IDs
   *  - date-keyed object   -> values are row arrays; IDs come from the rows
   *  - plain row array     -> map rows through entityField
   * @returns {string[]} Entity IDs as strings (empty when not extractable)
   */
  _extractEntityIdsFromBatch(batchData, tableName) {
    const config = this.tables[tableName] || {};
    if (!config.entityField || !batchData) return [];

    if (Array.isArray(batchData)) {
      return batchData.map(r => String(r[config.entityField]));
    }

    // FIX: a driver table fetched with lookback > 0 is date-keyed
    // ({ date: rows[] }), so Object.keys() would return DATES, not entity
    // IDs. Detect that shape (values are arrays) and pull the IDs out of
    // the rows themselves, de-duplicated.
    const values = Object.values(batchData);
    if (values.length > 0 && Array.isArray(values[0])) {
      const ids = new Set();
      for (const rows of values) {
        for (const row of rows) {
          if (row && row[config.entityField] != null) {
            ids.add(String(row[config.entityField]));
          }
        }
      }
      return [...ids];
    }

    // Entity-keyed object: the keys are the entity IDs.
    return Object.keys(batchData);
  }

  /**
   * Normalize a date cell to a YYYY-MM-DD string. Handles plain strings,
   * BigQuery DATE objects ({ value }), and JS Date instances.
   */
  _formatDate(dateValue) {
    if (!dateValue) return null;
    if (typeof dateValue === 'string') return dateValue;
    if (dateValue.value) return dateValue.value;
    if (dateValue instanceof Date) return dateValue.toISOString().slice(0, 10);
    return String(dateValue);
  }

  /** True for null/undefined, empty array, or object with no keys. */
  _isEmpty(data) {
    if (data == null) return true;
    if (Array.isArray(data)) return data.length === 0;
    if (typeof data === 'object') return Object.keys(data).length === 0;
    return false;
  }

  /** Route messages to the injected logger; only ERRORs hit the console. */
  _log(level, message) {
    if (this.logger && typeof this.logger.log === 'function') {
      this.logger.log(level, `[DataFetcher] ${message}`);
    } else if (level === 'ERROR') {
      console.error(`[DataFetcher] ${message}`);
    }
  }
}
|
|
398
|
+
|
|
399
|
+
module.exports = { DataFetcher };
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Query Builder - Builds validated SQL queries
|
|
3
|
+
*
|
|
4
|
+
* All queries are validated against the SchemaRegistry BEFORE execution.
|
|
5
|
+
* This catches errors early and prevents runtime failures.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* @typedef {Object} QuerySpec
|
|
10
|
+
* @property {string} table - Table name
|
|
11
|
+
* @property {string[]} [select] - Columns to select (null = *)
|
|
12
|
+
* @property {Object} [where] - WHERE conditions as { column: value }
|
|
13
|
+
* @property {string} [dateField] - Date field name for date filtering
|
|
14
|
+
* @property {string} [targetDate] - Target date (YYYY-MM-DD)
|
|
15
|
+
* @property {number} [lookback=0] - Days to look back (0 = target date only)
|
|
16
|
+
* @property {string} [entityField] - Entity field name for entity filtering
|
|
17
|
+
* @property {string[]} [entities] - Entity IDs to filter
|
|
18
|
+
* @property {string} [orderBy] - Column to order by
|
|
19
|
+
* @property {string} [orderDir='DESC'] - Order direction
|
|
20
|
+
* @property {number} [limit] - Limit number of rows
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @typedef {Object} BuiltQuery
|
|
25
|
+
* @property {string} sql - The SQL query string
|
|
26
|
+
* @property {Object} params - Query parameters
|
|
27
|
+
* @property {string} table - Table that was queried
|
|
28
|
+
* @property {Object} spec - Original query spec
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
class QueryBuilder {
  /**
   * @param {Object} config - Configuration object
   * @param {string} config.projectId - GCP project ID
   * @param {string} config.dataset - BigQuery dataset name
   * @param {SchemaRegistry} schemaRegistry - Schema registry instance
   * @param {Object} [logger] - Logger instance
   */
  constructor(config, schemaRegistry, logger = null) {
    this.projectId = config.projectId;
    this.dataset = config.dataset;
    this.schemaRegistry = schemaRegistry;
    this.logger = logger;
  }

  /**
   * Build a validated query.
   * Every referenced column (select, where, dateField, entityField, orderBy)
   * is checked against the SchemaRegistry before SQL is produced.
   * @param {QuerySpec} spec - Query specification
   * @returns {Promise<BuiltQuery>}
   * @throws {Error} If validation fails
   */
  async build(spec) {
    const { table } = spec;

    if (!table) {
      throw new Error('[QueryBuilder] Table name is required');
    }

    // Validate table exists and get schema
    const schema = await this.schemaRegistry.getSchema(table);

    // Validate SELECT columns
    if (spec.select && spec.select.length > 0) {
      await this.schemaRegistry.validateColumns(table, spec.select);
    }

    // Validate WHERE columns
    if (spec.where) {
      await this.schemaRegistry.validateWhere(table, spec.where);
    }

    // Validate date field exists
    if (spec.dateField) {
      const hasDateField = await this.schemaRegistry.hasColumn(table, spec.dateField);
      if (!hasDateField) {
        throw new Error(`[QueryBuilder] Date field '${spec.dateField}' not found in table '${table}'`);
      }
    }

    // Validate entity field exists
    if (spec.entityField) {
      const hasEntityField = await this.schemaRegistry.hasColumn(table, spec.entityField);
      if (!hasEntityField) {
        throw new Error(`[QueryBuilder] Entity field '${spec.entityField}' not found in table '${table}'`);
      }
    }

    // Validate orderBy column
    if (spec.orderBy) {
      const hasOrderBy = await this.schemaRegistry.hasColumn(table, spec.orderBy);
      if (!hasOrderBy) {
        throw new Error(`[QueryBuilder] Order by field '${spec.orderBy}' not found in table '${table}'`);
      }
    }

    // Build the query
    return this._buildSQL(spec, schema);
  }

  /**
   * Build a simple existence check query (SELECT 1 ... LIMIT 1).
   * @param {string} table - Table name
   * @param {string} dateField - Date field name (optional; omitted => any row)
   * @param {string} targetDate - Target date
   * @returns {Promise<BuiltQuery>}
   */
  async buildExistsQuery(table, dateField, targetDate) {
    // Validate table and date field
    await this.schemaRegistry.getSchema(table);
    if (dateField) {
      await this.schemaRegistry.validateColumns(table, [dateField]);
    }

    const tablePath = `\`${this.projectId}.${this.dataset}.${table}\``;

    let sql, params;
    if (dateField) {
      sql = `SELECT 1 FROM ${tablePath} WHERE ${dateField} = @targetDate LIMIT 1`;
      params = { targetDate };
    } else {
      sql = `SELECT 1 FROM ${tablePath} LIMIT 1`;
      params = {};
    }

    return { sql, params, table, spec: { table, dateField, targetDate } };
  }

  // =========================================================================
  // PRIVATE METHODS
  // =========================================================================

  /**
   * Assemble the final SQL string and parameter map from a (pre-validated)
   * spec. Column names were validated by build(); the remaining raw values
   * spliced into the SQL text (orderDir, lookback, limit) are hardened here
   * since they bypass SchemaRegistry validation.
   * @param {QuerySpec} spec
   * @param {Object} schema - Table schema with a columns [{name, type}] list
   * @returns {BuiltQuery}
   * @throws {Error} If orderDir, lookback, or limit are not safe literals
   */
  _buildSQL(spec, schema) {
    const {
      table,
      select = null,
      where = {},
      dateField = null,
      targetDate = null,
      lookback = 0,
      entityField = null,
      entities = null,
      orderBy = null,
      orderDir = 'DESC',
      limit = null
    } = spec;

    const tablePath = `\`${this.projectId}.${this.dataset}.${table}\``;
    const params = {};
    const conditions = [];

    // SELECT clause
    const selectClause = select && select.length > 0
      ? select.join(', ')
      : '*';

    // Date filtering
    if (dateField && targetDate) {
      if (lookback === 0) {
        conditions.push(`${dateField} = @targetDate`);
        params.targetDate = targetDate;
      } else {
        // SECURITY FIX: lookback is interpolated into SQL text, so it must
        // be a plain non-negative integer, never arbitrary text.
        const lookbackDays = Number(lookback);
        if (!Number.isInteger(lookbackDays) || lookbackDays < 0) {
          throw new Error(`[QueryBuilder] Invalid lookback '${lookback}': must be a non-negative integer`);
        }
        conditions.push(`${dateField} BETWEEN DATE_SUB(@targetDate, INTERVAL ${lookbackDays} DAY) AND @targetDate`);
        params.targetDate = targetDate;
      }
    }

    // Entity filtering
    if (entityField && entities && entities.length > 0) {
      conditions.push(`${entityField} IN UNNEST(@entities)`);
      // Detect if entity field is numeric based on schema
      const entityColumn = schema.columns.find(c => c.name === entityField);
      if (entityColumn && this._isNumericType(entityColumn.type)) {
        params.entities = entities.map(e => parseInt(e, 10));
      } else {
        params.entities = entities.map(String);
      }
    }

    // Additional WHERE conditions
    let whereIndex = 0;
    for (const [column, value] of Object.entries(where)) {
      // NOTE: the index is consumed even for IS NULL to keep the historical
      // param naming stable (where_0, where_1, ...).
      const paramName = `where_${whereIndex++}`;

      if (Array.isArray(value)) {
        conditions.push(`${column} IN UNNEST(@${paramName})`);
        params[paramName] = value;
      } else if (value === null) {
        conditions.push(`${column} IS NULL`);
      } else {
        conditions.push(`${column} = @${paramName}`);
        params[paramName] = value;
      }
    }

    // Build WHERE clause
    const whereClause = conditions.length > 0
      ? `WHERE ${conditions.join(' AND ')}`
      : '';

    // ORDER BY clause
    let orderClause = '';
    if (orderBy) {
      // SECURITY FIX: orderDir is interpolated into SQL text unvalidated by
      // the schema registry; restrict it to the two legal keywords.
      const direction = String(orderDir).toUpperCase();
      if (direction !== 'ASC' && direction !== 'DESC') {
        throw new Error(`[QueryBuilder] Invalid order direction '${orderDir}': must be ASC or DESC`);
      }
      orderClause = `ORDER BY ${orderBy} ${direction}`;
    }

    // LIMIT clause (falsy limit, including 0, means "no limit" as before)
    let limitClause = '';
    if (limit) {
      // SECURITY FIX: limit is interpolated into SQL text; require a plain
      // positive integer.
      const limitRows = Number(limit);
      if (!Number.isInteger(limitRows) || limitRows <= 0) {
        throw new Error(`[QueryBuilder] Invalid limit '${limit}': must be a positive integer`);
      }
      limitClause = `LIMIT ${limitRows}`;
    }

    const sql = [
      `SELECT ${selectClause}`,
      `FROM ${tablePath}`,
      whereClause,
      orderClause,
      limitClause
    ].filter(Boolean).join('\n');

    return {
      sql,
      params,
      table,
      spec
    };
  }

  /** Case-insensitive check for BigQuery numeric column types. */
  _isNumericType(type) {
    const numericTypes = ['INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'INTEGER', 'FLOAT'];
    return numericTypes.includes(type.toUpperCase());
  }
}
|
|
231
|
+
|
|
232
|
+
module.exports = { QueryBuilder };
|