bulltrackers-module 1.0.734 → 1.0.736
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +75 -5
- package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
- package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
- package/functions/computation-system-v2/framework/index.js +10 -3
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
- package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
- package/functions/computation-system-v2/handlers/index.js +10 -1
- package/functions/computation-system-v2/handlers/scheduler.js +85 -193
- package/functions/computation-system-v2/handlers/worker.js +242 -0
- package/functions/computation-system-v2/index.js +5 -0
- package/functions/computation-system-v2/test/analyze-results.js +238 -0
- package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
- package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
- package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
- package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
- package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
- package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
- package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
- package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
- package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
- package/functions/computation-system-v2/test/test-worker-pool.js +494 -0
- package/index.js +8 -39
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/TestComputation.js +0 -46
- package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
package/functions/computation-system-v2/config/bulltrackers.config.js (+75 -5)

@@ -1,10 +1,8 @@
 /**
  * @fileoverview BullTrackers Configuration for Computation System v2
- *
- * This is the ONLY file that contains BullTrackers-specific knowledge.
+ * * This is the ONLY file that contains BullTrackers-specific knowledge.
  * The framework itself is completely domain-agnostic.
- *
- * Business Rules:
+ * * Business Rules:
  * - Rules are injected into computations automatically
  * - When a rule changes, computations using it are re-run
  * - Computations should be "simple recipes" that call rules
@@ -37,6 +35,15 @@ module.exports = {
     location: 'europe-west1',
     cacheTTLMs: 3600000 // 1 hour schema cache
   },
+
+  // =========================================================================
+  // GCS CONFIGURATION (NEW: For Batch Loading)
+  // =========================================================================
+
+  gcs: {
+    bucket: process.env.GCS_BUCKET_ID || 'bulltrackers-computation-staging',
+    prefix: 'staging'
+  },
 
   // =========================================================================
   // TABLE DEFINITIONS
@@ -313,5 +320,68 @@ module.exports = {
     // null = all computations allowed
     // array = only listed computations allowed
     allowedComputations: null
+  },
+
+  // =========================================================================
+  // WORKER POOL CONFIGURATION (SERVERLESS WORKERS)
+  // =========================================================================
+  //
+  // Enables offloading per-entity computations to a serverless worker pool.
+  // Workers run as separate Cloud Functions with high concurrency.
+  //
+  // Benefits:
+  // - Massive parallelism (100s of concurrent entity computations)
+  // - Cost efficient (workers scale to zero, high concurrency per instance)
+  // - Fault isolation (one entity failure doesn't affect others)
+  // - Memory efficient (workers only load one entity's data at a time)
+  //
+  // Data Flow:
+  // 1. Orchestrator packages entity data → GCS
+  // 2. Orchestrator invokes workers in parallel
+  // 3. Workers load data from GCS, execute, return result
+  // 4. Orchestrator collects results, commits to storage
+  // =========================================================================
+
+  workerPool: {
+    // Master switch - set to true to enable worker pool
+    enabled: process.env.WORKER_POOL_ENABLED === 'true',
+
+    // Local mode for testing - runs workers in-process without GCS/HTTP
+    // Set WORKER_LOCAL_MODE=true or pass localMode: true in config
+    localMode: process.env.WORKER_LOCAL_MODE === 'true',
+
+    // Worker Cloud Function URL
+    workerUrl: process.env.WORKER_URL ||
+      'https://europe-west1-stocks-12345.cloudfunctions.net/computation-worker',
+
+    // GCS bucket for temporary context packages
+    // Should have lifecycle rule to auto-delete after 1 day
+    tempBucket: process.env.WORKER_TEMP_BUCKET || 'bulltrackers-worker-staging',
+
+    // Max concurrent worker invocations
+    // Higher = faster but more network/GCS load
+    // Recommended: 100-200 for production
+    concurrency: parseInt(process.env.WORKER_CONCURRENCY || '100', 10),
+
+    // Worker invocation timeout (ms)
+    // Should be slightly less than worker function timeout
+    timeout: parseInt(process.env.WORKER_TIMEOUT || '60000', 10),
+
+    // Retry count for transient failures
+    retries: 2,
+
+    // Minimum entities to trigger worker pool
+    // Below this threshold, run locally (overhead not worth it)
+    minEntitiesForOffload: parseInt(process.env.WORKER_MIN_ENTITIES || '50', 10),
+
+    // Computations that should NEVER use worker pool
+    // (e.g., need persistent state, special middleware)
+    excludeComputations: [],
+
+    // Computations that MUST use worker pool (override threshold)
+    // Useful for testing specific computations
+    forceOffloadComputations: process.env.WORKER_FORCE_COMPUTATIONS
+      ? process.env.WORKER_FORCE_COMPUTATIONS.split(',')
+      : []
   }
-};
+};
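The workerPool block above is controlled entirely through environment variables (WORKER_POOL_ENABLED, WORKER_LOCAL_MODE, WORKER_CONCURRENCY, WORKER_MIN_ENTITIES, WORKER_FORCE_COMPUTATIONS), so offloading can be toggled per deployment without a code change. A minimal sketch of the offload decision these settings describe — the shouldOffload helper and the computation name are hypothetical; only the config keys come from the package:

// Hypothetical helper; illustrates how the workerPool settings above combine
// into an "offload or run locally" decision. Not part of the package itself.
const config = require('./bulltrackers.config.js');

function shouldOffload(computationName, entityCount) {
  const pool = config.workerPool;
  if (!pool.enabled) return false;                                          // master switch off
  if (pool.excludeComputations.includes(computationName)) return false;     // never offload these
  if (pool.forceOffloadComputations.includes(computationName)) return true; // always offload these
  return entityCount >= pool.minEntitiesForOffload;                         // threshold check
}

// e.g. with WORKER_POOL_ENABLED=true and WORKER_MIN_ENTITIES=50:
//   shouldOffload('riskMetrics', 10)  -> false (run locally, overhead not worth it)
//   shouldOffload('riskMetrics', 500) -> true  (fan out to the worker pool)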
package/functions/computation-system-v2/framework/data/DataFetcher.js (+107 -105)

@@ -2,33 +2,15 @@
  * @fileoverview Data Fetcher - Executes queries and transforms results
  * * The single point of data access for computations.
  * Uses QueryBuilder for validation, executes against BigQuery, transforms results.
- * * V2.
- *
+ * * V2.2 FIX: "Identity Crisis" & "Date as ID" bugs.
+ * * V2.3 FIX: "Insufficient History" bug.
+ *   - fetchBatched now orders by Entity ID to keep historical rows together.
+ *   - Implemented "Entity-Atomic Batching" to prevent splitting a user's history across batches.
  */
 
 const { BigQuery } = require('@google-cloud/bigquery');
 
-/**
- * @typedef {Object} FetchOptions
- * @property {string} table - Table name
- * @property {string} targetDate - Target date (YYYY-MM-DD)
- * @property {number} [lookback=0] - Days to look back
- * @property {boolean} [mandatory=false] - If true, throws on no data
- * @property {Object} [filter] - Additional WHERE conditions
- * @property {string[]} [fields] - Specific fields to select
- * @property {string[]} [entities] - Specific entity IDs to fetch
- */
-
 class DataFetcher {
-  /**
-   * @param {Object} config - Configuration object
-   * @param {string} config.projectId - GCP project ID
-   * @param {string} config.dataset - BigQuery dataset name
-   * @param {string} [config.location='US'] - BigQuery location
-   * @param {Object} config.tables - Table configuration map
-   * @param {QueryBuilder} queryBuilder - Query builder instance
-   * @param {Object} [logger] - Logger instance
-   */
   constructor(config, queryBuilder, logger = null) {
     this.projectId = config.projectId;
     this.dataset = config.dataset;
@@ -39,7 +21,6 @@ class DataFetcher {
 
     this.client = new BigQuery({ projectId: this.projectId });
 
-    // Query stats
     this.stats = {
       queries: 0,
       rowsFetched: 0,
@@ -50,16 +31,11 @@ class DataFetcher {
 
   /**
    * Fetch data for a computation's requirements.
-   * @param {Object} requires - Computation's requires declaration
-   * @param {string} targetDate - Target date
-   * @param {string[]} [entities] - Optional entity IDs to filter
-   * @returns {Promise<Object>} Data keyed by table name
    */
   async fetchForComputation(requires, targetDate, entities = null) {
     const results = {};
     const errors = [];
 
-    // Fetch all tables in parallel
     await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
       try {
         const data = await this.fetch({
@@ -88,7 +64,6 @@ class DataFetcher {
       }
     }));
 
-    // Throw if mandatory data is missing
     if (errors.length > 0) {
       const msg = errors.map(e => `${e.table}: ${e.reason}`).join(', ');
       throw new Error(`[DataFetcher] Missing mandatory data: ${msg}`);
@@ -99,28 +74,22 @@ class DataFetcher {
 
   /**
    * STREAMING: Fetch data for computation in batches.
-   * Uses a "Driver Table" (one with entityField) to stream entities,
-   * then fetches dependencies for just that batch.
-   * * @param {Object} requires - Computation requires
-   * @param {string} targetDate - Target Date
-   * @param {number} batchSize - Rows per batch
-   * @returns {AsyncGenerator<{data: Object, entityIds: string[]}>}
    */
   async *fetchComputationBatched(requires, targetDate, batchSize = 1000) {
-    // 1. Identify Driver Table (The one we iterate over)
     const driverTableName = this._getDriverTable(requires);
+    const driverConfig = this.tables[driverTableName] || {};
 
     if (!driverTableName) {
       this._log('WARN', 'No entity-keyed table found for batching. Falling back to full fetch.');
       const fullData = await this.fetchForComputation(requires, targetDate);
-
+      const allIds = fullData ? this._extractAllEntityIds(fullData) : [];
+      yield { data: fullData, entityIds: allIds };
       return;
     }
 
-    this._log('INFO', `Starting batched fetch driven by table: ${driverTableName}`);
+    this._log('INFO', `Starting batched fetch driven by table: ${driverTableName} (${driverConfig.entityField})`);
     const driverSpec = requires[driverTableName];
 
-    // 2. Stream Driver Table
     const driverStream = this.fetchBatched({
       table: driverTableName,
       targetDate,
@@ -130,20 +99,24 @@ class DataFetcher {
       mandatory: driverSpec.mandatory
     }, batchSize);
 
-    // 3. Process Streams
     for await (const batch of driverStream) {
-      //
-      // The batch is already transformed (e.g. Object by EntityID or Array)
+      // FIX: Robust ID Extraction
      const entityIds = this._extractEntityIdsFromBatch(batch, driverTableName);
 
-      if (entityIds.length === 0)
+      if (entityIds.length === 0) {
+        this._log('WARN', `Driver batch from ${driverTableName} yielded 0 entity IDs. Check config.entityField matches BigQuery column.`);
+        continue;
+      }
 
      const batchResults = { [driverTableName]: batch };
      const errors = [];
 
-      // 4. Fetch Dependencies for this SPECIFIC batch of entities
      await Promise.all(Object.entries(requires).map(async ([tableName, spec]) => {
-        if (tableName === driverTableName) return;
+        if (tableName === driverTableName) return;
+
+        // FIX: Identity Crisis Check
+        const depConfig = this.tables[tableName] || {};
+        const shouldFilterById = depConfig.entityField === driverConfig.entityField;
 
        try {
          const data = await this.fetch({
@@ -153,13 +126,13 @@ class DataFetcher {
            mandatory: spec.mandatory || false,
            filter: spec.filter || {},
            fields: spec.fields || null,
-            entities:
+            entities: shouldFilterById ? entityIds : null
          });
 
          batchResults[tableName] = data;
 
          if (spec.mandatory && this._isEmpty(data)) {
-
+            this._log('WARN', `Batch warning: Mandatory table ${tableName} returned 0 rows. (Filtered by ID: ${shouldFilterById})`);
          }
        } catch (e) {
          if (spec.mandatory) errors.push({ table: tableName, reason: e.message });
@@ -168,7 +141,7 @@ class DataFetcher {
      }));
 
      if (errors.length > 0) {
-        this._log('WARN', `Batch missing mandatory data: ${errors.map(e => e.table).join(', ')}. Skipping batch.`);
+        this._log('WARN', `Batch missing mandatory data due to errors: ${errors.map(e => e.table).join(', ')}. Skipping batch.`);
        continue;
      }
 
@@ -176,14 +149,8 @@ class DataFetcher {
     }
   }
 
-  /**
-   * Fetch data from a single table (Full Load).
-   * @param {FetchOptions} options - Fetch options
-   * @returns {Promise<Object|Array|null>} Transformed data
-   */
   async fetch(options) {
     const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
-
     const tableConfig = this.tables[table] || {};
     const { dateField, entityField, dataField } = tableConfig;
 
@@ -199,32 +166,44 @@ class DataFetcher {
     return this._transform(rows, { lookback, dateField, entityField, dataField });
   }
 
-  /**
-   * STREAMING: Fetch data from a single table in batches.
-   * @param {FetchOptions} options
-   * @param {number} batchSize
-   * @returns {AsyncGenerator<Object|Array>}
-   */
   async *fetchBatched(options, batchSize = 1000) {
     const { table, targetDate, lookback = 0, filter = {}, fields = null, entities = null } = options;
     const tableConfig = this.tables[table] || {};
     const { dateField, entityField, dataField } = tableConfig;
 
+    // FIX #1: Prioritize ordering by Entity to keep historical rows together
     const query = await this.queryBuilder.build({
       table, select: fields, where: filter, dateField, targetDate, lookback, entityField, entities,
-      orderBy:
+      orderBy: entityField || dateField
     });
 
-    // Use Stream Executor
     const rowStream = this._executeStream(query);
 
     let batch = [];
+    let currentEntity = null;
+
     for await (const row of rowStream) {
-
-
-
-
+      // FIX #2: Entity-Atomic Batching
+      // If we have an entity field, verify we don't split an entity across batches
+      if (entityField) {
+        const rowEntity = String(row[entityField]);
+
+        // If batch is full AND we have moved to a new entity, yield the batch
+        // This ensures the current entity (which might have many rows) stays together
+        if (batch.length >= batchSize && rowEntity !== currentEntity && currentEntity !== null) {
+          yield this._transform(batch, { lookback, dateField, entityField, dataField });
+          batch = [];
+        }
+        currentEntity = rowEntity;
+      } else {
+        // Fallback for non-entity tables (strict count)
+        if (batch.length >= batchSize) {
+          yield this._transform(batch, { lookback, dateField, entityField, dataField });
+          batch = [];
+        }
      }
+
+      batch.push(row);
    }
 
    if (batch.length > 0) {
@@ -232,9 +211,6 @@ class DataFetcher {
     }
   }
 
-  /**
-   * Check if data exists for a table on a given date.
-   */
   async hasData(table, targetDate) {
     const tableConfig = this.tables[table] || {};
     const { dateField } = tableConfig;
@@ -248,9 +224,6 @@ class DataFetcher {
     }
   }
 
-  /**
-   * Check availability for multiple tables.
-   */
   async checkAvailability(requires, targetDate) {
     const available = [];
     const missing = [];
@@ -272,24 +245,16 @@ class DataFetcher {
   getStats() { return { ...this.stats }; }
   resetStats() { this.stats = { queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0 }; }
 
-  // =========================================================================
-  // PRIVATE METHODS
-  // =========================================================================
-
   async _execute(query) {
     this.stats.queries++;
     try {
-      this._log('DEBUG', `Executing (Full): ${query.sql.substring(0, 100)}...`);
       const [job] = await this.client.createQueryJob({
         query: query.sql, params: query.params, location: this.location
       });
       const [rows] = await job.getQueryResults();
-
       const [metadata] = await job.getMetadata();
-      const bytesProcessed = parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
       this.stats.rowsFetched += rows.length;
-      this.stats.bytesProcessed +=
-
+      this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
       return rows;
     } catch (e) {
       this.stats.errors++;
@@ -298,17 +263,13 @@ class DataFetcher {
     }
   }
 
-  // New Stream Executor using BigQuery Streams
   async *_executeStream(query) {
     this.stats.queries++;
     try {
-      this._log('DEBUG', `Executing (Stream): ${query.sql.substring(0, 100)}...`);
       const [job] = await this.client.createQueryJob({
         query: query.sql, params: query.params, location: this.location
       });
-
       const stream = job.getQueryResultsStream();
-
       for await (const row of stream) {
         this.stats.rowsFetched++;
         yield row;
@@ -320,13 +281,38 @@ class DataFetcher {
     }
   }
 
+  /**
+   * Transforms raw rows into a structured object.
+   * FIX: PRIORITIZE ENTITY FIELD.
+   * If entityField exists, we MUST return { [id]: [rows] } so IDs can be extracted.
+   */
   _transform(rows, config) {
     const { lookback, dateField, entityField, dataField } = config;
-
-    // Ensure rows is array
     const rowArray = Array.isArray(rows) ? rows : [rows];
 
-    //
+    // FIX: Primary Grouping = Entity
+    if (entityField) {
+      const byEntity = {};
+      for (const row of rowArray) {
+        const entityKey = String(row[entityField]);
+        if (!byEntity[entityKey]) {
+          // If simple fetch (no history), value is single object
+          // If history fetch (lookback), value is Array of rows
+          byEntity[entityKey] = lookback > 0 ? [] : null;
+        }
+
+        const value = dataField ? row[dataField] : row;
+
+        if (Array.isArray(byEntity[entityKey])) {
+          byEntity[entityKey].push(value);
+        } else {
+          byEntity[entityKey] = value;
+        }
+      }
+      return byEntity;
+    }
+
+    // Fallback: Date Grouping (Only if no Entity ID)
     if (lookback > 0 && dateField) {
       const byDate = {};
       for (const row of rowArray) {
@@ -338,17 +324,6 @@ class DataFetcher {
       return byDate;
     }
 
-    // For entity-keyed tables, return entity-keyed object
-    if (entityField) {
-      const byEntity = {};
-      for (const row of rowArray) {
-        const entityKey = String(row[entityField]);
-        byEntity[entityKey] = dataField ? row[dataField] : row;
-      }
-      return byEntity;
-    }
-
-    // Default: return array
     return rowArray;
   }
 
@@ -362,15 +337,42 @@ class DataFetcher {
 
   _extractEntityIdsFromBatch(batchData, tableName) {
     const config = this.tables[tableName] || {};
-
+    const field = config.entityField;
+
+    // Case 1: Transformed Object { "id1": data, "id2": data }
+    if (field && batchData && !Array.isArray(batchData)) {
      return Object.keys(batchData);
    }
-
-
-
+
+    // Case 2: Array of Rows (Only if _transform didn't group by entity)
+    if (Array.isArray(batchData) && field) {
+      const ids = [];
+      let undefinedCount = 0;
+      for (const r of batchData) {
+        const val = r[field];
+        if (val === undefined) {
+          undefinedCount++;
+        } else {
+          ids.push(String(val));
+        }
+      }
+      if (undefinedCount > 0) {
+        this._log('ERROR', `CRITICAL CONFIG ERROR: Found ${undefinedCount} rows in '${tableName}' where entityField '${field}' was UNDEFINED.`);
+      }
+      return ids;
    }
    return [];
  }
+
+  _extractAllEntityIds(fullData) {
+    const ids = new Set();
+    Object.values(fullData || {}).forEach(tableData => {
+      if (tableData && typeof tableData === 'object' && !Array.isArray(tableData)) {
+        Object.keys(tableData).forEach(k => ids.add(k));
+      }
+    });
+    return Array.from(ids);
+  }
 
   _formatDate(dateValue) {
     if (!dateValue) return null;
@@ -390,8 +392,8 @@ class DataFetcher {
   _log(level, message) {
     if (this.logger && typeof this.logger.log === 'function') {
       this.logger.log(level, `[DataFetcher] ${message}`);
-    } else if (level === 'ERROR') {
-      console.error(`[DataFetcher] ${message}`);
+    } else if (level === 'ERROR' || level === 'WARN') {
+      console.error(`[${level}] [DataFetcher] ${message}`);
     }
   }
 }