bulltrackers-module 1.0.765 → 1.0.768

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/functions/computation-system-v2/computations/BehavioralAnomaly.js +298 -186
  2. package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
  3. package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
  4. package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
  5. package/functions/computation-system-v2/config/bulltrackers.config.js +26 -14
  6. package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
  7. package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
  8. package/functions/computation-system-v2/framework/data/DataFetcher.js +142 -4
  9. package/functions/computation-system-v2/framework/execution/Orchestrator.js +119 -122
  10. package/functions/computation-system-v2/framework/storage/StorageManager.js +16 -18
  11. package/functions/computation-system-v2/framework/testing/ComputationTester.js +155 -66
  12. package/functions/computation-system-v2/handlers/scheduler.js +15 -5
  13. package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
  14. package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
  15. package/package.json +1 -1
  16. package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
  17. package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
  18. package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
  19. package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
  20. package/functions/computation-system-v2/test/analyze-results.js +0 -238
  21. package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
  22. package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
  23. package/functions/computation-system-v2/test/other/test-framework.js +0 -500
  24. package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
  25. package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
  26. package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
  27. package/functions/computation-system-v2/test/other/test-results.json +0 -31
  28. package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
  29. package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
  30. package/functions/computation-system-v2/test/other/test-storage.js +0 -449
  31. package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
  32. package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
  33. package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
@@ -1,227 +0,0 @@
1
- /**
2
- * @fileoverview Full Pipeline Integration Test
3
- * * Simulates the entire flow from Dispatcher -> Orchestrator -> Worker Pool -> Storage.
4
- * * Verifies that:
5
- * 1. The Dispatcher BLOCKS computations with missing mandatory data (RunAnalyzer).
6
- * 2. Runnable computations are sent to the Worker Pool (if configured).
7
- * 3. Results are stored in the TEST table (not production).
8
- * * * USAGE:
9
- * node test/test-full-pipeline.js --date 2026-01-24
10
- */
11
-
12
- process.env.NODE_ENV = 'test';
13
- process.env.WORKER_LOCAL_MODE = 'true'; // Simulate workers locally
14
- process.env.WORKER_POOL_ENABLED = 'true';
15
-
16
- const fs = require('fs');
17
- const path = require('path');
18
- const { Orchestrator } = require('../framework/execution/Orchestrator');
19
- const { TestConfigBuilder } = require('./run-pipeline-test');
20
- const prodConfig = require('../config/bulltrackers.config');
21
-
22
- // ============================================================================
23
- // TEST RUNNER
24
- // ============================================================================
25
-
26
- async function runPipelineTest() {
27
- const args = parseArgs();
28
- console.log('\n╔════════════════════════════════════════════════════════════╗');
29
- console.log('║ FULL PIPELINE INTEGRATION TEST ║');
30
- console.log('╚════════════════════════════════════════════════════════════╝');
31
- console.log(`📅 Target Date: ${args.date}`);
32
- console.log(`🧪 Test Table: computation_results_test`);
33
- console.log(`👷 Worker Pool: ENABLED (Local Simulation)\n`);
34
-
35
- // 0. DYNAMICALLY LOAD COMPUTATIONS
36
- // This fixes the "Initialized with 0 computations" error
37
- const computationsDir = path.join(__dirname, '../computations');
38
- const loadedComputations = fs.readdirSync(computationsDir)
39
- .filter(f => f.endsWith('.js'))
40
- .map(f => require(path.join(computationsDir, f)));
41
-
42
- // Inject into config
43
- prodConfig.computations = loadedComputations;
44
- console.log(`📦 Auto-discovered ${prodConfig.computations.length} computations from /computations directory`);
45
-
46
- // 1. CONFIGURE TEST ENVIRONMENT
47
- const builder = new TestConfigBuilder(prodConfig, {
48
- runId: `test-${Date.now()}`,
49
- date: args.date,
50
- testBucket: 'bulltrackers-computation-staging', // <--- FIX: Use your real bucket
51
- batchSize: 1000,
52
- concurrency: 2
53
- });
54
-
55
- const testConfig = builder.build();
56
-
57
- // Explicitly enable worker pool in the test config
58
- testConfig.workerPool = {
59
- ...prodConfig.workerPool,
60
- enabled: true,
61
- localMode: true,
62
- minEntitiesForOffload: 0 // Force everything to worker pool for testing
63
- };
64
-
65
- // 2. INITIALIZE ORCHESTRATOR
66
- const orchestrator = new Orchestrator(testConfig, console);
67
- await orchestrator.initialize();
68
-
69
- // -------------------------------------------------------------------------
70
- // PHASE 1: DISPATCHER VERIFICATION (Pre-Flight Check)
71
- // -------------------------------------------------------------------------
72
- console.log('🔍 PHASE 1: DISPATCHER ANALYSIS (The Gatekeeper)');
73
- console.log(' Verifying that missing data BLOCKS execution...');
74
-
75
- const analysis = await orchestrator.analyze({ date: args.date });
76
-
77
- printAnalysisTable(analysis);
78
-
79
- // Validation: Ensure nothing "Impossible" or "Blocked" is in the runnable list
80
- const badRunnables = analysis.runnable.filter(r =>
81
- analysis.blocked.find(b => b.name === r.name) ||
82
- analysis.impossible.find(i => i.name === r.name)
83
- );
84
-
85
- if (badRunnables.length > 0) {
86
- console.error('❌ CRITICAL FAILURE: Dispatcher marked blocked tasks as runnable!');
87
- process.exit(1);
88
- }
89
- console.log('✅ Dispatcher logic validated. Blocked tasks will NOT run.\n');
90
-
91
- // -------------------------------------------------------------------------
92
- // PHASE 2: EXECUTION (Worker Pool & Storage)
93
- // -------------------------------------------------------------------------
94
- console.log('🚀 PHASE 2: PIPELINE EXECUTION');
95
- console.log(' Running only valid tasks via Worker Pool...');
96
-
97
- // We intercept storage to verify writes without polluting real DB (optional if using test table)
98
- const storageInterceptor = new TestStorageInterceptor(orchestrator.storageManager);
99
- orchestrator.storageManager = storageInterceptor;
100
-
101
- // Run!
102
- const result = await orchestrator.execute({
103
- date: args.date,
104
- dryRun: false // We want to test the full "write" path to the test table
105
- });
106
-
107
- // -------------------------------------------------------------------------
108
- // PHASE 3: VERIFICATION & REPORTING
109
- // -------------------------------------------------------------------------
110
- console.log('\n📊 PHASE 3: FINAL REPORT');
111
-
112
- // 1. Did Blocked Tasks Run?
113
- const blockedRan = result.completed.filter(c =>
114
- analysis.blocked.find(b => b.name === c.name)
115
- );
116
-
117
- if (blockedRan.length > 0) {
118
- console.error(`❌ FAILURE: The following BLOCKED tasks executed anyway: ${blockedRan.map(c => c.name).join(', ')}`);
119
- } else {
120
- console.log('✅ SUCCESS: No blocked tasks were executed.');
121
- }
122
-
123
- // 2. Did Runnable Tasks Succeed?
124
- const runnableNames = analysis.runnable.map(r => r.name);
125
- const successfulRunnables = result.completed.filter(c => runnableNames.includes(c.name));
126
-
127
- if (successfulRunnables.length > 0) {
128
- console.log(`✅ SUCCESS: ${successfulRunnables.length} runnable tasks completed successfully.`);
129
- } else if (runnableNames.length > 0) {
130
- console.warn('⚠️ WARNING: Runnable tasks existed but none completed (check errors below).');
131
- } else {
132
- console.log('ℹ️ No runnable tasks found (this is expected if data is missing).');
133
- }
134
-
135
- // 3. Storage Verification
136
- const writes = storageInterceptor.getSummary();
137
- console.log(`💾 Storage: Written ${writes.totalEntities} entity results to ${testConfig.resultStore.table}`);
138
-
139
- if (result.summary.errors > 0) {
140
- console.log('\n❌ EXECUTION ERRORS:');
141
- result.errors.forEach(e => console.log(` - ${e.name}: ${e.error}`));
142
- }
143
- }
144
-
145
- // ============================================================================
146
- // HELPERS
147
- // ============================================================================
148
-
149
- function printAnalysisTable(analysis) {
150
- console.log('\n ┌──────────────────────────────┬──────────────┬──────────────────────────────────────────┐');
151
- console.log(' │ Computation │ Status │ Reason │');
152
- console.log(' ├──────────────────────────────┼──────────────┼──────────────────────────────────────────┤');
153
-
154
- const all = [
155
- ...analysis.runnable.map(r => ({ ...r, status: 'RUNNABLE' })),
156
- ...analysis.blocked.map(r => ({ ...r, status: 'BLOCKED' })),
157
- ...analysis.impossible.map(r => ({ ...r, status: 'IMPOSSIBLE' })),
158
- ...analysis.reRuns.map(r => ({ ...r, status: 'RERUN' })),
159
- ...analysis.skipped.map(r => ({ ...r, status: 'SKIPPED' }))
160
- ];
161
-
162
- all.forEach(row => {
163
- const name = row.name.padEnd(28).slice(0, 28);
164
- const status = row.status.padEnd(12);
165
- const reason = (row.reason || 'Ready to run').padEnd(40).slice(0, 40);
166
-
167
- let color = '\x1b[37m'; // White
168
- if (row.status === 'BLOCKED') color = '\x1b[31m'; // Red
169
- if (row.status === 'RUNNABLE') color = '\x1b[32m'; // Green
170
-
171
- console.log(` │ ${color}${name}\x1b[0m │ ${color}${status}\x1b[0m │ ${reason} │`);
172
- });
173
- console.log(' └──────────────────────────────┴──────────────┴──────────────────────────────────────────┘\n');
174
- }
175
-
176
- /**
177
- * Simple Storage Interceptor to verify writes
178
- */
179
- class TestStorageInterceptor {
180
- constructor(realStorage) {
181
- this.realStorage = realStorage;
182
- this.writes = [];
183
-
184
- // Proxy methods
185
- return new Proxy(this, {
186
- get(target, prop) {
187
- if (prop in target) return target[prop];
188
- if (typeof target.realStorage[prop] === 'function') {
189
- return target.realStorage[prop].bind(target.realStorage);
190
- }
191
- return target.realStorage[prop];
192
- }
193
- });
194
- }
195
-
196
- async commitResults(date, entry, results, depHashes) {
197
- this.writes.push({
198
- date,
199
- computation: entry.name,
200
- count: Object.keys(results).length
201
- });
202
- // Pass through to real storage (which is pointing to test table)
203
- return this.realStorage.commitResults(date, entry, results, depHashes);
204
- }
205
-
206
- getSummary() {
207
- return {
208
- totalWrites: this.writes.length,
209
- totalEntities: this.writes.reduce((sum, w) => sum + w.count, 0),
210
- details: this.writes
211
- };
212
- }
213
- }
214
-
215
- function parseArgs() {
216
- const args = process.argv.slice(2);
217
- let date = new Date();
218
- date.setDate(date.getDate() - 1); // Default to yesterday
219
-
220
- for (let i = 0; i < args.length; i++) {
221
- if (args[i] === '--date') date = new Date(args[++i]);
222
- }
223
-
224
- return { date: date.toISOString().slice(0, 10) };
225
- }
226
-
227
- runPipelineTest().catch(console.error);
@@ -1,266 +0,0 @@
1
- /**
2
- * @fileoverview Worker Pool Integration Test (Real Data)
3
- * * Tests the Worker Pool pipeline using REAL BigQuery data.
4
- * * WHAT IT DOES:
5
- * 1. Connects to BigQuery using your local credentials.
6
- * 2. Fetches REAL data for the specified date/computation.
7
- * 3. Simulates the Orchestrator's data packaging.
8
- * 4. Runs the worker logic locally (via RemoteTaskRunner localMode).
9
- * * USAGE:
10
- * node test/test-worker-pool.js --date 2026-01-24 --computation UserPortfolioSummary
11
- * * FLAGS:
12
- * --date YYYY-MM-DD (Required) Target date
13
- * --computation Name (Default: UserPortfolioSummary)
14
- * --limit N (Default: 5) Number of entities to test
15
- * --entities id1,id2 (Optional) Specific entities to test
16
- */
17
-
18
- const path = require('path');
19
- const { RemoteTaskRunner } = require('../framework/execution/RemoteTaskRunner');
20
- const { SchemaRegistry } = require('../framework/data/SchemaRegistry');
21
- const { QueryBuilder } = require('../framework/data/QueryBuilder');
22
- const { DataFetcher } = require('../framework/data/DataFetcher');
23
- const config = require('../config/bulltrackers.config');
24
-
25
- // Force Local Mode for the Worker Pool (runs logic in-process but uses the Runner's pipeline)
26
- process.env.WORKER_LOCAL_MODE = 'true';
27
- process.env.WORKER_POOL_ENABLED = 'true';
28
-
29
- // ============================================================================
30
- // HELPER: Mini Orchestrator (Data Loading)
31
- // ============================================================================
32
-
33
- class TestContext {
34
- constructor() {
35
- this.logger = console;
36
-
37
- // Initialize Framework Data Layer
38
- this.schemaRegistry = new SchemaRegistry(config.bigquery, this.logger);
39
- this.queryBuilder = new QueryBuilder(config.bigquery, this.schemaRegistry, this.logger);
40
- this.dataFetcher = new DataFetcher({ ...config.bigquery, tables: config.tables }, this.queryBuilder, this.logger);
41
- }
42
-
43
- async initialize() {
44
- console.log('🔌 Connecting to BigQuery and initializing Schema Registry...');
45
- // We only warm cache for tables we'll likely need to save time
46
- // In a real run, we might scan the computation's requirements
47
- }
48
- }
49
-
50
- // ============================================================================
51
- // MAIN RUNNER
52
- // ============================================================================
53
-
54
- async function runRealDataTest() {
55
- const args = parseArgs();
56
- console.log('\n╔════════════════════════════════════════════════════════════╗');
57
- console.log('║ WORKER POOL INTEGRATION TEST (REAL DATA) ║');
58
- console.log('╚════════════════════════════════════════════════════════════╝');
59
- console.log(`📅 Date: ${args.date}`);
60
- console.log(`🧮 Computation: ${args.computation}`);
61
- console.log(`📉 Entity Limit:${args.limit}`);
62
- console.log(`🔧 Local Mode: ENABLED\n`);
63
-
64
- const ctx = new TestContext();
65
- await ctx.initialize();
66
-
67
- // 1. Load the Computation Class
68
- const computationPath = path.join(__dirname, `../computations/${args.computation}`);
69
- let ComputationClass;
70
- try {
71
- ComputationClass = require(computationPath);
72
- } catch (e) {
73
- throw new Error(`Could not load computation at ${computationPath}: ${e.message}`);
74
- }
75
-
76
- const compConfig = ComputationClass.getConfig();
77
- const manifestEntry = {
78
- name: compConfig.name.toLowerCase(),
79
- originalName: compConfig.name,
80
- type: compConfig.type,
81
- requires: compConfig.requires || {},
82
- dependencies: compConfig.dependencies || [],
83
- hash: 'test-hash-123'
84
- };
85
-
86
- console.log(`📦 Loaded ${manifestEntry.originalName}`);
87
- console.log(` Requires: ${Object.keys(manifestEntry.requires).join(', ')}`);
88
-
89
- // 2. Determine Driver Table (for batching)
90
- const driverTable = getDriverTable(manifestEntry.requires, config);
91
- if (!driverTable) {
92
- throw new Error(`Could not determine driver table (entityField) for ${args.computation}. Is it a global computation?`);
93
- }
94
- const driverEntityField = config.tables[driverTable].entityField;
95
- console.log(` Driver Table: ${driverTable} (${driverEntityField})`);
96
-
97
- // 3. Fetch Data (The "Heavy Lifting" usually done by Orchestrator)
98
- console.log('\n📥 Fetching REAL data from BigQuery...');
99
-
100
- // Split requirements
101
- const { batchRequires, globalRequires } = splitRequirements(manifestEntry.requires, driverTable, config);
102
-
103
- // Fetch Global Data (Shared)
104
- let globalData = {};
105
- if (Object.keys(globalRequires).length > 0) {
106
- console.log(` Fetching global requirements: ${Object.keys(globalRequires).join(', ')}...`);
107
- globalData = await ctx.dataFetcher.fetchForComputation(globalRequires, args.date);
108
- }
109
-
110
- // Fetch Batch Data (Per-Entity)
111
- console.log(` Fetching batch requirements: ${Object.keys(batchRequires).join(', ')}...`);
112
-
113
- // Create a specific entity filter if requested
114
- if (args.entities.length > 0) {
115
- // Inject where clause for specific entities
116
- Object.values(batchRequires).forEach(req => {
117
- req.where = req.where || {};
118
- req.where[driverEntityField] = args.entities; // This relies on DataFetcher supporting array-based WHERE
119
- });
120
- }
121
-
122
- // We fetch one large batch for the test
123
- const batchStream = ctx.dataFetcher.fetchComputationBatched(batchRequires, args.date, args.limit);
124
-
125
- // Get the first batch
126
- const { value: batch, done } = await batchStream.next();
127
-
128
- if (done || !batch || batch.entityIds.length === 0) {
129
- console.warn('⚠️ No data found for this date/computation.');
130
- return;
131
- }
132
-
133
- console.log(`✅ Data fetched. Processing ${batch.entityIds.length} entities: ${batch.entityIds.join(', ')}`);
134
-
135
- // 4. Initialize RemoteTaskRunner
136
- // We configured WORKER_LOCAL_MODE=true, so this will run logic in-process
137
- // but crucially, it will go through the _buildContextPackage logic.
138
- const runner = new RemoteTaskRunner({
139
- workerPool: {
140
- enabled: true,
141
- localMode: true, // Run in-process
142
- concurrency: 5
143
- }
144
- }, console);
145
-
146
- // Prepare Context
147
- const baseContext = {
148
- references: {}, // TODO: Load ref data if needed
149
- config: {
150
- project: config.bigquery.projectId,
151
- tables: config.tables
152
- }
153
- };
154
-
155
- // Prepare Data Maps
156
- const entityDataMap = new Map();
157
- const combinedData = { ...batch.data, ...globalData };
158
-
159
- for (const entityId of batch.entityIds) {
160
- const entityData = filterDataForEntity(combinedData, entityId, driverEntityField, config);
161
- entityDataMap.set(entityId, entityData);
162
- }
163
-
164
- // 5. Execute via Worker Pool Logic
165
- console.log('\n🚀 Executing via RemoteTaskRunner (Local Simulation)...');
166
-
167
- const startTime = Date.now();
168
-
169
- const { results, errors } = await runner.runBatch(
170
- manifestEntry,
171
- args.date,
172
- baseContext,
173
- batch.entityIds,
174
- entityDataMap,
175
- {} // Mock dependencies for now
176
- );
177
-
178
- const duration = Date.now() - startTime;
179
-
180
- // 6. Report Results
181
- console.log('\n╔════════════════════════════════════════════════════════════╗');
182
- console.log('║ EXECUTION REPORT ║');
183
- console.log('╚════════════════════════════════════════════════════════════╝');
184
- console.log(`⏱️ Duration: ${duration}ms`);
185
- console.log(`✅ Success: ${Object.keys(results).length}`);
186
- console.log(`❌ Errors: ${errors.length}`);
187
-
188
- if (Object.keys(results).length > 0) {
189
- console.log('\n🔍 Sample Result (First Entity):');
190
- const sampleId = Object.keys(results)[0];
191
- console.log(` Entity: ${sampleId}`);
192
- console.log(JSON.stringify(results[sampleId], null, 2));
193
- }
194
-
195
- if (errors.length > 0) {
196
- console.log('\n❌ Errors:');
197
- errors.forEach(e => console.log(` [${e.entityId}] ${e.error}`));
198
- }
199
- }
200
-
201
- // ============================================================================
202
- // UTILS (Duplicated from Orchestrator logic)
203
- // ============================================================================
204
-
205
- function getDriverTable(requires, config) {
206
- for (const name of Object.keys(requires)) {
207
- const conf = config.tables[name];
208
- if (conf && conf.entityField) return name;
209
- }
210
- return null;
211
- }
212
-
213
- function splitRequirements(requires, driverTable, config) {
214
- const batchRequires = {};
215
- const globalRequires = {};
216
- const driverConfig = config.tables[driverTable];
217
- const driverEntityField = driverConfig ? driverConfig.entityField : null;
218
-
219
- for (const [name, spec] of Object.entries(requires)) {
220
- const conf = config.tables[name];
221
- if (conf && conf.entityField === driverEntityField) {
222
- batchRequires[name] = spec;
223
- } else {
224
- globalRequires[name] = spec;
225
- }
226
- }
227
- return { batchRequires, globalRequires };
228
- }
229
-
230
- function filterDataForEntity(data, id, driverEntityField, config) {
231
- const out = {};
232
- Object.entries(data).forEach(([tbl, d]) => {
233
- const conf = config.tables[tbl] || {};
234
- if (conf.entityField === driverEntityField && d && !Array.isArray(d)) {
235
- out[tbl] = d[id] || null;
236
- } else {
237
- out[tbl] = d;
238
- }
239
- });
240
- return out;
241
- }
242
-
243
- function parseArgs() {
244
- const args = process.argv.slice(2);
245
- const options = {
246
- date: new Date().toISOString().slice(0, 10),
247
- computation: 'UserPortfolioSummary',
248
- limit: 5,
249
- entities: []
250
- };
251
-
252
- for (let i = 0; i < args.length; i++) {
253
- const arg = args[i];
254
- if (arg === '--date') options.date = args[++i];
255
- if (arg === '--computation') options.computation = args[++i];
256
- if (arg === '--limit') options.limit = parseInt(args[++i], 10);
257
- if (arg === '--entities') options.entities = args[++i].split(',');
258
- }
259
- return options;
260
- }
261
-
262
- // Run
263
- runRealDataTest().catch(e => {
264
- console.error('\n💥 Fatal Error:', e);
265
- process.exit(1);
266
- });