bulltrackers-module 1.0.733 → 1.0.734
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/README.md +152 -0
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +720 -0
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +176 -0
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +294 -0
- package/functions/computation-system-v2/computations/TestComputation.js +46 -0
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +172 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +317 -0
- package/functions/computation-system-v2/framework/core/Computation.js +73 -0
- package/functions/computation-system-v2/framework/core/Manifest.js +223 -0
- package/functions/computation-system-v2/framework/core/RuleInjector.js +53 -0
- package/functions/computation-system-v2/framework/core/Rules.js +231 -0
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +163 -0
- package/functions/computation-system-v2/framework/cost/CostTracker.js +154 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +399 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +232 -0
- package/functions/computation-system-v2/framework/data/SchemaRegistry.js +287 -0
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +498 -0
- package/functions/computation-system-v2/framework/execution/TaskRunner.js +35 -0
- package/functions/computation-system-v2/framework/execution/middleware/CostTrackerMiddleware.js +32 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +32 -0
- package/functions/computation-system-v2/framework/execution/middleware/Middleware.js +14 -0
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +47 -0
- package/functions/computation-system-v2/framework/index.js +45 -0
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +147 -0
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +80 -0
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +66 -0
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +327 -0
- package/functions/computation-system-v2/framework/storage/StateRepository.js +286 -0
- package/functions/computation-system-v2/framework/storage/StorageManager.js +469 -0
- package/functions/computation-system-v2/framework/storage/index.js +9 -0
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +86 -0
- package/functions/computation-system-v2/framework/utils/Graph.js +205 -0
- package/functions/computation-system-v2/handlers/dispatcher.js +109 -0
- package/functions/computation-system-v2/handlers/index.js +23 -0
- package/functions/computation-system-v2/handlers/onDemand.js +289 -0
- package/functions/computation-system-v2/handlers/scheduler.js +327 -0
- package/functions/computation-system-v2/index.js +163 -0
- package/functions/computation-system-v2/rules/index.js +49 -0
- package/functions/computation-system-v2/rules/instruments.js +465 -0
- package/functions/computation-system-v2/rules/metrics.js +304 -0
- package/functions/computation-system-v2/rules/portfolio.js +534 -0
- package/functions/computation-system-v2/rules/rankings.js +655 -0
- package/functions/computation-system-v2/rules/social.js +562 -0
- package/functions/computation-system-v2/rules/trades.js +545 -0
- package/functions/computation-system-v2/scripts/migrate-sectors.js +73 -0
- package/functions/computation-system-v2/test/test-dispatcher.js +317 -0
- package/functions/computation-system-v2/test/test-framework.js +500 -0
- package/functions/computation-system-v2/test/test-real-execution.js +166 -0
- package/functions/computation-system-v2/test/test-real-integration.js +194 -0
- package/functions/computation-system-v2/test/test-refactor-e2e.js +131 -0
- package/functions/computation-system-v2/test/test-results.json +31 -0
- package/functions/computation-system-v2/test/test-risk-metrics-computation.js +329 -0
- package/functions/computation-system-v2/test/test-scheduler.js +204 -0
- package/functions/computation-system-v2/test/test-storage.js +449 -0
- package/functions/orchestrator/index.js +18 -26
- package/package.json +3 -2
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview BullTrackers Configuration for Computation System v2
|
|
3
|
+
*
|
|
4
|
+
* This is the ONLY file that contains BullTrackers-specific knowledge.
|
|
5
|
+
* The framework itself is completely domain-agnostic.
|
|
6
|
+
*
|
|
7
|
+
* Business Rules:
|
|
8
|
+
* - Rules are injected into computations automatically
|
|
9
|
+
* - When a rule changes, computations using it are re-run
|
|
10
|
+
* - Computations should be "simple recipes" that call rules
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Load business rules
|
|
14
|
+
const rules = require('../rules');
|
|
15
|
+
|
|
16
|
+
module.exports = {
|
|
17
|
+
// =========================================================================
|
|
18
|
+
// PROJECT CONFIGURATION
|
|
19
|
+
// =========================================================================
|
|
20
|
+
|
|
21
|
+
project: {
|
|
22
|
+
id: 'bulltrackers',
|
|
23
|
+
name: 'BullTrackers Analytics',
|
|
24
|
+
version: '2.0.0'
|
|
25
|
+
},
|
|
26
|
+
|
|
27
|
+
// System epoch - bump this to force all computations to re-run
|
|
28
|
+
epoch: 'v2.0.0',
|
|
29
|
+
|
|
30
|
+
// =========================================================================
|
|
31
|
+
// BIGQUERY CONFIGURATION
|
|
32
|
+
// =========================================================================
|
|
33
|
+
|
|
34
|
+
bigquery: {
|
|
35
|
+
projectId: process.env.GCP_PROJECT_ID || 'stocks-12345',
|
|
36
|
+
dataset: process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data',
|
|
37
|
+
location: 'europe-west1',
|
|
38
|
+
cacheTTLMs: 3600000 // 1 hour schema cache
|
|
39
|
+
},
|
|
40
|
+
|
|
41
|
+
// =========================================================================
|
|
42
|
+
// TABLE DEFINITIONS
|
|
43
|
+
// =========================================================================
|
|
44
|
+
//
|
|
45
|
+
// Each table entry tells the framework:
|
|
46
|
+
// - dateField: Which column is used for date partitioning (null if not partitioned)
|
|
47
|
+
// - entityField: Which column identifies the entity (user, asset, etc.)
|
|
48
|
+
// - dataField: If the actual data is in a JSON column, which one
|
|
49
|
+
//
|
|
50
|
+
// The framework will automatically discover the full schema from BigQuery.
|
|
51
|
+
// These hints just tell it how to interpret the data.
|
|
52
|
+
// =========================================================================
|
|
53
|
+
|
|
54
|
+
tables: {
|
|
55
|
+
// User Portfolio Snapshots
|
|
56
|
+
'portfolio_snapshots': {
|
|
57
|
+
dateField: 'date',
|
|
58
|
+
entityField: 'user_id',
|
|
59
|
+
dataField: 'portfolio_data',
|
|
60
|
+
description: 'Daily portfolio snapshots for all users'
|
|
61
|
+
},
|
|
62
|
+
|
|
63
|
+
// User Trade History
|
|
64
|
+
'trade_history_snapshots': {
|
|
65
|
+
dateField: 'date',
|
|
66
|
+
entityField: 'user_id',
|
|
67
|
+
dataField: 'history_data',
|
|
68
|
+
description: 'Daily trade history snapshots'
|
|
69
|
+
},
|
|
70
|
+
|
|
71
|
+
// Social Posts
|
|
72
|
+
'social_post_snapshots': {
|
|
73
|
+
dateField: 'date',
|
|
74
|
+
entityField: 'user_id',
|
|
75
|
+
dataField: 'posts_data',
|
|
76
|
+
description: 'Daily social post snapshots'
|
|
77
|
+
},
|
|
78
|
+
|
|
79
|
+
// Asset Prices
|
|
80
|
+
'asset_prices': {
|
|
81
|
+
dateField: 'date',
|
|
82
|
+
entityField: 'instrument_id',
|
|
83
|
+
dataField: null, // Flat table
|
|
84
|
+
description: 'Daily asset prices'
|
|
85
|
+
},
|
|
86
|
+
|
|
87
|
+
// PI Rankings
|
|
88
|
+
'pi_rankings': {
|
|
89
|
+
dateField: 'date',
|
|
90
|
+
entityField: 'pi_id',
|
|
91
|
+
dataField: 'rankings_data',
|
|
92
|
+
description: 'Daily PI rankings snapshot'
|
|
93
|
+
},
|
|
94
|
+
|
|
95
|
+
// PI Master List (not date-partitioned)
|
|
96
|
+
'pi_master_list': {
|
|
97
|
+
dateField: null, // Not date-partitioned
|
|
98
|
+
entityField: 'cid',
|
|
99
|
+
dataField: null,
|
|
100
|
+
description: 'Master list of all Popular Investors'
|
|
101
|
+
},
|
|
102
|
+
|
|
103
|
+
// PI Ratings
|
|
104
|
+
'pi_ratings': {
|
|
105
|
+
dateField: 'date',
|
|
106
|
+
entityField: 'pi_id',
|
|
107
|
+
dataField: null,
|
|
108
|
+
description: 'Daily PI ratings'
|
|
109
|
+
},
|
|
110
|
+
|
|
111
|
+
// PI Page Views
|
|
112
|
+
'pi_page_views': {
|
|
113
|
+
dateField: 'date',
|
|
114
|
+
entityField: 'pi_id',
|
|
115
|
+
dataField: null,
|
|
116
|
+
description: 'Daily PI page view metrics'
|
|
117
|
+
},
|
|
118
|
+
|
|
119
|
+
// Watchlist Membership
|
|
120
|
+
'watchlist_membership': {
|
|
121
|
+
dateField: 'date',
|
|
122
|
+
entityField: 'pi_id',
|
|
123
|
+
dataField: null,
|
|
124
|
+
description: 'Daily watchlist membership counts'
|
|
125
|
+
},
|
|
126
|
+
|
|
127
|
+
// PI Alert History
|
|
128
|
+
'pi_alert_history': {
|
|
129
|
+
dateField: 'date',
|
|
130
|
+
entityField: 'pi_id',
|
|
131
|
+
dataField: 'metadata',
|
|
132
|
+
description: 'Daily alert trigger history'
|
|
133
|
+
},
|
|
134
|
+
|
|
135
|
+
// Instrument Insights
|
|
136
|
+
'instrument_insights': {
|
|
137
|
+
dateField: 'date',
|
|
138
|
+
entityField: 'instrument_id',
|
|
139
|
+
dataField: 'insights_data',
|
|
140
|
+
description: 'Daily instrument insights'
|
|
141
|
+
},
|
|
142
|
+
|
|
143
|
+
// Ticker Mappings (not date-partitioned)
|
|
144
|
+
'ticker_mappings': {
|
|
145
|
+
dateField: null,
|
|
146
|
+
entityField: 'instrument_id',
|
|
147
|
+
dataField: null,
|
|
148
|
+
description: 'Instrument ID to ticker symbol mappings'
|
|
149
|
+
},
|
|
150
|
+
|
|
151
|
+
// Computation Results
|
|
152
|
+
'computation_results': {
|
|
153
|
+
dateField: 'date',
|
|
154
|
+
entityField: null, // Keyed by computation_name
|
|
155
|
+
dataField: 'result_data',
|
|
156
|
+
description: 'Stored computation results'
|
|
157
|
+
},
|
|
158
|
+
// NEW: Sector Mappings Table
|
|
159
|
+
'sector_mappings': {
|
|
160
|
+
dateField: null, // Static data
|
|
161
|
+
entityField: 'symbol', // Key the data by symbol for fast lookup
|
|
162
|
+
dataField: null,
|
|
163
|
+
description: 'Ticker to Sector mappings migrated from Firestore'
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
|
|
167
|
+
// NEW: Data to load globally for every computation
|
|
168
|
+
referenceData: [
|
|
169
|
+
'sector_mappings'
|
|
170
|
+
],
|
|
171
|
+
|
|
172
|
+
// =========================================================================
|
|
173
|
+
// RESULT STORAGE CONFIGURATION
|
|
174
|
+
// =========================================================================
|
|
175
|
+
//
|
|
176
|
+
// Using a separate v2 table to avoid conflicts with v1 schema.
|
|
177
|
+
// v1 table: computation_results (date, computation_name, category, result_data, metadata, created_at)
|
|
178
|
+
// v2 table: computation_results_v2 (with entity_id, code_hash, etc.)
|
|
179
|
+
// =========================================================================
|
|
180
|
+
|
|
181
|
+
resultStore: {
|
|
182
|
+
// Using v3 table since v2 has streaming buffer data that blocks DML
|
|
183
|
+
// Jobs-based inserts are FREE and don't have streaming buffer issues
|
|
184
|
+
table: 'computation_results_v3',
|
|
185
|
+
partitionField: 'date',
|
|
186
|
+
clusterFields: ['computation_name', 'category']
|
|
187
|
+
},
|
|
188
|
+
|
|
189
|
+
// =========================================================================
|
|
190
|
+
// COMPUTATIONS
|
|
191
|
+
// =========================================================================
|
|
192
|
+
//
|
|
193
|
+
// Computations are registered here. During development, we add them one
|
|
194
|
+
// by one as they're migrated from v1.
|
|
195
|
+
// =========================================================================
|
|
196
|
+
|
|
197
|
+
computations: [
|
|
198
|
+
// Add migrated computations here:
|
|
199
|
+
// require('../computations/UserRiskScore'),
|
|
200
|
+
],
|
|
201
|
+
|
|
202
|
+
// =========================================================================
|
|
203
|
+
// PREDEFINED FILTER SETS
|
|
204
|
+
// =========================================================================
|
|
205
|
+
//
|
|
206
|
+
// Computations can reference these by name instead of hardcoding filters.
|
|
207
|
+
// =========================================================================
|
|
208
|
+
|
|
209
|
+
filterSets: {
|
|
210
|
+
'popular_investors': {
|
|
211
|
+
user_type: 'POPULAR_INVESTOR'
|
|
212
|
+
},
|
|
213
|
+
'signed_in_users': {
|
|
214
|
+
user_type: 'SIGNED_IN_USER'
|
|
215
|
+
},
|
|
216
|
+
'all_tracked_users': {
|
|
217
|
+
user_type: ['POPULAR_INVESTOR', 'SIGNED_IN_USER']
|
|
218
|
+
}
|
|
219
|
+
},
|
|
220
|
+
|
|
221
|
+
// =========================================================================
|
|
222
|
+
// BUSINESS RULES
|
|
223
|
+
// =========================================================================
|
|
224
|
+
//
|
|
225
|
+
// Rules are automatically injected into computations.
|
|
226
|
+
// When a rule changes, all computations using it are re-run.
|
|
227
|
+
//
|
|
228
|
+
// Usage in computation:
|
|
229
|
+
// const positions = rules.portfolio.extractPositions(data);
|
|
230
|
+
// const sharpe = rules.metrics.calculateSharpeRatio(returns);
|
|
231
|
+
// =========================================================================
|
|
232
|
+
|
|
233
|
+
rules,
|
|
234
|
+
|
|
235
|
+
// =========================================================================
|
|
236
|
+
// EXECUTION CONFIGURATION
|
|
237
|
+
// =========================================================================
|
|
238
|
+
|
|
239
|
+
execution: {
|
|
240
|
+
// Max concurrent entity processing (per-entity computations)
|
|
241
|
+
// Higher = faster but more memory. Tune based on your Cloud Function memory.
|
|
242
|
+
entityConcurrency: 20,
|
|
243
|
+
|
|
244
|
+
// Batch size for BigQuery inserts
|
|
245
|
+
insertBatchSize: 500,
|
|
246
|
+
|
|
247
|
+
// Memory safety: max entities to load for a dependency
|
|
248
|
+
// If a dependency has more entities than this, use getDependency(name, entityId) instead
|
|
249
|
+
// This prevents OOM when running many concurrent per-entity computations
|
|
250
|
+
// Example: 20 concurrent * 50KB per entity * 50000 entities = 50GB (bad!)
|
|
251
|
+
// With limit: 20 concurrent * 50KB per entity * 10000 entities = 10GB (still risky at 2GB RAM)
|
|
252
|
+
// Recommendation: Set this based on your Cloud Function memory
|
|
253
|
+
maxDependencyEntities: 10000
|
|
254
|
+
},
|
|
255
|
+
|
|
256
|
+
// =========================================================================
|
|
257
|
+
// SCHEDULING CONFIGURATION
|
|
258
|
+
// =========================================================================
|
|
259
|
+
//
|
|
260
|
+
// Controls how computations are scheduled and dispatched.
|
|
261
|
+
// Cloud Tasks handles throttling and retry via queue configuration.
|
|
262
|
+
// =========================================================================
|
|
263
|
+
|
|
264
|
+
scheduling: {
|
|
265
|
+
// Default schedule for computations that don't declare one
|
|
266
|
+
default: {
|
|
267
|
+
frequency: 'daily',
|
|
268
|
+
time: '02:00',
|
|
269
|
+
timezone: 'UTC'
|
|
270
|
+
},
|
|
271
|
+
|
|
272
|
+
// Minimum gap between dependent computations (minutes)
|
|
273
|
+
// Pass 1 @ 14:00 → Pass 2 must be >= 14:15
|
|
274
|
+
dependencyGapMinutes: 15
|
|
275
|
+
},
|
|
276
|
+
|
|
277
|
+
// =========================================================================
|
|
278
|
+
// CLOUD TASKS CONFIGURATION
|
|
279
|
+
// =========================================================================
|
|
280
|
+
//
|
|
281
|
+
// Single queue handles all scheduled triggers.
|
|
282
|
+
// Queue settings (maxConcurrent, retry) are configured in GCP, not here.
|
|
283
|
+
// =========================================================================
|
|
284
|
+
|
|
285
|
+
cloudTasks: {
|
|
286
|
+
projectId: process.env.GCP_PROJECT_ID || 'stocks-12345',
|
|
287
|
+
location: 'europe-west1',
|
|
288
|
+
queueName: 'computation-triggers',
|
|
289
|
+
dispatcherUrl: process.env.DISPATCHER_URL ||
|
|
290
|
+
'https://europe-west1-stocks-12345.cloudfunctions.net/computeDispatcher',
|
|
291
|
+
// Service account for OIDC authentication when invoking Dispatcher
|
|
292
|
+
// This SA needs roles/cloudfunctions.invoker on the Dispatcher function
|
|
293
|
+
serviceAccountEmail: process.env.CLOUD_TASKS_SA_EMAIL ||
|
|
294
|
+
'computation-scheduler@stocks-12345.iam.gserviceaccount.com'
|
|
295
|
+
},
|
|
296
|
+
|
|
297
|
+
// =========================================================================
|
|
298
|
+
// ON-DEMAND API CONFIGURATION
|
|
299
|
+
// =========================================================================
|
|
300
|
+
//
|
|
301
|
+
// Frontend-triggered computation requests.
|
|
302
|
+
// Routes through Dispatcher for validation.
|
|
303
|
+
// =========================================================================
|
|
304
|
+
|
|
305
|
+
onDemand: {
|
|
306
|
+
// Rate limiting per user
|
|
307
|
+
maxRequestsPerMinute: 5,
|
|
308
|
+
|
|
309
|
+
// Request timeout (ms) - frontend is waiting
|
|
310
|
+
timeout: 60000,
|
|
311
|
+
|
|
312
|
+
// Which computations can be triggered on-demand
|
|
313
|
+
// null = all computations allowed
|
|
314
|
+
// array = only listed computations allowed
|
|
315
|
+
allowedComputations: null
|
|
316
|
+
}
|
|
317
|
+
};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Base Computation Class
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Base class for computations.
 *
 * Subclasses must override the static `getConfig()` and the instance
 * `process(context)` methods. Results are accumulated on `this.results`,
 * either keyed by entity (`setResult`) or replaced wholesale
 * (`setGlobalResult`).
 */
class Computation {
    constructor() {
        // Result accumulator; see setResult / setGlobalResult.
        this.results = {};
        // Runtime metadata; `log()` looks for an optional `logger` here.
        this._meta = {};
    }

    /**
     * Returns the computation's configuration object.
     * @returns {Object}
     * @throws {Error} Always, on the base class - subclasses must override.
     */
    static getConfig() {
        throw new Error('Computation.getConfig() must be implemented by subclass');
    }

    /**
     * Validates the object returned by `getConfig()`.
     *
     * Never throws: any exception raised while reading the config is
     * reported as a validation error instead.
     *
     * @returns {{valid: boolean, errors: string[]}}
     */
    static validateConfig() {
        const errors = [];
        try {
            const config = this.getConfig();

            // Guard first: without it a null/primitive config surfaces as a
            // misleading "getConfig() threw: Cannot read properties of ..." error.
            if (config === null || typeof config !== 'object') {
                errors.push('getConfig() must return an object');
                return { valid: false, errors };
            }

            if (!config.name || typeof config.name !== 'string') errors.push('name is required and must be a string');
            if (!config.requires || typeof config.requires !== 'object') errors.push('requires is required and must be an object');
            if (config.dependencies && !Array.isArray(config.dependencies)) errors.push('dependencies must be an array');

            // Validation for conditional dependencies
            if (config.conditionalDependencies) {
                if (!Array.isArray(config.conditionalDependencies)) {
                    errors.push('conditionalDependencies must be an array');
                } else {
                    config.conditionalDependencies.forEach((cd, idx) => {
                        // Optional chaining so a null/undefined entry is reported
                        // per-field instead of aborting validation with a TypeError.
                        const computation = cd?.computation;
                        const condition = cd?.condition;
                        if (!computation || typeof computation !== 'string') {
                            errors.push(`conditionalDependencies[${idx}].computation must be a string`);
                        }
                        if (typeof condition !== 'function') {
                            errors.push(`conditionalDependencies[${idx}].condition must be a function`);
                        }
                    });
                }
            }

            if (config.type && !['global', 'per-entity'].includes(config.type)) errors.push('type must be "global" or "per-entity"');
        } catch (e) {
            // Non-Error throwables have no `.message`; fall back to the value itself.
            errors.push(`getConfig() threw: ${e?.message ?? String(e)}`);
        }
        return { valid: errors.length === 0, errors };
    }

    /**
     * Runs the computation.
     * @param {Object} context - Execution context (shape is defined by the caller).
     * @throws {Error} Always, on the base class - subclasses must override.
     */
    async process(context) { throw new Error('Computation.process() must be implemented by subclass'); }

    /** @returns {Promise<Object>} The accumulated results. */
    async getResult() { return this.results; }

    /** @returns {null} The base class declares no schema. */
    static getSchema() { return null; }

    /** @returns {number} Weight of this computation; 1.0 on the base class. */
    static getWeight() { return 1.0; }

    /** Stores `result` under `entityId` in the results map. */
    setResult(entityId, result) { this.results[entityId] = result; }

    /** Replaces the whole results object with `result`. */
    setGlobalResult(result) { this.results = result; }

    /**
     * Reads a nested property using a dot-separated path.
     *
     * @param {Object} obj - Object to read from.
     * @param {string} path - Dot-separated path, e.g. `'a.b.c'`.
     * @param {*} [defaultValue=null] - Returned when `obj`/`path` is falsy, when an
     *   intermediate value is null/undefined, or when the final value is undefined.
     * @returns {*} The resolved value (a final `null` is returned as-is) or `defaultValue`.
     */
    get(obj, path, defaultValue = null) {
        if (!obj || !path) return defaultValue;
        const keys = path.split('.');
        let current = obj;
        for (const key of keys) {
            if (current === null || current === undefined) return defaultValue;
            current = current[key];
        }
        return current !== undefined ? current : defaultValue;
    }

    /**
     * Logs a message prefixed with the computation name from `getConfig()`.
     * Uses `this._meta.logger` when present, otherwise `console.log`.
     *
     * @param {string} level - Log level label.
     * @param {string} message - Message text.
     */
    log(level, message) {
        const config = this.constructor.getConfig();
        const prefix = `[${config.name}]`;
        if (this._meta.logger) this._meta.logger.log(level, `${prefix} ${message}`);
        else console.log(`${level}: ${prefix} ${message}`);
    }
}
|
|
72
|
+
|
|
73
|
+
module.exports = { Computation };
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Manifest Builder
|
|
3
|
+
* Responsible for:
|
|
4
|
+
* 1. Configuration validation
|
|
5
|
+
* 2. Hash generation (Version Control)
|
|
6
|
+
* 3. Schedule validation
|
|
7
|
+
* 4. Dependency resolution (delegated to Graph utils)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const crypto = require('crypto');
|
|
11
|
+
const { RulesRegistry } = require('./Rules');
|
|
12
|
+
const { ScheduleValidator } = require('../scheduling/ScheduleValidator');
|
|
13
|
+
const { Graph } = require('../utils/Graph');
|
|
14
|
+
|
|
15
|
+
/**
 * Builds the computation manifest: one entry per valid computation class,
 * ordered by `Graph.topologicalSort`, each carrying a version hash derived
 * from its code, the epoch, the shared layers and rules it uses, and the
 * hashes of its dependencies.
 */
class ManifestBuilder {
    /**
     * @param {Object} config - System configuration; reads `epoch` and `sharedLayers`,
     *   and is passed through to `RulesRegistry` and `ScheduleValidator`.
     * @param {Object|null} [logger=null] - Optional logger exposing `log(level, message)`.
     */
    constructor(config, logger = null) {
        this.config = config;
        this.logger = logger;
        this.epoch = config.epoch || 'v2.0.0';

        this.sharedLayers = config.sharedLayers || {};
        // Must run after sharedLayers is assigned: it hashes every export of every layer.
        this.layerHashes = this._hashSharedLayers();
        this.rulesRegistry = new RulesRegistry(config, logger);
        this.scheduleValidator = new ScheduleValidator(config, logger);
    }

    /** @returns {RulesRegistry} The registry created in the constructor. */
    getRulesRegistry() { return this.rulesRegistry; }

    /** @returns {ScheduleValidator} The validator created in the constructor. */
    getScheduleValidator() { return this.scheduleValidator; }

    /**
     * Builds the manifest for the given computation classes.
     *
     * Classes without a static `getConfig()` or with an invalid config are
     * logged and skipped.
     *
     * @param {Function[]} computations - Computation classes.
     * @returns {Object[]} Manifest entries in the order returned by `Graph.topologicalSort`,
     *   each with `pass` and the final `hash` set.
     * @throws {Error} When `Graph.detectCycle` reports a circular dependency.
     */
    build(computations) {
        this._log('INFO', `Building manifest for ${computations.length} computations...`);

        const manifestMap = new Map();
        const adjacency = new Map();

        // 1. Process Computations
        for (const ComputationClass of computations) {
            const entry = this._processComputation(ComputationClass);
            if (!entry) continue;

            // Names are normalized, so distinct classes can collide; the later
            // one still wins (as before) but the overwrite is no longer silent.
            if (manifestMap.has(entry.name)) {
                this._log('WARN', `Duplicate computation name '${entry.name}' (${entry.originalName}); the later definition replaces the earlier one`);
            }
            manifestMap.set(entry.name, entry);
            adjacency.set(entry.name, entry.dependencies);
        }

        // 2. Validate Dependencies
        // A dependency that is not registered contributes nothing to the
        // dependent's hash, so surface it instead of ignoring it.
        for (const entry of manifestMap.values()) {
            for (const dep of entry.dependencies) {
                if (!manifestMap.has(dep)) {
                    this._log('WARN', `Computation '${entry.name}' depends on unknown computation '${dep}'`);
                }
            }
        }

        const nodes = Array.from(manifestMap.keys());
        const cycle = Graph.detectCycle(nodes, adjacency);
        if (cycle) {
            throw new Error(`[Manifest] Circular dependency detected: ${cycle}`);
        }

        // 3. Topological Sort (calculates passes)
        const sortedItems = Graph.topologicalSort(nodes, adjacency);

        // 4. Hydrate Sorted List
        const finalManifest = sortedItems.map(item => {
            const entry = manifestMap.get(item.id);
            entry.pass = item.pass;
            return entry;
        });

        // 5. Finalize Hashes
        this._computeFinalHashes(finalManifest, manifestMap);

        // 6. Schedule Validation
        this._validateSchedules(finalManifest);

        return finalManifest;
    }

    /**
     * Turns one computation class into a manifest entry.
     *
     * @param {Function} ComputationClass - Class exposing static `getConfig()` and `validateConfig()`.
     * @returns {Object|null} The entry, or null (after logging an ERROR) when the class
     *   lacks `getConfig()` or its config is invalid.
     */
    _processComputation(ComputationClass) {
        if (typeof ComputationClass.getConfig !== 'function') {
            this._log('ERROR', `Missing static getConfig(): ${ComputationClass.name}`);
            return null;
        }

        const validation = ComputationClass.validateConfig();
        if (!validation.valid) {
            this._log('ERROR', `Invalid config for ${ComputationClass.name}: ${validation.errors.join(', ')}`);
            return null;
        }

        const config = ComputationClass.getConfig();
        const name = this._normalize(config.name);
        // The class source text is what gets hashed and scanned for layer/rule usage.
        const codeString = ComputationClass.toString();
        const codeHash = this._hashCode(codeString);

        // Hash Composition
        const usedLayers = this._detectLayerUsage(codeString);
        const { hashes: ruleHashes } = this.rulesRegistry.detectUsage(codeString);

        let compositeHash = codeHash + `|EPOCH:${this.epoch}`;

        const layerHashes = {};
        for (const [layerName, exports] of Object.entries(usedLayers)) {
            const h = this._hashLayerExports(layerName, exports);
            if (h) {
                layerHashes[layerName] = h;
                compositeHash += `|LAYER:${layerName}:${h}`;
            }
        }

        // Sorted so the composite hash does not depend on the order of ruleHashes' keys.
        for (const [mod, h] of Object.entries(ruleHashes).sort()) {
            compositeHash += `|RULE:${mod}:${h}`;
        }

        return {
            name,
            originalName: config.name,
            class: ComputationClass,
            category: config.category || 'default',
            type: config.type || 'global',
            requires: config.requires || {},
            dependencies: (config.dependencies || []).map(d => this._normalize(d)),
            isHistorical: config.isHistorical || false,
            isTest: config.isTest || false,
            schedule: this.scheduleValidator.parseSchedule(config.schedule),
            storage: this._parseStorageConfig(config.storage),
            ttlDays: config.ttlDays,
            pass: 0, // Set later by Graph.js
            hash: this._hashCode(compositeHash), // Intrinsic hash
            weight: ComputationClass.getWeight ? ComputationClass.getWeight() : 1.0,
            composition: {
                epoch: this.epoch,
                code: codeHash,
                layers: layerHashes,
                rules: ruleHashes,
                deps: {}
            }
        };
    }

    /**
     * Folds dependency hashes into each entry's hash, in place.
     *
     * Entries are visited in the given order and read their dependencies'
     * current `hash` from `manifestMap`; with a dependency-first ordering a
     * dependency's hash is therefore already final when it is read.
     *
     * @param {Object[]} sorted - Manifest entries, dependencies before dependents.
     * @param {Map<string, Object>} manifestMap - Entries keyed by normalized name.
     */
    _computeFinalHashes(sorted, manifestMap) {
        for (const entry of sorted) {
            let hashInput = entry.hash;
            if (entry.dependencies.length > 0) {
                // Sort a copy: `entry.dependencies` is the same array stored in the
                // adjacency map and exposed on the manifest entry, so it must not
                // be reordered as a side effect of hashing.
                const depHashes = [...entry.dependencies].sort().map(d => {
                    const h = manifestMap.get(d)?.hash;
                    entry.composition.deps[d] = h;
                    return h;
                });
                hashInput += `|DEPS:${depHashes.join('|')}`;
            }
            entry.hash = this._hashCode(hashInput);
        }
    }

    /**
     * Runs the schedule validator over the manifest and logs every reported
     * error and warning. Does not throw on validation errors.
     *
     * @param {Object[]} manifest - Final manifest entries.
     */
    _validateSchedules(manifest) {
        const validation = this.scheduleValidator.validate(manifest);
        validation.errors.forEach(e => this._log('ERROR', `Schedule error: ${e.message}`));
        validation.warnings.forEach(w => this._log('WARN', `Schedule warning: ${w.message}`));
    }

    // -- Helpers --

    /**
     * Hashes every export of every shared layer.
     * Functions are hashed by source text, other values by their JSON form.
     *
     * @returns {Object<string, Object<string, string>>} `layerName -> exportName -> hash`.
     */
    _hashSharedLayers() {
        const hashes = {};
        for (const [name, exports] of Object.entries(this.sharedLayers)) {
            hashes[name] = {};
            Object.keys(exports).sort().forEach(k => {
                const val = exports[k];
                const str = typeof val === 'function' ? val.toString() : JSON.stringify(val);
                hashes[name][k] = this._hashCode(`LAYER:${name}:${k}:${str}`);
            });
        }
        return hashes;
    }

    /**
     * Combines the hashes of the given exports of one layer.
     *
     * @param {string} layerName - Shared layer name.
     * @param {string[]} exports - Export names to include.
     * @returns {string|null} Combined hash, or null when none of the exports has a known hash.
     */
    _hashLayerExports(layerName, exports) {
        const vals = [];
        for (const exp of exports) {
            const h = this.layerHashes[layerName]?.[exp];
            if (h) vals.push(h);
        }
        return vals.length ? this._hashCode(vals.sort().join('|')) : null;
    }

    /**
     * Finds which shared-layer exports a piece of source code mentions.
     *
     * This is a plain substring match on the export name, so it can
     * over-report (e.g. an export named `sum` matches `summary`).
     *
     * @param {string} code - Source text to scan.
     * @returns {Object<string, string[]>} `layerName -> matched export names`; layers
     *   without matches are omitted.
     */
    _detectLayerUsage(code) {
        const used = {};
        for (const [name, exports] of Object.entries(this.sharedLayers)) {
            const found = Object.keys(exports).filter(exp =>
                code.includes(exp) // Simple include check, similar to original regex
            );
            if (found.length) used[name] = found;
        }
        return used;
    }

    /**
     * Normalizes a computation's `storage` config, filling in defaults.
     * BigQuery is on unless explicitly `false`; Firestore is off unless
     * `enabled` is explicitly `true`.
     *
     * @param {Object} [cfg] - Raw storage config (may be undefined).
     * @returns {{bigquery: boolean, firestore: {enabled: boolean, path: (string|null), merge: boolean, includeMetadata: boolean}}}
     */
    _parseStorageConfig(cfg) {
        return {
            bigquery: cfg?.bigquery !== false,
            firestore: {
                enabled: cfg?.firestore?.enabled === true,
                path: cfg?.firestore?.path || null,
                merge: cfg?.firestore?.merge || false,
                includeMetadata: cfg?.firestore?.includeMetadata !== false
            }
        };
    }

    /**
     * Groups manifest entries by their `pass` value.
     *
     * @param {Object[]} manifest - Manifest entries.
     * @returns {Object<string, Object[]>} `pass -> entries`, preserving input order within a pass.
     */
    _groupByPass(manifest) {
        const passes = {};
        manifest.forEach(e => {
            if (!passes[e.pass]) passes[e.pass] = [];
            passes[e.pass].push(e);
        });
        return passes;
    }

    /**
     * Hashes a string: strips all whitespace, then returns the first 16 hex
     * characters of its SHA-256 digest. Whitespace-only changes therefore do
     * not change the hash.
     *
     * @param {string} s - Input text.
     * @returns {string} 16-character lowercase hex string.
     */
    _hashCode(s) {
        const cleaned = s.replace(/\s+/g, '');
        return crypto.createHash('sha256').update(cleaned).digest('hex').substring(0, 16);
    }

    /** Lower-cases a name and drops every character other than a-z and 0-9. */
    _normalize(n) { return n.toLowerCase().replace(/[^a-z0-9]/g, ''); }

    /** Logs via the injected logger, or to `console.log` (without the level) when none was given. */
    _log(l, m) { this.logger ? this.logger.log(l, `[Manifest] ${m}`) : console.log(`[Manifest] ${m}`); }

    // Public alias for groupByPass matching the Interface
    groupByPass(m) { return this._groupByPass(m); }
}
|
|
222
|
+
|
|
223
|
+
module.exports = { ManifestBuilder };
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Rule Injector (Proxy Pattern)
|
|
3
|
+
* Wraps the RulesRegistry with a Proxy to:
|
|
4
|
+
* 1. Lazy-load rule modules only when accessed
|
|
5
|
+
* 2. Track exactly which rules are used during execution
|
|
6
|
+
* 3. Eliminate the need for hardcoded dependency lists
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Hands out a Proxy over the rules registry that resolves rule modules on
 * first access and records the name of every module that was requested.
 */
class RuleInjector {
    /**
     * @param {Object} registry - Rules registry exposing `getContext()`, which returns
     *   an object keyed by rule module name.
     */
    constructor(registry) {
        this.registry = registry;
    }

    /**
     * Creates a proxied rules object and a tracking set.
     *
     * Every string property read on `rules` is added to `used`, whether or not
     * a module of that name exists. Symbol-keyed reads (made implicitly by the
     * runtime, e.g. `Symbol.toPrimitive` during string coercion) are ignored so
     * they do not show up as rule usage.
     *
     * @returns {Object} { rules: Proxy, used: Set<string> }
     */
    createContext() {
        const used = new Set();
        const registry = this.registry;

        // Per-context cache of resolved modules. A Map (rather than `{}`) so
        // names like `constructor` or `toString` are not answered from
        // Object.prototype.
        const cache = new Map();

        const rulesProxy = new Proxy({}, {
            get: (target, prop) => {
                // Symbols are never rule module names.
                if (typeof prop === 'symbol') return undefined;

                const moduleName = prop;

                // 1. Track usage
                used.add(moduleName);

                // 2. Return from local cache if available
                if (cache.has(moduleName)) return cache.get(moduleName);

                // 3. Lazy load from the registry's raw context
                const fullContext = registry.getContext();

                // Own-property check so inherited members of the context object
                // are not mistaken for rule modules.
                const isOwn = Object.prototype.hasOwnProperty.call(fullContext, moduleName);
                if (isOwn && fullContext[moduleName]) {
                    cache.set(moduleName, fullContext[moduleName]);
                    return fullContext[moduleName];
                }

                // Return undefined if rule module doesn't exist
                return undefined;
            }
        });

        return { rules: rulesProxy, used };
    }
}
|
|
52
|
+
|
|
53
|
+
module.exports = { RuleInjector };
|