bulltrackers-module 1.0.741 → 1.0.743
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/README.md +33 -123
- package/functions/computation-system-v2/config/bulltrackers.config.js +1 -1
- package/functions/computation-system-v2/docs/api_reference.md +118 -0
- package/functions/computation-system-v2/docs/architecture.md +96 -52
- package/functions/computation-system-v2/docs/developer_guide.md +125 -0
- package/functions/computation-system-v2/docs/operations.md +99 -0
- package/functions/computation-system-v2/handlers/scheduler.js +27 -4
- package/package.json +1 -1
|
@@ -5,148 +5,58 @@
|
|
|
5
5
|
A generic, BigQuery-driven computation framework that can be applied to any dataset.
|
|
6
6
|
|
|
7
7
|
**Key Principles:**
|
|
8
|
-
- **Zero hardcoded schemas** - Schemas are discovered dynamically from BigQuery
|
|
9
|
-
- **Pre-query validation** - Queries are validated against cached schemas before execution
|
|
10
|
-
- **
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
8
|
+
- **Zero hardcoded schemas** - Schemas are discovered dynamically from BigQuery.
|
|
9
|
+
- **Pre-query validation** - Queries are validated against cached schemas before execution.
|
|
10
|
+
- **Pass-based execution** - Topological sort determines execution order (Kahn's algorithm).
|
|
11
|
+
- **Hash-based versioning** - Automatic detection of code/dependency changes to support cached re-runs.
|
|
12
|
+
- **Remote Execution** - Offloads heavy per-entity computations to serverless workers.
|
|
13
13
|
|
|
14
|
-
##
|
|
14
|
+
## Documentation
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
├── config/
|
|
34
|
-
│ └── bulltrackers.config.js # BullTrackers-specific configuration
|
|
35
|
-
│
|
|
36
|
-
├── computations/ # BullTrackers computations (v2 format)
|
|
37
|
-
│ └── (migrated computations go here)
|
|
38
|
-
│
|
|
39
|
-
└── index.js # Main entry point
|
|
40
|
-
```
|
|
16
|
+
The documentation is split into four detailed guides:
|
|
17
|
+
|
|
18
|
+
### 1. [Architecture Guide](docs/architecture.md)
|
|
19
|
+
* **Target Audience**: Architects, Senior Engineers
|
|
20
|
+
* **Contents**: System Philosophy, Core Components (Manifest, Schema Registry), Data Flow, and Remote Execution internals.
|
|
21
|
+
|
|
22
|
+
### 2. [Developer Guide](docs/developer_guide.md)
|
|
23
|
+
* **Target Audience**: Data Engineers, Developers writing logic
|
|
24
|
+
* **Contents**: How to create a new `Computation`, configure dependencies (`requires`), and implement the `process()` method.
|
|
25
|
+
|
|
26
|
+
### 3. [API Reference](docs/api_reference.md)
|
|
27
|
+
* **Target Audience**: Developers
|
|
28
|
+
* **Contents**: Complete API documentation for the `Computation` class, `context` object, and configuration schema. Includes **System Defaults & Fallbacks**.
|
|
29
|
+
|
|
30
|
+
### 4. [Operations Manual](docs/operations.md)
|
|
31
|
+
* **Target Audience**: DevOps, SREs, On-call Support
|
|
32
|
+
* **Contents**: Deployment instructions, Management API usage (`status`, `analyze`, `run`), Monitoring guide, and Troubleshooting common issues.
|
|
41
33
|
|
|
42
34
|
## Quick Start
|
|
43
35
|
|
|
44
|
-
###
|
|
36
|
+
### Define a Computation
|
|
45
37
|
|
|
46
38
|
```javascript
|
|
47
39
|
const { Computation } = require('./framework/core/Computation');
|
|
48
40
|
|
|
49
|
-
class
|
|
41
|
+
class UserPortfolioSummary extends Computation {
|
|
50
42
|
static getConfig() {
|
|
51
43
|
return {
|
|
52
|
-
name: '
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
lookback: 0, // 0 = today only, N = last N days
|
|
56
|
-
mandatory: true // If true, computation won't run without this data
|
|
57
|
-
}
|
|
58
|
-
},
|
|
59
|
-
dependencies: [], // Other computations this depends on
|
|
60
|
-
type: 'global' // 'global' or 'per-entity'
|
|
44
|
+
name: 'UserPortfolioSummary',
|
|
45
|
+
type: 'per-entity',
|
|
46
|
+
requires: { 'orders': { lookback: 7, mandatory: true } }
|
|
61
47
|
};
|
|
62
48
|
}
|
|
63
49
|
|
|
64
50
|
async process(context) {
|
|
65
|
-
const data = context.data['my_table'];
|
|
66
51
|
// Your logic here
|
|
67
|
-
return {
|
|
52
|
+
return { totalValue: 1000 };
|
|
68
53
|
}
|
|
69
54
|
}
|
|
70
55
|
```
|
|
71
56
|
|
|
72
|
-
###
|
|
73
|
-
|
|
74
|
-
```javascript
|
|
75
|
-
// config/bulltrackers.config.js
|
|
76
|
-
module.exports = {
|
|
77
|
-
bigquery: {
|
|
78
|
-
projectId: 'your-project',
|
|
79
|
-
dataset: 'your_dataset'
|
|
80
|
-
},
|
|
81
|
-
tables: {
|
|
82
|
-
'my_table': {
|
|
83
|
-
dateField: 'date',
|
|
84
|
-
entityField: 'user_id'
|
|
85
|
-
}
|
|
86
|
-
},
|
|
87
|
-
computations: [
|
|
88
|
-
require('./computations/MyComputation')
|
|
89
|
-
]
|
|
90
|
-
};
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
### 3. Run
|
|
94
|
-
|
|
95
|
-
```javascript
|
|
96
|
-
const { execute } = require('./computation-system-v2');
|
|
97
|
-
await execute({ date: '2026-01-24', config: require('./config/bulltrackers.config') });
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
## Key Concepts
|
|
101
|
-
|
|
102
|
-
### Schema Registry
|
|
103
|
-
|
|
104
|
-
The `SchemaRegistry` fetches table schemas from BigQuery's `INFORMATION_SCHEMA` and caches them:
|
|
105
|
-
|
|
106
|
-
```javascript
|
|
107
|
-
const schema = await schemaRegistry.getSchema('my_table');
|
|
108
|
-
// Returns: [{ name: 'id', type: 'INT64' }, { name: 'date', type: 'DATE' }, ...]
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
Before any query is executed, it's validated against the cached schema to prevent runtime errors.
|
|
112
|
-
|
|
113
|
-
### Query Validation
|
|
114
|
-
|
|
115
|
-
```javascript
|
|
116
|
-
// This will throw BEFORE hitting BigQuery if 'nonexistent_column' doesn't exist
|
|
117
|
-
const query = queryBuilder.build({
|
|
118
|
-
table: 'my_table',
|
|
119
|
-
select: ['id', 'nonexistent_column'], // Error: column doesn't exist
|
|
120
|
-
where: { date: '2026-01-24' }
|
|
121
|
-
});
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
### Pass System
|
|
125
|
-
|
|
126
|
-
Computations are automatically organized into "passes" based on dependencies:
|
|
57
|
+
### Run It
|
|
127
58
|
|
|
59
|
+
```bash
|
|
60
|
+
# Run locally (Dry Run)
|
|
61
|
+
node index.js execute --computation UserPortfolioSummary --date 2026-01-26 --dry-run
|
|
128
62
|
```
|
|
129
|
-
Pass 1: Computations with no dependencies (can run in parallel)
|
|
130
|
-
Pass 2: Computations depending only on Pass 1 results
|
|
131
|
-
Pass 3: Computations depending on Pass 1 or 2 results
|
|
132
|
-
...
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
### Hash-Based Versioning
|
|
136
|
-
|
|
137
|
-
Each computation gets a unique hash based on:
|
|
138
|
-
- Its own code
|
|
139
|
-
- The code of any shared utilities it uses
|
|
140
|
-
- The hashes of its dependencies
|
|
141
|
-
|
|
142
|
-
When a hash changes, the computation re-runs. When hashes match, results are reused.
|
|
143
|
-
|
|
144
|
-
## Comparison with v1
|
|
145
|
-
|
|
146
|
-
| Aspect | v1 | v2 |
|
|
147
|
-
|--------|----|----|
|
|
148
|
-
| Schema definition | Hardcoded in `bigquery_utils.js` | Dynamic from `INFORMATION_SCHEMA` |
|
|
149
|
-
| Query building | Per-table functions | Single generic builder with validation |
|
|
150
|
-
| Data loading | Complex routing through multiple layers | Direct fetch with simple interface |
|
|
151
|
-
| Computation declaration | Multiple overlapping fields | Single `requires` object |
|
|
152
|
-
| Domain coupling | Deeply embedded | Configuration-only |
|
|
@@ -300,7 +300,7 @@ module.exports = {
|
|
|
300
300
|
// Service account for OIDC authentication when invoking Dispatcher
|
|
301
301
|
// This SA needs roles/cloudfunctions.invoker on the Dispatcher function
|
|
302
302
|
serviceAccountEmail: process.env.CLOUD_TASKS_SA_EMAIL ||
|
|
303
|
-
'
|
|
303
|
+
'879684846540-compute@developer.gserviceaccount.com'
|
|
304
304
|
},
|
|
305
305
|
|
|
306
306
|
// =========================================================================
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
## 1. `Computation` Class
|
|
4
|
+
|
|
5
|
+
The base class for all computations.
|
|
6
|
+
|
|
7
|
+
### Static Methods
|
|
8
|
+
|
|
9
|
+
#### `getConfig()`
|
|
10
|
+
Must be implemented by the subclass. Returns the configuration object.
|
|
11
|
+
* **Returns**: `Object` (See Configuration Schema below)
|
|
12
|
+
|
|
13
|
+
#### `validateConfig()`
|
|
14
|
+
Internal method used by the Manifest Builder to verify the config.
|
|
15
|
+
* **Returns**: `{ valid: boolean, errors: string[] }`
|
|
16
|
+
|
|
17
|
+
#### `getSchema()`
|
|
18
|
+
Optional. Define a hardcoded schema if BigQuery discovery is not used (Legacy support).
|
|
19
|
+
|
|
20
|
+
### Instance Methods
|
|
21
|
+
|
|
22
|
+
#### `async process(context)`
|
|
23
|
+
**Required**. The main execution logic.
|
|
24
|
+
* **Args**: `context` (Object)
|
|
25
|
+
* **Returns**: `Object` (The result to be saved)
|
|
26
|
+
|
|
27
|
+
#### `log(level, message)`
|
|
28
|
+
Structured logging helper.
|
|
29
|
+
* **Args**:
|
|
30
|
+
* `level`: 'INFO', 'WARN', 'ERROR', 'DEBUG'
|
|
31
|
+
* `message`: String
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 2. `Context` Object
|
|
36
|
+
|
|
37
|
+
The object passed to `process()`.
|
|
38
|
+
|
|
39
|
+
| Property | Type | Description |
|
|
40
|
+
| :--- | :--- | :--- |
|
|
41
|
+
| `date` | String | The target date of execution (YYYY-MM-DD). |
|
|
42
|
+
| `entityId` | String | The ID of the specific entity being processed. |
|
|
43
|
+
| `data` | Object | Map of table names to data (Rows or Arrays of Rows). |
|
|
44
|
+
| `previousResult` | Object | The result of *this* computation from the previous date (if `isHistorical: true`). |
|
|
45
|
+
| `config` | Object | A safe subset of the global configuration. |
|
|
46
|
+
| `references` | Object | Cached reference data (e.g., global mappings). |
|
|
47
|
+
|
|
48
|
+
### Methods
|
|
49
|
+
|
|
50
|
+
#### `getDependency(computationName, [entityId])`
|
|
51
|
+
Retrieves the result of a dependency.
|
|
52
|
+
* If `entityId` is omitted, it defaults to the current `context.entityId`.
|
|
53
|
+
* Use `_global` as `entityId` to fetch Global computation results.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## 3. Configuration Schema (`bulltrackers.config.js`)
|
|
58
|
+
|
|
59
|
+
```javascript
|
|
60
|
+
module.exports = {
|
|
61
|
+
project: 'gcp-project-id',
|
|
62
|
+
|
|
63
|
+
bigquery: {
|
|
64
|
+
projectId: 'gcp-project-id',
|
|
65
|
+
dataset: 'dataset_name',
|
|
66
|
+
location: 'US',
|
|
67
|
+
cacheTTLMs: 3600000 // 1 hour
|
|
68
|
+
},
|
|
69
|
+
|
|
70
|
+
workerPool: {
|
|
71
|
+
enabled: true,
|
|
72
|
+
workerUrl: 'https://...',
|
|
73
|
+
concurrency: 100
|
|
74
|
+
},
|
|
75
|
+
|
|
76
|
+
tables: {
|
|
77
|
+
'table_name': {
|
|
78
|
+
entityField: 'user_id',
|
|
79
|
+
dateField: 'date'
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## 4. System Defaults & Fallbacks
|
|
88
|
+
|
|
89
|
+
The system uses the following default values if not explicitly configured:
|
|
90
|
+
|
|
91
|
+
### Scheduling (`ScheduleValidator.js`)
|
|
92
|
+
| Parameter | Default Value | Description |
|
|
93
|
+
| :--- | :--- | :--- |
|
|
94
|
+
| `frequency` | `'daily'` | Default schedule frequency. |
|
|
95
|
+
| `time` | `'02:00'` | Default execution time (UTC). |
|
|
96
|
+
| `timezone` | `'UTC'` | Default timezone. |
|
|
97
|
+
| `dependencyGapMinutes` | `15` | Minimum safe gap between dependent computations. |
|
|
98
|
+
|
|
99
|
+
### Execution (`Orchestrator.js`)
|
|
100
|
+
| Parameter | Default Value | Description |
|
|
101
|
+
| :--- | :--- | :--- |
|
|
102
|
+
| `batchSize` | `1000` | Number of entities per batch in Streaming/Remote mode. |
|
|
103
|
+
| `entityConcurrency` | `20` | Concurrent entities processed *per batch* in Local mode. |
|
|
104
|
+
|
|
105
|
+
### Remote Worker Pool (`RemoteTaskRunner.js`)
|
|
106
|
+
| Parameter | Default Value | Description |
|
|
107
|
+
| :--- | :--- | :--- |
|
|
108
|
+
| `concurrency` | `100` | Max concurrent Worker Functions invoked. |
|
|
109
|
+
| `timeout` | `60000` (60s) | HTTP timeout for worker invocation. |
|
|
110
|
+
| `retries` | `2` | Number of retries for transient failures. |
|
|
111
|
+
| `circuitBreaker.failureThreshold` | `0.30` (30%) | Failure rate needed to trip the circuit. |
|
|
112
|
+
| `circuitBreaker.minInvocations` | `20` | Minimum calls before Circuit Breaker activates. |
|
|
113
|
+
|
|
114
|
+
### BigQuery
|
|
115
|
+
| Parameter | Default Value | Description |
|
|
116
|
+
| :--- | :--- | :--- |
|
|
117
|
+
| `location` | `'US'` | Default BigQuery location. |
|
|
118
|
+
| `cacheTTLMs` | `3600000` (1h) | Duration to cache schemas in memory. |
|
|
@@ -1,59 +1,103 @@
|
|
|
1
|
+
# System Architecture
|
|
2
|
+
|
|
3
|
+
## 1. System Philosophy
|
|
4
|
+
|
|
5
|
+
The Computation System v2 is designed to be a generic, configuration-driven Directed Acyclic Graph (DAG) executor that operates directly on BigQuery data. It departs from traditional ETL pipelines by adhering to five core principles:
|
|
6
|
+
|
|
7
|
+
1. **Zero Hardcoded Schemas**: The system never defines schemas in code. It dynamically discovers them from BigQuery's `INFORMATION_SCHEMA` at runtime. This eliminates the "schema drift" problem where code falls out of sync with the database.
|
|
8
|
+
2. **Pre-Query Validation**: Every SQL query is validated against the cached schema *before* it is sent to BigQuery. This prevents expensive runtime failures and SQL errors.
|
|
9
|
+
3. **Pass-Based Execution**: Computations are automatically organized into "passes" (waves) based on their dependencies using Kahn's algorithm.
|
|
10
|
+
4. **Hash-Based Versioning**: The system tracks the "Identity" of every computation result using a cryptographic hash of:
|
|
11
|
+
* The Computation Code
|
|
12
|
+
* Shared Utility "Layers"
|
|
13
|
+
* Business Logic "Rules"
|
|
14
|
+
* Dependency Result Hashes
|
|
15
|
+
5. **Hybrid Execution**: Light computations run globally in-memory, while heavy per-entity computations are offloaded to a serverless worker pool (Remote Task Runner).
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 2. Core Components
|
|
20
|
+
|
|
21
|
+
### 2.1. The Manifest Builder (`framework/core/Manifest.js`)
|
|
22
|
+
The Manifest is the blueprint of the system. At startup, it:
|
|
23
|
+
1. **Loads Configs**: Reads static `getConfig()` from all Computation classes.
|
|
24
|
+
2. **Builds Graph**: Constructs the dependency graph.
|
|
25
|
+
3. **Detects Cycles**: Throws an error if A -> B -> A.
|
|
26
|
+
4. **Calculates Passes**: Uses Topological Sort to assign a `pass` number (0, 1, 2...) to each computation.
|
|
27
|
+
5. **Generates Hashes**: Computes the intrinsic version hash for change detection.
|
|
28
|
+
|
|
29
|
+
### 2.2. Schema Registry (`framework/data/SchemaRegistry.js`)
|
|
30
|
+
The bridge between code and data.
|
|
31
|
+
* **Dynamic Discovery**: Fetches column definitions, types, and nullability from BigQuery.
|
|
32
|
+
* **Caching**: Caches schemas in memory (default TTL: 1 hour) to reduce latency.
|
|
33
|
+
* **Request Coalescing**: Prevents "Thundering Herd" issues by merging simultaneous requests for the same table schema.
|
|
34
|
+
|
|
35
|
+
### 2.3. The Orchestrator (`framework/execution/Orchestrator.js`)
|
|
36
|
+
The central nervous system. It creates the execution plan for a given date.
|
|
37
|
+
* **Dependency Resolution**: Ensures pre-requisite data is loaded.
|
|
38
|
+
* **Execution Strategy**: Decides whether to run a computation locally or offload it to the Remote Task Runner.
|
|
39
|
+
* **Streaming**: For large datasets, it streams data in batches (default: 1000 entities) to avoid Out-Of-Memory (OOM) crashes.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 3. Data Flow
|
|
44
|
+
|
|
1
45
|
```mermaid
|
|
2
46
|
graph TD
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
subgraph
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
CloudTasks -->|HTTP POST w/ Backoff| Dispatcher[Dispatcher Handler]
|
|
11
|
-
Dispatcher -->|Run Computation| Orchestrator[Orchestrator]
|
|
12
|
-
Orchestrator -->|Return Status| Dispatcher
|
|
13
|
-
Dispatcher -.->|Blocked| Return503[503 Retry]
|
|
14
|
-
Return503 -.-> CloudTasks
|
|
15
|
-
Dispatcher -.->|Success / Skipped| Return200[200 OK]
|
|
47
|
+
User[User/Scheduler] -->|Trigger| Dispatcher
|
|
48
|
+
Dispatcher -->|POST| Orchestrator
|
|
49
|
+
|
|
50
|
+
subgraph "Phase 1: Preparation"
|
|
51
|
+
Orchestrator -->|Build| Manifest
|
|
52
|
+
Orchestrator -->|Check Status| StateDB[(State Repository)]
|
|
53
|
+
StateDB -->|Run/Skip/ReRun?| Orchestrator
|
|
16
54
|
end
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
ExecMode -->|Per-Entity / Heavy| RemoteRunner[Remote Task Runner]
|
|
30
|
-
RemoteRunner -->|Upload Context| GCS[(Cloud Storage)]
|
|
31
|
-
RemoteRunner --> Worker[Worker Handler]
|
|
32
|
-
Worker -->|Download Context| GCS
|
|
33
|
-
Worker -->|Execute| Logic
|
|
34
|
-
Worker -->|Return Result| RemoteRunner
|
|
55
|
+
|
|
56
|
+
subgraph "Phase 2: Execution"
|
|
57
|
+
Orchestrator -->|Fetch Data| DataFetcher
|
|
58
|
+
DataFetcher -->|Validate| SchemaRegistry
|
|
59
|
+
SchemaRegistry -->|Meta| BigQuery[(BigQuery)]
|
|
60
|
+
DataFetcher -->|Query| BigQuery
|
|
61
|
+
|
|
62
|
+
Orchestrator -->|Pass 0| C1[Computation A]
|
|
63
|
+
Orchestrator -->|Pass 1| C2[Computation B]
|
|
64
|
+
|
|
65
|
+
C1 -->|Result| Storage[Storage Manager]
|
|
66
|
+
Storage -->|Persist| BigQuery
|
|
35
67
|
end
|
|
68
|
+
```
|
|
36
69
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## 4. Execution Modes
|
|
73
|
+
|
|
74
|
+
### 4.1. Global Mode (Local Isolation)
|
|
75
|
+
Used for: Aggregations, summaries, or light computations.
|
|
76
|
+
* **Flow**: Fetches *all* required data into memory -> Runs logic -> Writes single result.
|
|
77
|
+
* **Concurrency**: Parallelized by `p-limit` locally.
|
|
78
|
+
|
|
79
|
+
### 4.2. Per-Entity Mode (Remote Offload)
|
|
80
|
+
Used for: Complex logic (e.g., Portfolio Calculations) requiring isolation.
|
|
81
|
+
* **Orchestrator**: Fetches data for a batch (e.g., 100 users).
|
|
82
|
+
* **Storage**: Uploads context (Data + Rules + Config) to Cloud Storage (GCS).
|
|
83
|
+
* **Worker Pool**: Invokes stateless Cloud Functions to process the batch.
|
|
84
|
+
* **Circuit Breaker**: Stops execution if failure rate exceeds threshold (default: 30%).
|
|
42
85
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
classDef plain fill:#ffffff,stroke:#333,stroke-width:1px;
|
|
51
|
-
classDef db fill:#e1f5fe,stroke:#01579b,stroke-width:2px;
|
|
52
|
-
classDef logic fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px;
|
|
53
|
-
classDef queue fill:#fff9c4,stroke:#fbc02d,stroke-width:2px;
|
|
54
|
-
|
|
55
|
-
class Cron,Scheduler,Dispatcher,Orchestrator,Manifest,LocalExec,RemoteRunner,Worker plain;
|
|
56
|
-
class StateRepo,BigQuery,GCS db;
|
|
57
|
-
class Logic logic;
|
|
58
|
-
class CloudTasks queue;
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 5. Versioning & Hashing
|
|
89
|
+
|
|
90
|
+
How does the system know when to re-run a computation?
|
|
91
|
+
|
|
92
|
+
The **Composite Hash** is calculated as:
|
|
59
93
|
```
|
|
94
|
+
Hash = SHA256(
|
|
95
|
+
Code Body +
|
|
96
|
+
Epoch Version +
|
|
97
|
+
Shared Layer Hashes +
|
|
98
|
+
Rule Module Hashes +
|
|
99
|
+
Dependency Result Hashes
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
If this hash matches the `resultHash` stored in the State Repository for a given date, the computation is **SKIPPED** (unless forced). This ensures idempotency and avoids unnecessary processing costs.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Developer Guide
|
|
2
|
+
|
|
3
|
+
This guide explains how to create, configure, and test new Computations in the v2 framework.
|
|
4
|
+
|
|
5
|
+
## 1. Creating a Computation
|
|
6
|
+
|
|
7
|
+
All computations must extend the base `Computation` class.
|
|
8
|
+
|
|
9
|
+
### Basic Template
|
|
10
|
+
|
|
11
|
+
```javascript
|
|
12
|
+
const { Computation } = require('../../framework/core/Computation');
|
|
13
|
+
|
|
14
|
+
class UserDailyActive extends Computation {
|
|
15
|
+
static getConfig() {
|
|
16
|
+
return {
|
|
17
|
+
name: 'UserDailyActive',
|
|
18
|
+
type: 'per-entity', // or 'global'
|
|
19
|
+
requires: {
|
|
20
|
+
// Table Dependencies (Data)
|
|
21
|
+
'user_logins': {
|
|
22
|
+
lookback: 0, // 0 = today only
|
|
23
|
+
mandatory: true
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
dependencies: [
|
|
27
|
+
// Computation Dependencies (Prerequisites)
|
|
28
|
+
// 'UserSessionSummary'
|
|
29
|
+
]
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
async process(context) {
|
|
34
|
+
const { date, entityId, data } = context;
|
|
35
|
+
|
|
36
|
+
// Access pre-fetched data
|
|
37
|
+
const logins = data['user_logins'];
|
|
38
|
+
|
|
39
|
+
if (!logins || logins.length === 0) {
|
|
40
|
+
return null; // Return null to skip saving result
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return {
|
|
44
|
+
date,
|
|
45
|
+
userId: entityId,
|
|
46
|
+
loginCount: logins.length,
|
|
47
|
+
lastLoginTime: logins[logins.length - 1].timestamp
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
module.exports = UserDailyActive;
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 2. Configuration Options (`getConfig`)
|
|
57
|
+
|
|
58
|
+
| Field | Type | Description |
|
|
59
|
+
| :--- | :--- | :--- |
|
|
60
|
+
| `name` | String | **Required**. Unique name of the computation. |
|
|
61
|
+
| `type` | String | `'global'` (runs once) or `'per-entity'` (runs for each entity). |
|
|
62
|
+
| `requires` | Object | Map of table names to requirements. |
|
|
63
|
+
| `dependencies` | Array | List of other computation names that must run *before* this one. |
|
|
64
|
+
| `schedule` | Object | `{ frequency: 'daily', time: '02:00' }`. Defaults to daily/02:00. |
|
|
65
|
+
| `ttlDays` | Number | How long to keep results in State DB (default: 365). |
|
|
66
|
+
| `isHistorical`| Boolean| If `true`, `context.previousResult` will contain yesterday's output. |
|
|
67
|
+
|
|
68
|
+
### The `requires` Object
|
|
69
|
+
|
|
70
|
+
```javascript
|
|
71
|
+
requires: {
|
|
72
|
+
'orders': {
|
|
73
|
+
lookback: 7, // Fetch last 7 days of data
|
|
74
|
+
mandatory: false, // If true, crashes if table is empty/missing
|
|
75
|
+
dateField: 'created_at' // Optional override
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## 3. The `process(context)` Method
|
|
83
|
+
|
|
84
|
+
The `process` method is the heart of your logic. It receives a `context` object:
|
|
85
|
+
|
|
86
|
+
### `context.data`
|
|
87
|
+
Contains the raw data fetched from BigQuery.
|
|
88
|
+
* **Per-Entity**: `data['table_name']` is the row (or array of rows) *specifically for that entity*.
|
|
89
|
+
* **Global**: `data['table_name']` is the entire dataset fetched.
|
|
90
|
+
|
|
91
|
+
### `context.getDependency(name, [entityId])`
|
|
92
|
+
Access results from previous computations.
|
|
93
|
+
```javascript
|
|
94
|
+
// In a per-entity computation, getting its own dependency:
|
|
95
|
+
const sessionStats = context.getDependency('UserSessionSummary');
|
|
96
|
+
|
|
97
|
+
// Getting a global dependency result from a per-entity computation:
|
|
98
|
+
const globalSettings = context.getDependency('GlobalSettings', '_global');
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### `context.rules`
|
|
102
|
+
Access to shared business logic modules (if configured).
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## 4. Testing & Validation
|
|
107
|
+
|
|
108
|
+
### Local Dry Run
|
|
109
|
+
You can run your computation locally without writing to BigQuery/Firestore by using the dry-run flag.
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# Run a specific computation for a specific date
|
|
113
|
+
node index.js execute \
|
|
114
|
+
--computation UserDailyActive \
|
|
115
|
+
--date 2026-01-26 \
|
|
116
|
+
--dry-run \
|
|
117
|
+
--entityId user_12345
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Logging
|
|
121
|
+
Use `this.log(level, message)` instead of `console.log`. This ensures logs are properly tagged with the computation name and entity ID in Cloud Logging.
|
|
122
|
+
|
|
123
|
+
```javascript
|
|
124
|
+
this.log('INFO', `Processing user ${entityId} with ${logins.length} logins`);
|
|
125
|
+
```
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Operations Manual
|
|
2
|
+
|
|
3
|
+
This guide covers deployment, daily operations, monitoring, and troubleshooting.
|
|
4
|
+
|
|
5
|
+
## 1. Deployment
|
|
6
|
+
|
|
7
|
+
The system is deployed as a Google Cloud Function (Gen 2).
|
|
8
|
+
|
|
9
|
+
### Deploy Command
|
|
10
|
+
```bash
|
|
11
|
+
# Navigate to the function directory
|
|
12
|
+
cd functions/computation-system-v2
|
|
13
|
+
|
|
14
|
+
# Deploy using the included script
|
|
15
|
+
node deploy.mjs ComputeSystem
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Note**: Ensure you have the `gcloud` CLI installed and authenticated.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 2. Management API
|
|
23
|
+
|
|
24
|
+
The system exposes an HTTP endpoint for manual administration.
|
|
25
|
+
|
|
26
|
+
### Setup (One-time)
|
|
27
|
+
```bash
|
|
28
|
+
# Get Function URL
|
|
29
|
+
FUNCTION_URL=$(gcloud functions describe compute-system --region=europe-west1 --format="value(serviceConfig.uri)")
|
|
30
|
+
|
|
31
|
+
# Get Auth Token
|
|
32
|
+
TOKEN=$(gcloud auth print-identity-token)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Common Actions
|
|
36
|
+
|
|
37
|
+
#### Check Status
|
|
38
|
+
Lists all registered computations and their hash status.
|
|
39
|
+
```bash
|
|
40
|
+
curl -X POST "$FUNCTION_URL" \
|
|
41
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
42
|
+
-H "Content-Type: application/json" \
|
|
43
|
+
-d '{"action": "status"}'
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
#### Analyze a Date
|
|
47
|
+
Simulates a run for a given date, showing what would run, skip, or be blocked.
|
|
48
|
+
```bash
|
|
49
|
+
curl -X POST "$FUNCTION_URL" \
|
|
50
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
51
|
+
-H "Content-Type: application/json" \
|
|
52
|
+
-d '{"action": "analyze", "date": "2026-01-26"}'
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
#### Manually Trigger (Force Run)
|
|
56
|
+
Forces a computation to run, ignoring hash checks.
|
|
57
|
+
```bash
|
|
58
|
+
curl -X POST "$FUNCTION_URL" \
|
|
59
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
60
|
+
-H "Content-Type: application/json" \
|
|
61
|
+
-d '{"action": "run", "computation": "UserDailyActive", "date": "2026-01-26", "force": true}'
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## 3. Monitoring
|
|
67
|
+
|
|
68
|
+
The Orchestrator logs a structured **Execution Summary** at the end of every run.
|
|
69
|
+
|
|
70
|
+
### Understanding Statuses
|
|
71
|
+
|
|
72
|
+
| Status | Icon | Description | Action Required |
|
|
73
|
+
| :--- | :--- | :--- | :--- |
|
|
74
|
+
| `completed` | ✅ | Computation finished successfully. | None |
|
|
75
|
+
| `skipped` | ⏭️ | Result hash matches previous run (Up-to-date). | None |
|
|
76
|
+
| `blocked` | ⛔ | A dependency failed or hasn't run yet. | Fix dependency then re-run. |
|
|
77
|
+
| `impossible` | 🚫 | A mandatory requirement (table) is empty. | Check upstream ETL. |
|
|
78
|
+
| `failed` | ❌ | Runtime error threw an exception. | Check logs for stack trace. |
|
|
79
|
+
|
|
80
|
+
### Circuit Breaker (Remote Mode)
|
|
81
|
+
If you see `CIRCUIT BREAKER TRIPPED` in the logs:
|
|
82
|
+
1. **Stop**: The system automatically aborted to prevent wasting money on failing workers.
|
|
83
|
+
2. **Investigate**: Look for "Worker failed" logs to see the root cause (e.g., Syntax Error in calculation).
|
|
84
|
+
3. **Fix**: Deploy the fix.
|
|
85
|
+
4. **Retry**: The circuit resets automatically on the next run.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## 4. Troubleshooting
|
|
90
|
+
|
|
91
|
+
### "Thundering Herd" on Startup
|
|
92
|
+
**Symptom**: Massive latency on the first few requests.
|
|
93
|
+
**Cause**: Cold start + filling Schema Cache.
|
|
94
|
+
**Fix**: The system now uses **Request Coalescing**. If it persists, increase `concurrency` in `workerPool` config.
|
|
95
|
+
|
|
96
|
+
### "Zombie" Computations
|
|
97
|
+
**Symptom**: A computation stuck in "Running" state for hours.
|
|
98
|
+
**Cause**: Function timeout or crash before writing status.
|
|
99
|
+
**Fix**: Run the `status` command. The system automatically detects stale locks (> 1 hour) and allows re-execution.
|
|
@@ -55,8 +55,10 @@ async function schedulerHandler(req, res) {
|
|
|
55
55
|
let zombies = [];
|
|
56
56
|
try {
|
|
57
57
|
zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
|
|
58
|
+
// New Code
|
|
58
59
|
if (zombies.length > 0) {
|
|
59
|
-
|
|
60
|
+
const zombieDetails = zombies.map(z => `${z.name} [${z.date}]`).join(', ');
|
|
61
|
+
console.log(`[Scheduler] DETECTED ${zombies.length} ZOMBIES: ${zombieDetails}`);
|
|
60
62
|
}
|
|
61
63
|
} catch (e) {
|
|
62
64
|
console.error(`[Scheduler] Zombie check failed: ${e.message}`);
|
|
@@ -136,10 +138,14 @@ function isScheduleDue(schedule, currentTime, dayOfWeek, dayOfMonth) {
|
|
|
136
138
|
|
|
137
139
|
async function dispatchComputations(computations, defaultDate, scheduledTime) {
|
|
138
140
|
const limit = pLimit(CLOUD_TASKS_CONCURRENCY);
|
|
139
|
-
const { projectId, location, queueName, dispatcherUrl } = config.cloudTasks;
|
|
141
|
+
const { projectId, location, queueName, dispatcherUrl, serviceAccountEmail } = config.cloudTasks; // Ensure serviceAccountEmail is destructured
|
|
140
142
|
const queuePath = tasksClient.queuePath(projectId, location, queueName);
|
|
141
143
|
const timeSlot = formatTimeCompact(scheduledTime);
|
|
142
144
|
|
|
145
|
+
// Log the configuration ONCE at the start of dispatch to verify
|
|
146
|
+
console.log(`[Scheduler] Dispatching to Queue: ${queuePath}`);
|
|
147
|
+
console.log(`[Scheduler] Using OIDC Service Account: ${serviceAccountEmail}`);
|
|
148
|
+
|
|
143
149
|
const tasks = computations.map(entry => limit(async () => {
|
|
144
150
|
try {
|
|
145
151
|
// Determine date: Zombies use their original stuck date, normal tasks use today
|
|
@@ -170,7 +176,7 @@ async function dispatchComputations(computations, defaultDate, scheduledTime) {
|
|
|
170
176
|
headers: { 'Content-Type': 'application/json' },
|
|
171
177
|
body: Buffer.from(JSON.stringify(taskPayload)).toString('base64'),
|
|
172
178
|
oidcToken: {
|
|
173
|
-
serviceAccountEmail:
|
|
179
|
+
serviceAccountEmail: serviceAccountEmail, // Use the destructured variable
|
|
174
180
|
audience: dispatcherUrl
|
|
175
181
|
}
|
|
176
182
|
},
|
|
@@ -186,9 +192,26 @@ async function dispatchComputations(computations, defaultDate, scheduledTime) {
|
|
|
186
192
|
};
|
|
187
193
|
|
|
188
194
|
} catch (error) {
|
|
189
|
-
|
|
195
|
+
// EXISTING HANDLE: Duplicate tasks
|
|
196
|
+
if (error.code === 6) {
|
|
190
197
|
return { computation: entry.originalName, status: 'skipped', reason: 'duplicate' };
|
|
191
198
|
}
|
|
199
|
+
|
|
200
|
+
// NEW HANDLE: NOT_FOUND (Configuration Errors)
|
|
201
|
+
if (error.code === 5) {
|
|
202
|
+
console.error(`[Scheduler] 🚨 CONFIGURATION ERROR: 5 NOT_FOUND`);
|
|
203
|
+
console.error(`[Scheduler] Check 1: Does Queue exist? "${queuePath}"`);
|
|
204
|
+
console.error(`[Scheduler] Check 2: Does Service Account exist? "${serviceAccountEmail}"`);
|
|
205
|
+
console.error(`[Scheduler] Raw Error: ${error.message}`);
|
|
206
|
+
|
|
207
|
+
return {
|
|
208
|
+
computation: entry.originalName,
|
|
209
|
+
status: 'error',
|
|
210
|
+
error: `Configuration Error: Queue or Service Account not found. (${error.message})`
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// General Errors
|
|
192
215
|
console.error(`[Scheduler] Failed to dispatch ${entry.originalName}:`, error.message);
|
|
193
216
|
return { computation: entry.originalName, status: 'error', error: error.message };
|
|
194
217
|
}
|