aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/workflows/config.py
@@ -0,0 +1,122 @@
"""
Workflow configuration management.

Defines immutable configuration for workflow execution including
signal/strategy selection, data sources, and execution options.
"""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal

from aponyx.config import DATA_WORKFLOWS_DIR

StepName = Literal[
    "data",
    "signal",
    "suitability",
    "backtest",
    "performance",
    "visualization",
]

# DataSource now accepts any string to support dynamic source discovery
DataSource = str


@dataclass(frozen=True)
class WorkflowConfig:
    """
    Immutable workflow execution configuration.

    Attributes
    ----------
    label : str
        Workflow label (lowercase, underscores only, pattern: ^[a-z][a-z0-9_]*$).
        Used for workflow identification and directory naming.
    signal_name : str
        Signal name from signal catalog.
    strategy_name : str
        Strategy name from strategy catalog.
    product : str
        Product identifier for backtesting (e.g., "cdx_ig_5y", "cdx_hy_5y").
    data_source : str
        Data source type (e.g., "synthetic", "file", "bloomberg", or custom sources).
    security_mapping : dict[str, str] | None
        Maps generic instrument types to specific securities.
        Example: {"cdx": "cdx_ig_5y", "etf": "hyg", "vix": "vix"}
        If None, uses defaults from indicator catalog.
    indicator_transformation_override : str | None
        Override indicator transformation from catalog (must exist in indicator_transformation.json).
        If None, uses indicator_transformation from signal catalog.
        Example: "spread_momentum_5d" to swap indicator while keeping score/signal transformations.
    score_transformation_override : str | None
        Override score transformation from catalog (must exist in score_transformation.json).
        If None, uses score_transformation from signal catalog.
        Example: "z_score_60d" to swap normalization window while keeping indicator/signal transformations.
    signal_transformation_override : str | None
        Override signal transformation from catalog (must exist in signal_transformation.json).
        If None, uses signal_transformation from signal catalog.
        Example: "bounded_2_0" to swap trading rules while keeping indicator/score transformations.
    steps : list[StepName] | None
        Specific steps to execute (None = all steps in order).
    force_rerun : bool
        Force re-execution even if cached outputs exist.
    output_dir : Path
        Base directory for workflow outputs.

    Notes
    -----
    Configuration is frozen to prevent accidental mutation during execution.
    Use dataclasses.replace() to create modified copies if needed.

    Four-Stage Transformation Pipeline
    ----------------------------------
    Security → Indicator → Score → Signal → Position

    Each signal references exactly one transformation from each stage (1:1:1 relationship).

    Runtime overrides allow swapping components at any stage without editing catalogs:
    - security_mapping: Override which securities to load for each instrument type
    - indicator_transformation_override: Swap indicator while keeping score/signal transformations
    - score_transformation_override: Swap normalization while keeping indicator/signal transformations
    - signal_transformation_override: Swap trading rules while keeping indicator/score transformations
    """

    label: str
    signal_name: str
    strategy_name: str
    product: str
    data_source: DataSource = "synthetic"
    security_mapping: dict[str, str] | None = None
    indicator_transformation_override: str | None = None
    score_transformation_override: str | None = None
    signal_transformation_override: str | None = None
    steps: list[StepName] | None = None
    force_rerun: bool = False
    output_dir: Path = field(default_factory=lambda: DATA_WORKFLOWS_DIR)

    def __post_init__(self) -> None:
        """Validate configuration on initialization."""
        import re

        # Validate label format
        if not re.match(r"^[a-z][a-z0-9_]*$", self.label):
            raise ValueError(
                f"Label '{self.label}' is invalid. "
                "Must start with lowercase letter and contain only lowercase letters, numbers, and underscores."
            )

        # Validate steps
        if self.steps is not None:
            valid_steps = {
                "data",
                "signal",
                "suitability",
                "backtest",
                "performance",
                "visualization",
            }
            invalid = set(self.steps) - valid_steps
            if invalid:
                raise ValueError(f"Invalid steps: {invalid}")
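The Notes section above recommends dataclasses.replace() for deriving variants of the frozen config. A minimal sketch of that pattern, assuming the module is importable as aponyx.workflows.config; the label, signal, strategy, and product values are illustrative only:

import dataclasses

from aponyx.workflows.config import WorkflowConfig

# Base configuration; __post_init__ validates the label on construction.
base = WorkflowConfig(
    label="spread_momentum_ig",
    signal_name="spread_momentum",
    strategy_name="balanced",
    product="cdx_ig_5y",
)

# The dataclass is frozen, so derive modified copies instead of mutating.
# replace() constructs a new instance, re-running validation on the result.
rerun = dataclasses.replace(base, force_rerun=True, steps=["data", "signal"])

# Invalid labels fail fast with ValueError.
try:
    WorkflowConfig(label="Bad-Label", signal_name="x", strategy_name="y", product="z")
except ValueError as exc:
    print(exc)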
aponyx/workflows/engine.py
@@ -0,0 +1,279 @@
"""
Workflow orchestration engine.

Coordinates sequential execution of workflow steps with dependency tracking,
caching, error handling, and progress logging.
"""

import logging
from datetime import datetime
from pathlib import Path
from typing import Any

from .config import WorkflowConfig
from .steps import WorkflowStep
from .registry import StepRegistry

logger = logging.getLogger(__name__)


class WorkflowEngine:
    """
    Workflow execution orchestrator.

    Manages sequential pipeline execution with:
    - Dependency resolution (data → signal → backtest → ...)
    - Smart caching (skip completed steps)
    - Error handling (save partial results)
    - Progress tracking (structured logging)

    Parameters
    ----------
    config : WorkflowConfig
        Workflow execution configuration.

    Examples
    --------
    Execute full workflow:
    >>> config = WorkflowConfig(
    ...     label="demo",
    ...     signal_name="spread_momentum",
    ...     strategy_name="balanced",
    ...     product="cdx_ig_5y",
    ... )
    >>> engine = WorkflowEngine(config)
    >>> results = engine.execute()

    Execute specific steps:
    >>> config = WorkflowConfig(
    ...     label="demo",
    ...     signal_name="spread_momentum",
    ...     strategy_name="balanced",
    ...     product="cdx_ig_5y",
    ...     steps=["data", "signal", "backtest"],
    ... )
    >>> engine = WorkflowEngine(config)
    >>> results = engine.execute()
    """

    def __init__(self, config: WorkflowConfig) -> None:
        self.config = config
        self._registry = StepRegistry()
        self._steps = self._resolve_steps()
        self._context: dict[str, Any] = {}
        self._start_time: datetime | None = None

    def execute(self) -> dict[str, Any]:
        """
        Execute workflow pipeline.

        Returns
        -------
        dict[str, Any]
            Workflow results with keys:
            - steps_completed: int (number of steps executed)
            - steps_skipped: int (number of cached steps skipped)
            - output_dir: Path (workflow output directory)
            - duration_seconds: float (total execution time)
            - errors: list[dict] (errors if any step failed)

        Notes
        -----
        Steps execute in dependency order. If step N fails, steps N+1...
        are skipped but results from steps 1...N-1 are preserved.
        """
        self._start_time = datetime.now()

        logger.info(
            "Starting workflow: signal=%s, strategy=%s, source=%s, steps=%d",
            self.config.signal_name,
            self.config.strategy_name,
            self.config.data_source,
            len(self._steps),
        )

        # Create workflow output directory upfront
        output_dir = self._create_output_directory()

        # Add output_dir to context for steps to use
        self._context["output_dir"] = output_dir

        completed = 0
        skipped = 0
        errors = []

        for idx, step in enumerate(self._steps, start=1):
            step_num = f"{idx}/{len(self._steps)}"

            # Check cache
            if self._should_skip_step(step):
                logger.info("Step %s: %s (cached)", step_num, step.name)
                # Load cached output into context for downstream steps
                try:
                    cached_output = step.load_cached_output()
                    self._context[step.name] = cached_output
                except Exception as e:
                    logger.warning(
                        "Failed to load cached output for %s: %s. Re-running step.",
                        step.name,
                        str(e),
                    )
                    # Fall through to execute step instead
                else:
                    skipped += 1
                    continue

            # Execute step
            try:
                logger.info("Step %s: %s", step_num, step.name)
                output = step.execute(self._context)
                self._context[step.name] = output
                completed += 1
                logger.info("Step %s: %s complete", step_num, step.name)

            except Exception as e:
                logger.error("Step %s: %s failed - %s", step_num, step.name, str(e))
                errors.append(
                    {
                        "step": step.name,
                        "error": str(e),
                        "type": type(e).__name__,
                    }
                )
                break  # Stop execution on first error

        duration = (datetime.now() - self._start_time).total_seconds()

        result = {
            "steps_completed": completed,
            "steps_skipped": skipped,
            "output_dir": output_dir,
            "duration_seconds": duration,
            "errors": errors,
        }

        # Save workflow metadata
        self._save_metadata(output_dir, completed, skipped, errors, duration)

        if errors:
            logger.error(
                "Workflow failed: completed=%d, skipped=%d, failed=%d (%.1fs)",
                completed,
                skipped,
                len(errors),
                duration,
            )
        else:
            logger.info(
                "Workflow complete: completed=%d, skipped=%d (%.1fs)",
                completed,
                skipped,
                duration,
            )

        return result

    def _resolve_steps(self) -> list[WorkflowStep]:
        """
        Resolve workflow steps from configuration.

        Returns
        -------
        list[WorkflowStep]
            Ordered list of step instances to execute.

        Notes
        -----
        If config.steps is None, returns all steps in dependency order.
        If config.steps is specified, returns subset in correct order.
        """
        all_steps = self._registry.get_all_steps(self.config)

        if self.config.steps is None:
            return all_steps

        # Filter to requested steps (maintain order)
        requested = set(self.config.steps)
        return [s for s in all_steps if s.name in requested]

    def _should_skip_step(self, step: WorkflowStep) -> bool:
        """
        Determine if step should be skipped (cached).

        Parameters
        ----------
        step : WorkflowStep
            Step to check.

        Returns
        -------
        bool
            True if step output exists and force_rerun is False.
        """
        if self.config.force_rerun:
            return False
        return step.output_exists()

    def _create_output_directory(self) -> Path:
        """
        Create timestamped output directory for workflow.

        Returns
        -------
        Path
            Created output directory path.

        Notes
        -----
        Format: workflows/{label}_{timestamp}/
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        dirname = f"{self.config.label}_{timestamp}"
        output_dir = self.config.output_dir / dirname
        output_dir.mkdir(parents=True, exist_ok=True)
        return output_dir

    def _save_metadata(
        self,
        output_dir: Path,
        completed: int,
        skipped: int,
        errors: list[dict[str, Any]],
        duration: float,
    ) -> None:
        """
        Save workflow metadata to metadata.json.

        Parameters
        ----------
        output_dir : Path
            Workflow output directory.
        completed : int
            Number of completed steps.
        skipped : int
            Number of skipped steps.
        errors : list of dict
            Error details if any.
        duration : float
            Execution duration in seconds.
        """
        from ..persistence import save_json

        # Extract securities_used from signal step if available
        securities_used = self._context.get("signal", {}).get("securities_used", {})

        metadata = {
            "label": self.config.label,
            "signal": self.config.signal_name,
            "strategy": self.config.strategy_name,
            "product": self.config.product,
            "data_source": self.config.data_source,
            "securities_used": securities_used,
            "timestamp": self._start_time.isoformat() if self._start_time else None,
            "duration_seconds": duration,
            "steps_completed": completed,
            "steps_skipped": skipped,
            "steps_total": len(self._steps),
            "status": "failed" if errors else "completed",
            "errors": errors if errors else None,
        }

        metadata_path = output_dir / "metadata.json"
        save_json(metadata, metadata_path)
        logger.debug("Saved workflow metadata: %s", metadata_path)
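Because execute() reports failures through the returned dict rather than raising, callers should inspect the errors list themselves. A hedged sketch of a caller, with illustrative config values:

from aponyx.workflows.config import WorkflowConfig
from aponyx.workflows.engine import WorkflowEngine

config = WorkflowConfig(
    label="demo_run",
    signal_name="spread_momentum",
    strategy_name="balanced",
    product="cdx_ig_5y",
)

result = WorkflowEngine(config).execute()

if result["errors"]:
    # Steps after the first failure were skipped; earlier outputs remain
    # on disk under result["output_dir"] alongside metadata.json.
    for err in result["errors"]:
        print(f"step {err['step']} failed: {err['type']}: {err['error']}")
else:
    print(
        f"completed {result['steps_completed']} steps "
        f"in {result['duration_seconds']:.1f}s -> {result['output_dir']}"
    )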
aponyx/workflows/registry.py
@@ -0,0 +1,116 @@
"""
Workflow step registry.

Central factory for creating workflow step instances.
Decouples engine from concrete step implementations.
"""

import logging
from typing import TYPE_CHECKING

from .config import WorkflowConfig
from .concrete_steps import (
    DataStep,
    SignalStep,
    SuitabilityStep,
    BacktestStep,
    PerformanceStep,
    VisualizationStep,
)

if TYPE_CHECKING:
    from .steps import WorkflowStep

logger = logging.getLogger(__name__)


class StepRegistry:
    """
    Factory for workflow step instances.

    Centralizes step creation and ensures consistent dependency order.

    Examples
    --------
    Get all steps for workflow:
    >>> registry = StepRegistry()
    >>> config = WorkflowConfig(
    ...     label="demo",
    ...     signal_name="spread_momentum",
    ...     strategy_name="balanced",
    ...     product="cdx_ig_5y",
    ... )
    >>> steps = registry.get_all_steps(config)

    Get specific step:
    >>> step = registry.get_step("data", config)
    """

    def __init__(self) -> None:
        self._step_order = [
            "data",
            "signal",
            "suitability",
            "backtest",
            "performance",
            "visualization",
        ]

    def get_canonical_order(self) -> list[str]:
        """
        Get canonical workflow step order.

        Returns
        -------
        list[str]
            Ordered list of step names.
        """
        return self._step_order.copy()

    def get_all_steps(self, config: WorkflowConfig) -> list["WorkflowStep"]:
        """
        Create all workflow steps in dependency order.

        Parameters
        ----------
        config : WorkflowConfig
            Workflow configuration.

        Returns
        -------
        list[WorkflowStep]
            Ordered list of step instances.
        """
        return [self._create_step(name, config) for name in self._step_order]

    def get_step(self, name: str, config: WorkflowConfig) -> "WorkflowStep":
        """
        Create single workflow step by name.

        Parameters
        ----------
        name : str
            Step name (data, signal, suitability, backtest, performance, visualization).
        config : WorkflowConfig
            Workflow configuration.

        Returns
        -------
        WorkflowStep
            Step instance.

        Raises
        ------
        ValueError
            If step name is invalid.
        """
        if name not in self._step_order:
            raise ValueError(f"Unknown step: {name}")
        return self._create_step(name, config)

    def _create_step(self, name: str, config: WorkflowConfig) -> "WorkflowStep":
        """Create step instance by name."""
        step_classes = {
            "data": DataStep,
            "signal": SignalStep,
            "suitability": SuitabilityStep,
            "backtest": BacktestStep,
            "performance": PerformanceStep,
            "visualization": VisualizationStep,
        }
        return step_classes[name](config)
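Two properties of the registry are worth seeing in action: the engine's _resolve_steps filters against get_all_steps, so the canonical order always wins regardless of how config.steps is written, and get_step rejects unknown names. A small sketch, with illustrative values:

from aponyx.workflows.config import WorkflowConfig
from aponyx.workflows.registry import StepRegistry

registry = StepRegistry()
print(registry.get_canonical_order())
# ['data', 'signal', 'suitability', 'backtest', 'performance', 'visualization']

config = WorkflowConfig(
    label="demo",
    signal_name="spread_momentum",
    strategy_name="balanced",
    product="cdx_ig_5y",
    # Deliberately out of order; the engine re-filters against the
    # canonical order, so execution still runs data -> signal -> backtest.
    steps=["backtest", "data", "signal"],
)

# Unknown step names fail fast.
try:
    registry.get_step("report", config)
except ValueError as exc:
    print(exc)  # Unknown step: report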
aponyx/workflows/steps.py
@@ -0,0 +1,180 @@
"""
Workflow step abstractions.

Defines protocol for executable workflow steps with dependency tracking,
caching, and standardized I/O.
"""

import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Protocol

from .config import WorkflowConfig

logger = logging.getLogger(__name__)


class WorkflowStep(Protocol):
    """
    Protocol for executable workflow steps.

    All workflow steps must implement this interface for orchestration.

    Attributes
    ----------
    name : str
        Step identifier (used for caching and logging).
    config : WorkflowConfig
        Workflow configuration.

    Methods
    -------
    execute(context)
        Execute step logic and return output data.
    output_exists()
        Check if step output already exists (for caching).
    get_output_path()
        Return path to expected output files.
    """

    name: str
    config: WorkflowConfig

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        """
        Execute workflow step.

        Parameters
        ----------
        context : dict[str, Any]
            Outputs from previous steps (keyed by step name).

        Returns
        -------
        dict[str, Any]
            Step output data to pass to subsequent steps.

        Notes
        -----
        Steps should be idempotent: running twice produces same results.
        Use context["data"] to access data from DataStep, etc.
        """
        ...

    def output_exists(self) -> bool:
        """
        Check if step output files exist.

        Returns
        -------
        bool
            True if all required outputs exist, False otherwise.

        Notes
        -----
        Used by caching logic to skip completed steps.
        Should check file existence and basic validation.
        """
        ...

    def get_output_path(self) -> Path:
        """
        Get expected output directory path.

        Returns
        -------
        Path
            Directory where step outputs are saved.
        """
        ...

    def load_cached_output(self) -> dict[str, Any]:
        """
        Load cached output from previous execution.

        Returns
        -------
        dict[str, Any]
            Cached step output data.

        Raises
        ------
        FileNotFoundError
            If cached output files don't exist.
        ValueError
            If cached output is invalid or corrupted.

        Notes
        -----
        Called when step is skipped due to caching.
        Must restore same output structure as execute() would return.
        """
        ...


class BaseWorkflowStep(ABC):
    """
    Abstract base class for workflow steps.

    Provides common functionality for concrete step implementations.

    Parameters
    ----------
    config : WorkflowConfig
        Workflow configuration.
    """

    def __init__(self, config: WorkflowConfig) -> None:
        self.config = config

    @property
    @abstractmethod
    def name(self) -> str:
        """Step identifier."""
        ...

    @abstractmethod
    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        """Execute step logic."""
        ...

    @abstractmethod
    def output_exists(self) -> bool:
        """Check if output exists."""
        ...

    @abstractmethod
    def get_output_path(self) -> Path:
        """Get output directory."""
        ...

    def load_cached_output(self) -> dict[str, Any]:
        """
        Load cached output from previous execution.

        Default implementation raises NotImplementedError.
        Steps that support caching must override this method.

        Returns
        -------
        dict[str, Any]
            Cached step output data.

        Raises
        ------
        NotImplementedError
            If step doesn't support loading cached outputs.
        """
        raise NotImplementedError(
            f"Step {self.name} doesn't support loading cached outputs. "
            "Override load_cached_output() method."
        )

    def _log_start(self) -> None:
        """Log step start."""
        logger.info("Starting step: %s", self.name)

    def _log_complete(self, output: dict[str, Any]) -> None:
        """Log step completion."""
        logger.info("Completed step: %s", self.name)