aponyx 0.1.18__py3-none-any.whl
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/workflows/concrete_steps.py

@@ -0,0 +1,720 @@

"""
Concrete workflow step implementations.

Each step wraps existing functionality from aponyx modules:
- DataStep: Fetches/generates data (wraps data providers)
- SignalStep: Computes signals (wraps models.signals)
- SuitabilityStep: Evaluates signal quality (wraps evaluation.suitability)
- BacktestStep: Runs strategy backtest (wraps backtest.engine)
- PerformanceStep: Computes extended metrics (wraps evaluation.performance)
- VisualizationStep: Generates charts (wraps visualization.plots)
"""

import logging
from pathlib import Path
from typing import Any

import pandas as pd

from aponyx.config import (
    DATA_DIR,
    RAW_DIR,
    REGISTRY_PATH,
    SIGNAL_CATALOG_PATH,
    STRATEGY_CATALOG_PATH,
)
from aponyx.data import DataRegistry
from aponyx.data.fetch_registry import get_fetch_spec
from aponyx.data.loaders import load_instrument_from_raw
from aponyx.models.registry import SignalRegistry
from aponyx.evaluation.suitability import (
    evaluate_signal_suitability,
    compute_forward_returns,
    SuitabilityConfig,
    generate_suitability_report,
    save_report as save_suitability_report,
)
from aponyx.evaluation.performance import (
    analyze_backtest_performance,
    PerformanceConfig,
    generate_performance_report,
    save_report as save_performance_report,
)
from aponyx.backtest import run_backtest
from aponyx.backtest.registry import StrategyRegistry
from aponyx.visualization import plot_equity_curve, plot_drawdown, plot_signal
from aponyx.persistence import load_parquet, save_parquet
from .steps import BaseWorkflowStep

logger = logging.getLogger(__name__)
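Every class below inherits `BaseWorkflowStep` (defined in `steps.py`, +180 lines in this release) and exposes the same hooks: `name`, `execute(context)`, `output_exists()`, `get_output_path()`, and `load_cached_output()`. The engine that drives them lives in `engine.py` and is not shown in this file; the following is a minimal sketch, assuming only the context-threading convention visible in the `execute()` bodies below (each step's output stored under its own name), of how such a loop could work:

```python
# Hypothetical driver loop - the real one lives in aponyx/workflows/engine.py,
# which this file does not show. Only the hook names are taken from the source.
from typing import Any, Protocol


class WorkflowStep(Protocol):
    name: str

    def execute(self, context: dict[str, Any]) -> dict[str, Any]: ...
    def output_exists(self) -> bool: ...
    def load_cached_output(self) -> dict[str, Any]: ...


def run_steps(steps: list[WorkflowStep], context: dict[str, Any]) -> dict[str, Any]:
    for step in steps:
        if step.output_exists():
            # Step already produced artifacts on disk - reuse them
            context[step.name] = step.load_cached_output()
        else:
            context[step.name] = step.execute(context)
    return context
```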
class DataStep(BaseWorkflowStep):
    """Load all required market data from registry or raw files."""

    @property
    def name(self) -> str:
        return "data"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        # Get all securities from Bloomberg securities config
        # Download all configured securities regardless of signal requirements
        from aponyx.data.bloomberg_config import list_securities

        all_securities = list_securities()  # Get all security IDs

        # Initialize registry
        data_registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        market_data = {}

        for security_id in sorted(all_securities):
            # When force_rerun is enabled, skip registry lookup and fetch fresh data
            if not self.config.force_rerun:
                # Try loading from registry first (cached/processed data)
                matching_datasets = data_registry.list_datasets(instrument=security_id)

                if matching_datasets:
                    # Use most recent dataset from registry
                    dataset_name = sorted(matching_datasets)[-1]
                    info = data_registry.get_dataset_info(dataset_name)
                    df = load_parquet(info["file_path"])
                    market_data[security_id] = df
                    logger.debug(
                        "Loaded %s from registry: %d rows", security_id, len(df)
                    )
                    continue

            # Registry empty or force_rerun enabled - handle bloomberg vs file/synthetic sources
            if self.config.data_source == "bloomberg":
                # Bloomberg source: fetch fresh data or update current day
                logger.info(
                    "No cached data for %s - fetching from Bloomberg",
                    security_id,
                )

                from aponyx.data import fetch_cdx, fetch_vix, fetch_etf, BloombergSource
                from aponyx.data.bloomberg_config import get_security_spec

                source = BloombergSource()

                # Get instrument type for this security
                spec = get_security_spec(security_id)
                instrument_type = spec.instrument_type

                # Determine which fetch function to use based on instrument type
                if instrument_type == "vix":
                    df = fetch_vix(
                        source,
                        update_current_day=self.config.force_rerun,
                    )
                elif instrument_type == "etf":
                    df = fetch_etf(
                        source,
                        security=security_id,
                        update_current_day=self.config.force_rerun,
                    )
                elif instrument_type == "cdx":
                    df = fetch_cdx(
                        source,
                        security=security_id,
                        update_current_day=self.config.force_rerun,
                    )
                else:
                    raise ValueError(f"Unknown instrument type: {instrument_type}")

                market_data[security_id] = df
                logger.info(
                    "Fetched %s from Bloomberg: %d rows",
                    security_id,
                    len(df),
                )
                continue

            # For file/synthetic sources, try to load from raw directory
            raw_data_dir = RAW_DIR / self.config.data_source

            if not raw_data_dir.exists():
                raise ValueError(
                    f"No datasets found for security '{security_id}'. "
                    f"Raw data directory does not exist: {raw_data_dir}"
                )

            logger.info(
                "No cached data for %s - attempting to load from %s",
                security_id,
                raw_data_dir,
            )

            # Get instrument type for this security
            from aponyx.data.bloomberg_config import get_security_spec

            spec = get_security_spec(security_id)
            instrument_type = spec.instrument_type

            # Get fetch specification from registry
            fetch_spec = get_fetch_spec(instrument_type)

            # Load instrument data using generic loader with specific security
            # VIX doesn't require security parameter (single instrument)
            securities = [security_id] if fetch_spec.requires_security else None
            df = load_instrument_from_raw(
                raw_data_dir,
                instrument_type,
                fetch_spec.fetch_fn,
                securities,
            )

            market_data[security_id] = df

        output = {"market_data": market_data}
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        # Data step doesn't cache (always loads from registry)
        return False

    def get_output_path(self) -> Path:
        return self.config.output_dir / "data"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached market data (always reload from registry)."""
        # Data step always reloads from registry, never uses cache
        return self.execute({})
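DataStep resolves each security through three tiers: the parquet-backed `DataRegistry` first (unless `force_rerun`), then a live Bloomberg fetch when `data_source == "bloomberg"`, then raw files under `RAW_DIR/<data_source>`. Whichever tier wins, downstream steps see one shape. A hedged illustration of that shape (security IDs and column names are examples; the only column the later steps visibly rely on is `"spread"` for CDX products):

```python
import pandas as pd

idx = pd.date_range("2024-01-02", periods=3, freq="B")

# Example security IDs - real ones come from bloomberg_securities.json
context = {
    "data": {
        "market_data": {
            "cdx_ig_5y": pd.DataFrame({"spread": [52.1, 52.8, 51.9]}, index=idx),
            "vix": pd.DataFrame({"close": [13.2, 13.5, 12.9]}, index=idx),
        }
    }
}
```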
class SignalStep(BaseWorkflowStep):
    """Compute signal values using indicator + transformation composition."""

    @property
    def name(self) -> str:
        return "signal"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        # Get all market data from previous step (keyed by security ID)
        raw_market_data = context["data"]["market_data"]

        # Load all registries for four-stage pipeline
        from aponyx.config import (
            INDICATOR_TRANSFORMATION_PATH,
            SCORE_TRANSFORMATION_PATH,
            SIGNAL_CATALOG_PATH,
            SIGNAL_TRANSFORMATION_PATH,
        )
        from aponyx.models.registry import (
            IndicatorTransformationRegistry,
            ScoreTransformationRegistry,
            SignalRegistry,
            SignalTransformationRegistry,
        )

        indicator_registry = IndicatorTransformationRegistry(
            INDICATOR_TRANSFORMATION_PATH
        )
        score_registry = ScoreTransformationRegistry(SCORE_TRANSFORMATION_PATH)
        signal_transformation_registry = SignalTransformationRegistry(
            SIGNAL_TRANSFORMATION_PATH
        )
        signal_registry = SignalRegistry(SIGNAL_CATALOG_PATH)

        # Get the specific signal metadata for this workflow
        signal_metadata = signal_registry.get_metadata(self.config.signal_name)

        # Build securities mapping from indicator's default_securities
        # (or use config override if provided)
        if self.config.security_mapping:
            securities_to_use = self.config.security_mapping
            logger.info(
                "Using custom security mapping for signal '%s': %s",
                self.config.signal_name,
                securities_to_use,
            )
        else:
            # Get default_securities from the indicator
            indicator_name = signal_metadata.indicator_transformation
            indicator_metadata = indicator_registry.get_metadata(indicator_name)
            securities_to_use = indicator_metadata.default_securities
            logger.info(
                "Using default securities from indicator '%s' for signal '%s': %s",
                indicator_name,
                self.config.signal_name,
                securities_to_use,
            )

        # Build instrument-type-keyed market data dict for signal computation
        # Map instrument types (cdx, etf, vix) to actual security data
        market_data = {}
        for inst_type, security_id in securities_to_use.items():
            if security_id not in raw_market_data:
                raise ValueError(
                    f"Signal '{self.config.signal_name}' requires security '{security_id}' "
                    f"(instrument type '{inst_type}'), but it was not loaded. "
                    f"Available: {sorted(raw_market_data.keys())}"
                )
            market_data[inst_type] = raw_market_data[security_id]
            logger.debug(
                "Mapped %s -> %s (%d rows)",
                inst_type,
                security_id,
                len(raw_market_data[security_id]),
            )

        # Compute the specific signal for this workflow using four-stage pipeline
        from aponyx.models.signal_composer import compose_signal

        signal = compose_signal(
            signal_name=self.config.signal_name,
            market_data=market_data,
            indicator_registry=indicator_registry,
            score_registry=score_registry,
            signal_transformation_registry=signal_transformation_registry,
            signal_registry=signal_registry,
            indicator_transformation_override=self.config.indicator_transformation_override,
            score_transformation_override=self.config.score_transformation_override,
            signal_transformation_override=self.config.signal_transformation_override,
            include_intermediates=False,
        )

        logger.info(
            "Computed signal '%s': %d values, %.2f%% non-null",
            self.config.signal_name,
            len(signal),
            100 * signal.notna().sum() / len(signal),
        )

        # Save signal to output directory
        output_dir = context.get("output_dir", self.config.output_dir) / "signals"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Name the file after the signal so load_cached_output() can find it
        output_path = output_dir / f"{self.config.signal_name}.parquet"
        signal_df = signal.to_frame(name="value")
        save_parquet(signal_df, output_path)
        logger.debug(
            "Saved signal to %s",
            output_path,
        )

        # Return the signal and the securities used for downstream steps
        output = {
            "signal": signal,
            "securities_used": securities_to_use,
        }
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        """Check if signals directory exists and has signal files."""
        signal_dir = self.get_output_path()
        if not signal_dir.exists():
            return False
        # Check if there are any signal files
        signal_files = list(signal_dir.glob("*.parquet"))
        return len(signal_files) > 0

    def get_output_path(self) -> Path:
        # Use workflow output_dir from config (timestamped folder)
        return self.config.output_dir / "signals"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached signal from disk."""
        signal_dir = self.get_output_path()
        signal_file = signal_dir / f"{self.config.signal_name}.parquet"

        if not signal_file.exists():
            raise FileNotFoundError(f"Cached signal file not found: {signal_file}")

        signal_df = load_parquet(signal_file)
        signal = signal_df["value"]

        logger.info(
            "Loaded cached signal '%s': %d values",
            self.config.signal_name,
            len(signal),
        )

        # Securities used info is not cached - rebuild from the config override
        # or the indicator's default securities (mirrors execute())
        if self.config.security_mapping:
            securities_used = self.config.security_mapping
        else:
            from aponyx.config import INDICATOR_TRANSFORMATION_PATH
            from aponyx.models.registry import IndicatorTransformationRegistry

            signal_registry = SignalRegistry(SIGNAL_CATALOG_PATH)
            signal_metadata = signal_registry.get_metadata(self.config.signal_name)
            indicator_registry = IndicatorTransformationRegistry(
                INDICATOR_TRANSFORMATION_PATH
            )
            securities_used = indicator_registry.get_metadata(
                signal_metadata.indicator_transformation
            ).default_securities

        return {
            "signal": signal,
            "securities_used": securities_used,
        }
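Outside a workflow, the same four-stage composition can be invoked directly. A sketch, assuming the packaged catalog paths, that the three `*_override` parameters passed above default to `None`, and an illustrative signal name that would have to exist in `signal_catalog.json`:

```python
from aponyx.config import (
    INDICATOR_TRANSFORMATION_PATH,
    SCORE_TRANSFORMATION_PATH,
    SIGNAL_CATALOG_PATH,
    SIGNAL_TRANSFORMATION_PATH,
)
from aponyx.models.registry import (
    IndicatorTransformationRegistry,
    ScoreTransformationRegistry,
    SignalRegistry,
    SignalTransformationRegistry,
)
from aponyx.models.signal_composer import compose_signal

# Instrument-type-keyed frames, as built by SignalStep.execute() above
market_data = ...  # e.g. {"cdx": cdx_df, "vix": vix_df}

signal = compose_signal(
    signal_name="example_signal",  # illustrative - must exist in the catalog
    market_data=market_data,
    indicator_registry=IndicatorTransformationRegistry(INDICATOR_TRANSFORMATION_PATH),
    score_registry=ScoreTransformationRegistry(SCORE_TRANSFORMATION_PATH),
    signal_transformation_registry=SignalTransformationRegistry(
        SIGNAL_TRANSFORMATION_PATH
    ),
    signal_registry=SignalRegistry(SIGNAL_CATALOG_PATH),
    include_intermediates=False,
)
```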
class SuitabilityStep(BaseWorkflowStep):
    """Evaluate signal-product suitability."""

    @property
    def name(self) -> str:
        return "suitability"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        # Get signal from previous step
        signal = context["signal"]["signal"]

        # Get product from workflow config
        product = self.config.product

        # Load spread data for product from DataStep context
        market_data = context["data"]["market_data"]
        spread_df = self._load_spread_for_product(market_data, product)

        # Compute forward returns for evaluation
        forward_returns = compute_forward_returns(spread_df["spread"], lags=[1])
        target_change = forward_returns[1]

        # Run suitability evaluation
        config = SuitabilityConfig()
        result = evaluate_signal_suitability(signal, target_change, config)

        logger.debug(
            "Suitability: %s, score=%.2f",
            result.decision,
            result.composite_score,
        )

        # Generate and save report
        report = generate_suitability_report(result, self.config.signal_name, product)

        # Get workflow output directory from context (timestamped folder)
        workflow_output_dir = context.get("output_dir", self.config.output_dir)
        output_dir = workflow_output_dir / "reports"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Extract timestamp from workflow output directory name
        workflow_dir_name = workflow_output_dir.name
        # Expected format: {signal}_{strategy}_{YYYYMMDD}_{HHMMSS}
        parts = workflow_dir_name.split("_")
        timestamp = f"{parts[-2]}_{parts[-1]}"  # YYYYMMDD_HHMMSS

        save_suitability_report(
            report, self.config.signal_name, product, output_dir, timestamp
        )

        output = {"suitability_result": result, "product": product}
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        # Check for suitability report markdown file
        output_dir = self.get_output_path()
        report_files = list(output_dir.glob(f"{self.config.signal_name}_*.md"))
        return len(report_files) > 0

    def get_output_path(self) -> Path:
        # Use workflow output_dir from config (timestamped folder)
        return self.config.output_dir / "reports"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached suitability evaluation (report only, re-run for full data)."""
        # Get product from workflow config
        product = self.config.product

        # We only cache the product info, not the full evaluation result
        # Report exists on disk but we don't load it back into memory
        return {"suitability_result": None, "product": product}

    def _load_spread_for_product(
        self, market_data: dict[str, pd.DataFrame], product: str
    ) -> pd.DataFrame:
        """
        Load spread data for product from market data context.

        Parameters
        ----------
        market_data : dict[str, pd.DataFrame]
            Market data from DataStep context.
        product : str
            Product identifier (e.g., "cdx_ig_5y").

        Returns
        -------
        pd.DataFrame
            Spread data with DatetimeIndex.

        Raises
        ------
        ValueError
            If no dataset found for product.
        """
        if product not in market_data:
            available = sorted(market_data.keys())
            raise ValueError(
                f"No dataset found for security '{product}'. "
                f"Available datasets: {available}"
            )
        return market_data[product]
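The suitability API used above can also be exercised standalone. A sketch, assuming `signal` and `spread` are date-aligned `pd.Series` and that `compute_forward_returns` returns an object indexable by lag (it is indexed with `[1]` above); decision thresholds come from `SuitabilityConfig` defaults in `evaluation/suitability/config.py`:

```python
import pandas as pd

from aponyx.evaluation.suitability import (
    SuitabilityConfig,
    compute_forward_returns,
    evaluate_signal_suitability,
)

signal: pd.Series = ...  # from SignalStep
spread: pd.Series = ...  # product spread series from DataStep

forward_returns = compute_forward_returns(spread, lags=[1])
result = evaluate_signal_suitability(signal, forward_returns[1], SuitabilityConfig())
print(result.decision, result.composite_score)
```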
class BacktestStep(BaseWorkflowStep):
    """Run strategy backtest."""

    @property
    def name(self) -> str:
        return "backtest"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        # Get signal from previous step
        signal = context["signal"]["signal"]

        # Get product from config, or from suitability step if available
        product = context.get("suitability", {}).get("product") or self.config.product

        # Load spread data for backtest from DataStep context
        market_data = context["data"]["market_data"]
        spread_df = self._load_spread_for_product(market_data, product)
        spread = spread_df["spread"]

        # Align signal and spread to common dates
        common_idx = signal.index.intersection(spread.index)
        signal = signal.loc[common_idx]
        spread = spread.loc[common_idx]

        logger.debug(
            "Aligned data: %d rows from %s to %s",
            len(common_idx),
            common_idx[0].date(),
            common_idx[-1].date(),
        )

        # Get strategy config from catalog
        strategy_registry = StrategyRegistry(STRATEGY_CATALOG_PATH)
        strategy_metadata = strategy_registry.get_metadata(self.config.strategy_name)
        backtest_config = strategy_metadata.to_config()

        # Run backtest using function (not class)
        result = run_backtest(signal, spread, backtest_config)

        # Compute quick Sharpe for debug logging (handle zero std)
        pnl_std = result.pnl["net_pnl"].std()
        quick_sharpe = (
            result.pnl["net_pnl"].mean() / pnl_std * (252**0.5) if pnl_std > 0 else 0.0
        )
        logger.debug(
            "Backtest complete: %d trades, sharpe=%.2f",
            result.positions["position"].diff().abs().sum() / 2,
            quick_sharpe,
        )

        # Save results
        output_dir = context.get("output_dir", self.config.output_dir) / "backtest"
        output_dir.mkdir(parents=True, exist_ok=True)

        save_parquet(result.pnl, output_dir / "pnl.parquet")
        save_parquet(result.positions, output_dir / "positions.parquet")

        output = {"backtest_result": result}
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        pnl_path = self.get_output_path() / "pnl.parquet"
        positions_path = self.get_output_path() / "positions.parquet"
        return pnl_path.exists() and positions_path.exists()

    def get_output_path(self) -> Path:
        # Use workflow output_dir from config (timestamped folder)
        return self.config.output_dir / "backtest"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached backtest results from disk."""
        from aponyx.backtest import BacktestResult

        output_dir = self.get_output_path()
        pnl = load_parquet(output_dir / "pnl.parquet")
        positions = load_parquet(output_dir / "positions.parquet")

        # Create minimal metadata for cached results
        metadata = {
            "signal_name": self.config.signal_name,
            "strategy_name": self.config.strategy_name,
            "product": self.config.product,
            "cached": True,
        }

        result = BacktestResult(pnl=pnl, positions=positions, metadata=metadata)
        return {"backtest_result": result}

    def _load_spread_for_product(
        self, market_data: dict[str, pd.DataFrame], product: str
    ) -> pd.DataFrame:
        """
        Load spread data for product from market data context.

        Parameters
        ----------
        market_data : dict[str, pd.DataFrame]
            Market data from DataStep context.
        product : str
            Product identifier (e.g., "cdx_ig_5y").

        Returns
        -------
        pd.DataFrame
            Spread data with DatetimeIndex.

        Raises
        ------
        ValueError
            If no dataset found for product.
        """
        if product not in market_data:
            available = sorted(market_data.keys())
            raise ValueError(
                f"No dataset found for security '{product}'. "
                f"Available datasets: {available}"
            )
        return market_data[product]
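BacktestStep is a thin wrapper: a catalog lookup, index alignment, one function call. Its core as a standalone sketch (strategy name illustrative; it must appear in `strategy_catalog.json`):

```python
import pandas as pd

from aponyx.backtest import run_backtest
from aponyx.backtest.registry import StrategyRegistry
from aponyx.config import STRATEGY_CATALOG_PATH

signal: pd.Series = ...  # from SignalStep
spread: pd.Series = ...  # e.g. market_data["cdx_ig_5y"]["spread"]

strategy_registry = StrategyRegistry(STRATEGY_CATALOG_PATH)
backtest_config = strategy_registry.get_metadata("example_strategy").to_config()

# Align signal and spread to their common dates before backtesting
common_idx = signal.index.intersection(spread.index)
result = run_backtest(signal.loc[common_idx], spread.loc[common_idx], backtest_config)

print(result.pnl["net_pnl"].sum())
```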
class PerformanceStep(BaseWorkflowStep):
    """Compute extended performance metrics."""

    @property
    def name(self) -> str:
        return "performance"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        backtest_result = context["backtest"]["backtest_result"]

        # Compute comprehensive performance metrics
        config = PerformanceConfig(
            n_subperiods=4,
            rolling_window=63,
            attribution_quantiles=3,
        )
        performance = analyze_backtest_performance(backtest_result, config)

        logger.debug(
            "Performance metrics: sharpe=%.2f, max_dd=%.2f%%",
            performance.metrics.sharpe_ratio,
            performance.metrics.max_drawdown * 100,
        )

        # Generate and save report
        report = generate_performance_report(
            performance,
            signal_id=self.config.signal_name,
            strategy_id=self.config.strategy_name,
            generate_tearsheet=False,
        )

        # Get workflow output directory from context (timestamped folder)
        workflow_output_dir = context.get("output_dir", self.config.output_dir)
        output_dir = workflow_output_dir / "reports"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Extract timestamp from workflow output directory name
        workflow_dir_name = workflow_output_dir.name
        # Expected format: {signal}_{strategy}_{YYYYMMDD}_{HHMMSS}
        parts = workflow_dir_name.split("_")
        timestamp = f"{parts[-2]}_{parts[-1]}"  # YYYYMMDD_HHMMSS

        save_performance_report(
            report,
            self.config.signal_name,
            self.config.strategy_name,
            output_dir,
            timestamp,
        )

        output = {"performance": performance}
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        # Check for performance report markdown file
        output_dir = self.get_output_path()
        report_files = list(
            output_dir.glob(
                f"{self.config.signal_name}_{self.config.strategy_name}_*.md"
            )
        )
        return len(report_files) > 0

    def get_output_path(self) -> Path:
        # Use workflow output_dir from config (timestamped folder)
        return self.config.output_dir / "reports"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached performance evaluation (report only, no in-memory data)."""
        # Performance report exists on disk but we don't load it back
        return {"performance": None}
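The analysis configuration above is fixed in code: four sub-periods, a 63-day rolling window (roughly one trading quarter), tercile attribution buckets. Run standalone, it looks like this - a sketch; the parameter semantics are inferred from their names, and the defaults live in `evaluation/performance/config.py`:

```python
from aponyx.evaluation.performance import (
    PerformanceConfig,
    analyze_backtest_performance,
)

backtest_result = ...  # BacktestResult from run_backtest / BacktestStep

config = PerformanceConfig(
    n_subperiods=4,           # split the sample into four sub-periods
    rolling_window=63,        # ~one quarter of trading days
    attribution_quantiles=3,  # tercile signal-attribution buckets
)
performance = analyze_backtest_performance(backtest_result, config)
print(f"Sharpe {performance.metrics.sharpe_ratio:.2f}, "
      f"max drawdown {performance.metrics.max_drawdown:.2%}")
```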
class VisualizationStep(BaseWorkflowStep):
    """Generate visualization charts."""

    @property
    def name(self) -> str:
        return "visualization"

    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        self._log_start()

        backtest_result = context["backtest"]["backtest_result"]
        pnl = backtest_result.pnl
        positions = backtest_result.positions

        # Generate charts with descriptive titles
        title_prefix = f"{self.config.signal_name} ({self.config.strategy_name})"
        equity_fig = plot_equity_curve(
            pnl["net_pnl"],
            title=f"Equity Curve: {title_prefix}",
            show_drawdown_shading=True,
        )
        drawdown_fig = plot_drawdown(
            pnl["net_pnl"],
            title=f"Drawdown: {title_prefix}",
        )
        signal_fig = plot_signal(
            positions["signal"],
            title=f"Signal: {self.config.signal_name}",
        )

        logger.debug("Generated 3 visualization charts")

        # Save charts (HTML)
        output_dir = (
            context.get("output_dir", self.config.output_dir) / "visualizations"
        )
        output_dir.mkdir(parents=True, exist_ok=True)

        equity_fig.write_html(output_dir / "equity_curve.html")
        drawdown_fig.write_html(output_dir / "drawdown.html")
        signal_fig.write_html(output_dir / "signal.html")

        output = {
            "equity_fig": equity_fig,
            "drawdown_fig": drawdown_fig,
            "signal_fig": signal_fig,
        }
        self._log_complete(output)
        return output

    def output_exists(self) -> bool:
        equity_path = self.get_output_path() / "equity_curve.html"
        return equity_path.exists()

    def get_output_path(self) -> Path:
        # Use workflow output_dir from config (timestamped folder);
        # must match the "visualizations" directory written in execute()
        return self.config.output_dir / "visualizations"

    def load_cached_output(self) -> dict[str, Any]:
        """Load cached visualizations (charts only, no in-memory figures)."""
        # Charts exist as HTML files on disk but we don't load them back
        return {"equity_fig": None, "drawdown_fig": None, "signal_fig": None}