aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/workflows/concrete_steps.py
@@ -0,0 +1,720 @@
+ """
+ Concrete workflow step implementations.
+
+ Each step wraps existing functionality from aponyx modules:
+ - DataStep: Fetches/generates data (wraps data providers)
+ - SignalStep: Computes signals (wraps models.signals)
+ - SuitabilityStep: Evaluates signal quality (wraps evaluation.suitability)
+ - BacktestStep: Runs strategy backtest (wraps backtest.engine)
+ - PerformanceStep: Computes extended metrics (wraps evaluation.performance)
+ - VisualizationStep: Generates charts (wraps visualization.plots)
+ """
+
+ import logging
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+
+ from aponyx.config import (
+     DATA_DIR,
+     RAW_DIR,
+     REGISTRY_PATH,
+     SIGNAL_CATALOG_PATH,
+     STRATEGY_CATALOG_PATH,
+ )
+ from aponyx.data import DataRegistry
+ from aponyx.data.fetch_registry import get_fetch_spec
+ from aponyx.data.loaders import load_instrument_from_raw
+ from aponyx.models.registry import SignalRegistry
+ from aponyx.evaluation.suitability import (
+     evaluate_signal_suitability,
+     compute_forward_returns,
+     SuitabilityConfig,
+     generate_suitability_report,
+     save_report as save_suitability_report,
+ )
+ from aponyx.evaluation.performance import (
+     analyze_backtest_performance,
+     PerformanceConfig,
+     generate_performance_report,
+     save_report as save_performance_report,
+ )
+ from aponyx.backtest import run_backtest
+ from aponyx.backtest.registry import StrategyRegistry
+ from aponyx.visualization import plot_equity_curve, plot_drawdown, plot_signal
+ from aponyx.persistence import load_parquet, save_parquet
+ from .steps import BaseWorkflowStep
+
+ logger = logging.getLogger(__name__)
+
+
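The `BaseWorkflowStep` base class imported from `.steps` is not part of this hunk (it lives in `aponyx/workflows/steps.py`, listed above with +180 lines). A minimal sketch of the contract the concrete steps below rely on, inferred purely from how they use `self.config`, `_log_start`/`_log_complete`, and the four overridable methods; the real class may differ:

```python
# Hypothetical sketch of the BaseWorkflowStep contract, inferred from usage
# in concrete_steps.py; see aponyx/workflows/steps.py for the real class.
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any

logger = logging.getLogger(__name__)


class BaseWorkflowStep(ABC):
    def __init__(self, config: Any) -> None:
        # Workflow config (signal_name, strategy_name, product, output_dir, ...)
        self.config = config

    @property
    @abstractmethod
    def name(self) -> str:
        """Step name, used as this step's key in the shared context dict."""

    @abstractmethod
    def execute(self, context: dict[str, Any]) -> dict[str, Any]:
        """Run the step and return its output dict."""

    @abstractmethod
    def output_exists(self) -> bool:
        """Return True if a usable cached output is already on disk."""

    @abstractmethod
    def get_output_path(self) -> Path:
        """Directory where this step writes its artifacts."""

    @abstractmethod
    def load_cached_output(self) -> dict[str, Any]:
        """Rehydrate (possibly partially) a previously saved output."""

    def _log_start(self) -> None:
        logger.info("Starting step '%s'", self.name)

    def _log_complete(self, output: dict[str, Any]) -> None:
        logger.info("Completed step '%s' (keys: %s)", self.name, sorted(output))
```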
+ class DataStep(BaseWorkflowStep):
+     """Load all required market data from registry or raw files."""
+
+     @property
+     def name(self) -> str:
+         return "data"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         # Get all securities from Bloomberg securities config.
+         # Download all configured securities regardless of signal requirements.
+         from aponyx.data.bloomberg_config import list_securities
+
+         all_securities = list_securities()  # Get all security IDs
+
+         # Initialize registry
+         data_registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         market_data = {}
+
+         for security_id in sorted(all_securities):
+             # When force_rerun is enabled, skip registry lookup and fetch fresh data
+             if not self.config.force_rerun:
+                 # Try loading from registry first (cached/processed data)
+                 matching_datasets = data_registry.list_datasets(instrument=security_id)
+
+                 if matching_datasets:
+                     # Use most recent dataset from registry
+                     dataset_name = sorted(matching_datasets)[-1]
+                     info = data_registry.get_dataset_info(dataset_name)
+                     df = load_parquet(info["file_path"])
+                     market_data[security_id] = df
+                     logger.debug(
+                         "Loaded %s from registry: %d rows", security_id, len(df)
+                     )
+                     continue
+
+             # Registry empty or force_rerun enabled - handle bloomberg vs file/synthetic sources
+             if self.config.data_source == "bloomberg":
+                 # Bloomberg source: fetch fresh data or update current day
+                 logger.info(
+                     "No cached data for %s - fetching from Bloomberg",
+                     security_id,
+                 )
+
+                 from aponyx.data import fetch_cdx, fetch_vix, fetch_etf, BloombergSource
+                 from aponyx.data.bloomberg_config import get_security_spec
+
+                 source = BloombergSource()
+
+                 # Get instrument type for this security
+                 spec = get_security_spec(security_id)
+                 instrument_type = spec.instrument_type
+
+                 # Determine which fetch function to use based on instrument type
+                 if instrument_type == "vix":
+                     df = fetch_vix(
+                         source,
+                         update_current_day=self.config.force_rerun,
+                     )
+                 elif instrument_type == "etf":
+                     df = fetch_etf(
+                         source,
+                         security=security_id,
+                         update_current_day=self.config.force_rerun,
+                     )
+                 elif instrument_type == "cdx":
+                     df = fetch_cdx(
+                         source,
+                         security=security_id,
+                         update_current_day=self.config.force_rerun,
+                     )
+                 else:
+                     raise ValueError(f"Unknown instrument type: {instrument_type}")
+
+                 market_data[security_id] = df
+                 logger.info(
+                     "Fetched %s from Bloomberg: %d rows",
+                     security_id,
+                     len(df),
+                 )
+                 continue
+
+             # For file/synthetic sources, try to load from raw directory
+             raw_data_dir = RAW_DIR / self.config.data_source
+
+             if not raw_data_dir.exists():
+                 raise ValueError(
+                     f"No datasets found for security '{security_id}'. "
+                     f"Raw data directory does not exist: {raw_data_dir}"
+                 )
+
+             logger.info(
+                 "No cached data for %s - attempting to load from %s",
+                 security_id,
+                 raw_data_dir,
+             )
+
+             # Get instrument type for this security
+             from aponyx.data.bloomberg_config import get_security_spec
+
+             spec = get_security_spec(security_id)
+             instrument_type = spec.instrument_type
+
+             # Get fetch specification from registry
+             fetch_spec = get_fetch_spec(instrument_type)
+
+             # Load instrument data using generic loader with specific security.
+             # VIX doesn't require a security parameter (single instrument).
+             securities = [security_id] if fetch_spec.requires_security else None
+             df = load_instrument_from_raw(
+                 raw_data_dir,
+                 instrument_type,
+                 fetch_spec.fetch_fn,
+                 securities,
+             )
+
+             market_data[security_id] = df
+
+         output = {"market_data": market_data}
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         # Data step doesn't cache (always loads from registry)
+         return False
+
+     def get_output_path(self) -> Path:
+         return self.config.output_dir / "data"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached market data (always reload from registry)."""
+         # Data step always reloads from registry, never uses cache
+         return self.execute({})
+
+
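DataStep resolves each security in a fixed order: registry cache first (unless force_rerun), then a live Bloomberg pull when data_source is "bloomberg", otherwise raw files under RAW_DIR/<data_source>. A sketch of driving the step standalone, assuming the step receives its config object in the constructor; `StubConfig` and its defaults are hypothetical stand-ins for the real workflow config in `aponyx/workflows/config.py`:

```python
# Hypothetical standalone driver for DataStep. The attributes on StubConfig
# (data_source, force_rerun, output_dir) are the ones the step actually reads.
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from aponyx.workflows.concrete_steps import DataStep


@dataclass
class StubConfig:
    data_source: str = "synthetic"   # "bloomberg", "file", or "synthetic"
    force_rerun: bool = False        # True skips the registry cache entirely
    output_dir: Path = field(default_factory=lambda: Path("output/demo"))


step = DataStep(StubConfig())
context: dict[str, Any] = {}
# -> {"market_data": {security_id: DataFrame}}, keyed by security ID
context[step.name] = step.execute(context)
print(sorted(context["data"]["market_data"]))
```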
+ class SignalStep(BaseWorkflowStep):
+     """Compute signal values using indicator + transformation composition."""
+
+     @property
+     def name(self) -> str:
+         return "signal"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         # Get all market data from previous step (keyed by security ID)
+         raw_market_data = context["data"]["market_data"]
+
+         # Load all registries for four-stage pipeline
+         from aponyx.config import (
+             INDICATOR_TRANSFORMATION_PATH,
+             SCORE_TRANSFORMATION_PATH,
+             SIGNAL_CATALOG_PATH,
+             SIGNAL_TRANSFORMATION_PATH,
+         )
+         from aponyx.models.registry import (
+             IndicatorTransformationRegistry,
+             ScoreTransformationRegistry,
+             SignalRegistry,
+             SignalTransformationRegistry,
+         )
+
+         indicator_registry = IndicatorTransformationRegistry(
+             INDICATOR_TRANSFORMATION_PATH
+         )
+         score_registry = ScoreTransformationRegistry(SCORE_TRANSFORMATION_PATH)
+         signal_transformation_registry = SignalTransformationRegistry(
+             SIGNAL_TRANSFORMATION_PATH
+         )
+         signal_registry = SignalRegistry(SIGNAL_CATALOG_PATH)
+
+         # Get the specific signal metadata for this workflow
+         signal_metadata = signal_registry.get_metadata(self.config.signal_name)
+
+         # Build securities mapping from indicator's default_securities
+         # (or use config override if provided)
+         if self.config.security_mapping:
+             securities_to_use = self.config.security_mapping
+             logger.info(
+                 "Using custom security mapping for signal '%s': %s",
+                 self.config.signal_name,
+                 securities_to_use,
+             )
+         else:
+             # Get default_securities from the indicator
+             indicator_name = signal_metadata.indicator_transformation
+             indicator_metadata = indicator_registry.get_metadata(indicator_name)
+             securities_to_use = indicator_metadata.default_securities
+             logger.info(
+                 "Using default securities from indicator '%s' for signal '%s': %s",
+                 indicator_name,
+                 self.config.signal_name,
+                 securities_to_use,
+             )
+
+         # Build instrument-type-keyed market data dict for signal computation.
+         # Map instrument types (cdx, etf, vix) to actual security data.
+         market_data = {}
+         for inst_type, security_id in securities_to_use.items():
+             if security_id not in raw_market_data:
+                 raise ValueError(
+                     f"Signal '{self.config.signal_name}' requires security '{security_id}' "
+                     f"(instrument type '{inst_type}'), but it was not loaded. "
+                     f"Available: {sorted(raw_market_data.keys())}"
+                 )
+             market_data[inst_type] = raw_market_data[security_id]
+             logger.debug(
+                 "Mapped %s -> %s (%d rows)",
+                 inst_type,
+                 security_id,
+                 len(raw_market_data[security_id]),
+             )
+
+         # Compute the specific signal for this workflow using four-stage pipeline
+         from aponyx.models.signal_composer import compose_signal
+
+         signal = compose_signal(
+             signal_name=self.config.signal_name,
+             market_data=market_data,
+             indicator_registry=indicator_registry,
+             score_registry=score_registry,
+             signal_transformation_registry=signal_transformation_registry,
+             signal_registry=signal_registry,
+             indicator_transformation_override=self.config.indicator_transformation_override,
+             score_transformation_override=self.config.score_transformation_override,
+             signal_transformation_override=self.config.signal_transformation_override,
+             include_intermediates=False,
+         )
+
+         logger.info(
+             "Computed signal '%s': %d values, %.2f%% non-null",
+             self.config.signal_name,
+             len(signal),
+             100 * signal.notna().sum() / len(signal),
+         )
+
+         # Save signal to output directory.
+         # File name must match load_cached_output, which reads
+         # {signal_name}.parquet (saving as "signal.parquet" would break caching).
+         output_dir = context.get("output_dir", self.config.output_dir) / "signals"
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         output_path = output_dir / f"{self.config.signal_name}.parquet"
+         signal_df = signal.to_frame(name="value")
+         save_parquet(signal_df, output_path)
+         logger.debug(
+             "Saved signal to %s",
+             output_path,
+         )
+
+         # Return the signal and the securities used for downstream steps
+         output = {
+             "signal": signal,
+             "securities_used": securities_to_use,
+         }
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         """Check if signals directory exists and has signal files."""
+         signal_dir = self.get_output_path()
+         if not signal_dir.exists():
+             return False
+         # Check if there are any signal files
+         signal_files = list(signal_dir.glob("*.parquet"))
+         return len(signal_files) > 0
+
+     def get_output_path(self) -> Path:
+         # Use workflow output_dir from config (timestamped folder)
+         return self.config.output_dir / "signals"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached signal from disk."""
+         signal_dir = self.get_output_path()
+         signal_file = signal_dir / f"{self.config.signal_name}.parquet"
+
+         if not signal_file.exists():
+             raise FileNotFoundError(f"Cached signal file not found: {signal_file}")
+
+         signal_df = load_parquet(signal_file)
+         signal = signal_df["value"]
+
+         logger.info(
+             "Loaded cached signal '%s': %d values",
+             self.config.signal_name,
+             len(signal),
+         )
+
+         # Securities used info is not cached, will use defaults
+         signal_registry = SignalRegistry(SIGNAL_CATALOG_PATH)
+         signal_metadata = signal_registry.get_metadata(self.config.signal_name)
+         securities_used = (
+             self.config.security_mapping or signal_metadata.default_securities
+         )
+
+         return {
+             "signal": signal,
+             "securities_used": securities_used,
+         }
+
+
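The four-stage composition above (indicator transformation, score transformation, signal transformation, catalog entry) can also be exercised directly, outside the workflow. A sketch mirroring SignalStep.execute, assuming the `*_override` parameters of compose_signal default to None when omitted; the signal name shown is a hypothetical catalog entry:

```python
# Direct use of the four-stage signal pipeline, mirroring SignalStep.execute.
# Assumes compose_signal's *_override parameters default to None.
import pandas as pd

from aponyx.config import (
    INDICATOR_TRANSFORMATION_PATH,
    SCORE_TRANSFORMATION_PATH,
    SIGNAL_CATALOG_PATH,
    SIGNAL_TRANSFORMATION_PATH,
)
from aponyx.models.registry import (
    IndicatorTransformationRegistry,
    ScoreTransformationRegistry,
    SignalRegistry,
    SignalTransformationRegistry,
)
from aponyx.models.signal_composer import compose_signal


def compose_for(market_data: dict[str, pd.DataFrame]) -> pd.Series:
    """market_data is keyed by instrument type (cdx/etf/vix), as SignalStep builds it."""
    return compose_signal(
        signal_name="cdx_etf_basis",  # hypothetical catalog entry
        market_data=market_data,
        indicator_registry=IndicatorTransformationRegistry(
            INDICATOR_TRANSFORMATION_PATH
        ),
        score_registry=ScoreTransformationRegistry(SCORE_TRANSFORMATION_PATH),
        signal_transformation_registry=SignalTransformationRegistry(
            SIGNAL_TRANSFORMATION_PATH
        ),
        signal_registry=SignalRegistry(SIGNAL_CATALOG_PATH),
        include_intermediates=False,
    )
```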
+ class SuitabilityStep(BaseWorkflowStep):
+     """Evaluate signal-product suitability."""
+
+     @property
+     def name(self) -> str:
+         return "suitability"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         # Get signal from previous step
+         signal = context["signal"]["signal"]
+
+         # Get product from workflow config
+         product = self.config.product
+
+         # Load spread data for product from DataStep context
+         market_data = context["data"]["market_data"]
+         spread_df = self._load_spread_for_product(market_data, product)
+
+         # Compute forward returns for evaluation
+         forward_returns = compute_forward_returns(spread_df["spread"], lags=[1])
+         target_change = forward_returns[1]
+
+         # Run suitability evaluation
+         config = SuitabilityConfig()
+         result = evaluate_signal_suitability(signal, target_change, config)
+
+         logger.debug(
+             "Suitability: %s, score=%.2f",
+             result.decision,
+             result.composite_score,
+         )
+
+         # Generate and save report
+         report = generate_suitability_report(result, self.config.signal_name, product)
+
+         # Get workflow output directory from context (timestamped folder)
+         workflow_output_dir = context.get("output_dir", self.config.output_dir)
+         output_dir = workflow_output_dir / "reports"
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Extract timestamp from workflow output directory name.
+         # Expected format: {signal}_{strategy}_{YYYYMMDD}_{HHMMSS}
+         workflow_dir_name = workflow_output_dir.name
+         parts = workflow_dir_name.split("_")
+         timestamp = f"{parts[-2]}_{parts[-1]}"  # YYYYMMDD_HHMMSS
+
+         save_suitability_report(
+             report, self.config.signal_name, product, output_dir, timestamp
+         )
+
+         output = {"suitability_result": result, "product": product}
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         # Check for suitability report markdown file
+         output_dir = self.get_output_path()
+         report_files = list(output_dir.glob(f"{self.config.signal_name}_*.md"))
+         return len(report_files) > 0
+
+     def get_output_path(self) -> Path:
+         # Use workflow output_dir from config (timestamped folder)
+         return self.config.output_dir / "reports"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached suitability evaluation (report only, re-run for full data)."""
+         # Get product from workflow config
+         product = self.config.product
+
+         # We only cache the product info, not the full evaluation result.
+         # The report exists on disk but we don't load it back into memory.
+         return {"suitability_result": None, "product": product}
+
+     def _load_spread_for_product(
+         self, market_data: dict[str, pd.DataFrame], product: str
+     ) -> pd.DataFrame:
+         """
+         Load spread data for product from market data context.
+
+         Parameters
+         ----------
+         market_data : dict[str, pd.DataFrame]
+             Market data from DataStep context.
+         product : str
+             Product identifier (e.g., "cdx_ig_5y").
+
+         Returns
+         -------
+         pd.DataFrame
+             Spread data with DatetimeIndex.
+
+         Raises
+         ------
+         ValueError
+             If no dataset found for product.
+         """
+         if product not in market_data:
+             available = sorted(market_data.keys())
+             raise ValueError(
+                 f"No dataset found for security '{product}'. "
+                 f"Available datasets: {available}"
+             )
+         return market_data[product]
+
+
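The evaluation chain above also works on standalone series. A toy sketch with synthetic data, assuming compute_forward_returns returns a mapping keyed by lag (as the `forward_returns[1]` indexing above implies):

```python
# Toy suitability check on synthetic data; assumes compute_forward_returns
# returns a mapping keyed by lag, as SuitabilityStep's forward_returns[1] implies.
import numpy as np
import pandas as pd

from aponyx.evaluation.suitability import (
    SuitabilityConfig,
    compute_forward_returns,
    evaluate_signal_suitability,
)

idx = pd.date_range("2022-01-03", periods=500, freq="B")
rng = np.random.default_rng(0)
spread = pd.Series(100 + rng.normal(0, 1, len(idx)).cumsum(), index=idx)
signal = -spread.diff().rolling(5).mean()  # toy mean-reversion signal

target_change = compute_forward_returns(spread, lags=[1])[1]  # 1-day forward change
result = evaluate_signal_suitability(signal, target_change, SuitabilityConfig())
print(result.decision, result.composite_score)
```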
+ class BacktestStep(BaseWorkflowStep):
+     """Run strategy backtest."""
+
+     @property
+     def name(self) -> str:
+         return "backtest"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         # Get signal from previous step
+         signal = context["signal"]["signal"]
+
+         # Get product from config, or from suitability step if available
+         product = context.get("suitability", {}).get("product") or self.config.product
+
+         # Load spread data for backtest from DataStep context
+         market_data = context["data"]["market_data"]
+         spread_df = self._load_spread_for_product(market_data, product)
+         spread = spread_df["spread"]
+
+         # Align signal and spread to common dates
+         common_idx = signal.index.intersection(spread.index)
+         signal = signal.loc[common_idx]
+         spread = spread.loc[common_idx]
+
+         logger.debug(
+             "Aligned data: %d rows from %s to %s",
+             len(common_idx),
+             common_idx[0].date(),
+             common_idx[-1].date(),
+         )
+
+         # Get strategy config from catalog
+         strategy_registry = StrategyRegistry(STRATEGY_CATALOG_PATH)
+         strategy_metadata = strategy_registry.get_metadata(self.config.strategy_name)
+         backtest_config = strategy_metadata.to_config()
+
+         # Run backtest using function (not class)
+         result = run_backtest(signal, spread, backtest_config)
+
+         # Compute quick Sharpe for debug logging (handle zero std)
+         pnl_std = result.pnl["net_pnl"].std()
+         quick_sharpe = (
+             result.pnl["net_pnl"].mean() / pnl_std * (252**0.5) if pnl_std > 0 else 0.0
+         )
+         logger.debug(
+             "Backtest complete: %d trades, sharpe=%.2f",
+             result.positions["position"].diff().abs().sum() / 2,
+             quick_sharpe,
+         )
+
+         # Save results
+         output_dir = context.get("output_dir", self.config.output_dir) / "backtest"
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         save_parquet(result.pnl, output_dir / "pnl.parquet")
+         save_parquet(result.positions, output_dir / "positions.parquet")
+
+         output = {"backtest_result": result}
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         pnl_path = self.get_output_path() / "pnl.parquet"
+         positions_path = self.get_output_path() / "positions.parquet"
+         return pnl_path.exists() and positions_path.exists()
+
+     def get_output_path(self) -> Path:
+         # Use workflow output_dir from config (timestamped folder)
+         return self.config.output_dir / "backtest"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached backtest results from disk."""
+         from aponyx.backtest import BacktestResult
+
+         output_dir = self.get_output_path()
+         pnl = load_parquet(output_dir / "pnl.parquet")
+         positions = load_parquet(output_dir / "positions.parquet")
+
+         # Create minimal metadata for cached results
+         metadata = {
+             "signal_name": self.config.signal_name,
+             "strategy_name": self.config.strategy_name,
+             "product": self.config.product,
+             "cached": True,
+         }
+
+         result = BacktestResult(pnl=pnl, positions=positions, metadata=metadata)
+         return {"backtest_result": result}
+
+     def _load_spread_for_product(
+         self, market_data: dict[str, pd.DataFrame], product: str
+     ) -> pd.DataFrame:
+         """
+         Load spread data for product from market data context.
+
+         Parameters
+         ----------
+         market_data : dict[str, pd.DataFrame]
+             Market data from DataStep context.
+         product : str
+             Product identifier (e.g., "cdx_ig_5y").
+
+         Returns
+         -------
+         pd.DataFrame
+             Spread data with DatetimeIndex.
+
+         Raises
+         ------
+         ValueError
+             If no dataset found for product.
+         """
+         if product not in market_data:
+             available = sorted(market_data.keys())
+             raise ValueError(
+                 f"No dataset found for security '{product}'. "
+                 f"Available datasets: {available}"
+             )
+         return market_data[product]
+
+
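The quick Sharpe logged above is the standard daily-P&L annualization: mean over standard deviation, scaled by sqrt(252) trading days, with a zero-variance guard; the index intersection beforehand is what keeps signal and spread on common dates. Both isolated in a self-contained sketch:

```python
import numpy as np
import pandas as pd


def quick_sharpe(net_pnl: pd.Series) -> float:
    """Annualized Sharpe of daily P&L: mean/std * sqrt(252); 0.0 when flat."""
    std = net_pnl.std()
    return float(net_pnl.mean() / std * (252 ** 0.5)) if std > 0 else 0.0


idx = pd.date_range("2023-01-02", periods=252, freq="B")
pnl = pd.Series(np.random.default_rng(1).normal(0.02, 1.0, len(idx)), index=idx)
print(f"{quick_sharpe(pnl):.2f}")

# Alignment as in BacktestStep: restrict both series to their common dates.
a = pd.Series(1.0, index=idx[:200])
b = pd.Series(2.0, index=idx[50:])
common = a.index.intersection(b.index)
a, b = a.loc[common], b.loc[common]  # both now cover rows 50..199
```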
+ class PerformanceStep(BaseWorkflowStep):
+     """Compute extended performance metrics."""
+
+     @property
+     def name(self) -> str:
+         return "performance"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         backtest_result = context["backtest"]["backtest_result"]
+
+         # Compute comprehensive performance metrics
+         config = PerformanceConfig(
+             n_subperiods=4,
+             rolling_window=63,
+             attribution_quantiles=3,
+         )
+         performance = analyze_backtest_performance(backtest_result, config)
+
+         logger.debug(
+             "Performance metrics: sharpe=%.2f, max_dd=%.2f%%",
+             performance.metrics.sharpe_ratio,
+             performance.metrics.max_drawdown * 100,
+         )
+
+         # Generate and save report
+         report = generate_performance_report(
+             performance,
+             signal_id=self.config.signal_name,
+             strategy_id=self.config.strategy_name,
+             generate_tearsheet=False,
+         )
+
+         # Get workflow output directory from context (timestamped folder)
+         workflow_output_dir = context.get("output_dir", self.config.output_dir)
+         output_dir = workflow_output_dir / "reports"
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Extract timestamp from workflow output directory name.
+         # Expected format: {signal}_{strategy}_{YYYYMMDD}_{HHMMSS}
+         workflow_dir_name = workflow_output_dir.name
+         parts = workflow_dir_name.split("_")
+         timestamp = f"{parts[-2]}_{parts[-1]}"  # YYYYMMDD_HHMMSS
+
+         save_performance_report(
+             report,
+             self.config.signal_name,
+             self.config.strategy_name,
+             output_dir,
+             timestamp,
+         )
+
+         output = {"performance": performance}
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         # Check for performance report markdown file
+         output_dir = self.get_output_path()
+         report_files = list(
+             output_dir.glob(
+                 f"{self.config.signal_name}_{self.config.strategy_name}_*.md"
+             )
+         )
+         return len(report_files) > 0
+
+     def get_output_path(self) -> Path:
+         # Use workflow output_dir from config (timestamped folder)
+         return self.config.output_dir / "reports"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached performance evaluation (report only, no in-memory data)."""
+         # Performance report exists on disk but we don't load it back
+         return {"performance": None}
+
+
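Both SuitabilityStep and PerformanceStep recover the run timestamp by splitting the workflow directory name on underscores and taking the last two parts, which tolerates underscores inside the signal and strategy names themselves. A worked example of that parsing (the directory name shown is hypothetical):

```python
# Worked example of the timestamp extraction used in SuitabilityStep and
# PerformanceStep. Splitting on "_" and keeping the last two parts works even
# when the signal/strategy names contain underscores.
workflow_dir_name = "cdx_etf_basis_mean_reversion_20240131_142530"
parts = workflow_dir_name.split("_")
timestamp = f"{parts[-2]}_{parts[-1]}"
assert timestamp == "20240131_142530"  # YYYYMMDD_HHMMSS
```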
+ class VisualizationStep(BaseWorkflowStep):
+     """Generate visualization charts."""
+
+     @property
+     def name(self) -> str:
+         return "visualization"
+
+     def execute(self, context: dict[str, Any]) -> dict[str, Any]:
+         self._log_start()
+
+         backtest_result = context["backtest"]["backtest_result"]
+         pnl = backtest_result.pnl
+         positions = backtest_result.positions
+
+         # Generate charts with descriptive titles
+         title_prefix = f"{self.config.signal_name} ({self.config.strategy_name})"
+         equity_fig = plot_equity_curve(
+             pnl["net_pnl"],
+             title=f"Equity Curve: {title_prefix}",
+             show_drawdown_shading=True,
+         )
+         drawdown_fig = plot_drawdown(
+             pnl["net_pnl"],
+             title=f"Drawdown: {title_prefix}",
+         )
+         signal_fig = plot_signal(
+             positions["signal"],
+             title=f"Signal: {self.config.signal_name}",
+         )
+
+         logger.debug("Generated 3 visualization charts")
+
+         # Save charts (HTML)
+         output_dir = (
+             context.get("output_dir", self.config.output_dir) / "visualizations"
+         )
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         equity_fig.write_html(output_dir / "equity_curve.html")
+         drawdown_fig.write_html(output_dir / "drawdown.html")
+         signal_fig.write_html(output_dir / "signal.html")
+
+         output = {
+             "equity_fig": equity_fig,
+             "drawdown_fig": drawdown_fig,
+             "signal_fig": signal_fig,
+         }
+         self._log_complete(output)
+         return output
+
+     def output_exists(self) -> bool:
+         equity_path = self.get_output_path() / "equity_curve.html"
+         return equity_path.exists()
+
+     def get_output_path(self) -> Path:
+         # Use workflow output_dir from config (timestamped folder).
+         # Must match the "visualizations" directory written in execute();
+         # the singular "visualization" would make output_exists always False.
+         return self.config.output_dir / "visualizations"
+
+     def load_cached_output(self) -> dict[str, Any]:
+         """Load cached visualizations (charts only, no in-memory figures)."""
+         # Charts exist as HTML files on disk but we don't load them back
+         return {"equity_fig": None, "drawdown_fig": None, "signal_fig": None}
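Taken together, the steps communicate through a single context dict keyed by step name (context["data"], context["signal"], context["backtest"], ...). A minimal sketch of how an engine might chain them with cache short-circuiting; the real orchestration lives in aponyx/workflows/engine.py and aponyx/workflows/registry.py and may differ:

```python
# Hypothetical chaining loop, mirroring how these steps consume `context`;
# see aponyx/workflows/engine.py for the real engine.
from typing import Any

from aponyx.workflows.concrete_steps import (
    BacktestStep,
    DataStep,
    PerformanceStep,
    SignalStep,
    SuitabilityStep,
    VisualizationStep,
)


def run_workflow(config: Any) -> dict[str, Any]:
    steps = [
        DataStep(config),
        SignalStep(config),
        SuitabilityStep(config),
        BacktestStep(config),
        PerformanceStep(config),
        VisualizationStep(config),
    ]
    # Seed the shared context with the timestamped output directory,
    # which the steps read via context.get("output_dir", ...).
    context: dict[str, Any] = {"output_dir": config.output_dir}
    for step in steps:
        if not config.force_rerun and step.output_exists():
            context[step.name] = step.load_cached_output()
        else:
            context[step.name] = step.execute(context)
    return context
```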