aponyx-0.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,251 @@
+ # Logging Design for Persistence Layer
+
+ ## Overview
+
+ The persistence layer now includes comprehensive logging at INFO and DEBUG levels to provide visibility into data operations without compromising performance or test reliability.
+
+ ## Design Principles
+
+ ### 1. **Module-Level Loggers (Recommended Pattern)**
+
+ ```python
+ import logging
+
+ logger = logging.getLogger(__name__)
+ ```
+
+ **Why this approach?**
+
+ - ✅ **Hierarchical naming**: Logger names follow module structure (`aponyx.persistence.parquet_io`)
+ - ✅ **Configurable at any level**: Users can control logging for the entire package, submodules, or individual modules
+ - ✅ **No global configuration in library code**: The library never calls `logging.basicConfig()` - that's the application's responsibility
+ - ✅ **Works with pytest**: Tests run cleanly without logging output unless explicitly enabled
+ - ✅ **Standard Python practice**: Follows official Python logging guidelines for library code
+
+ **Anti-pattern (what we avoided):**
+ ```python
+ # DON'T DO THIS in library code
+ logging.basicConfig(level=logging.INFO)  # Forces configuration on users
+ ```
+
+ ### 2. **INFO vs DEBUG Levels**
+
+ #### INFO Level (User-Facing Operations)
+ Logged at INFO when:
+ - Files are saved or loaded
+ - Registry operations occur (register, update, remove)
+ - Operations complete successfully with summary statistics
+
+ **Examples:**
+ ```python
+ logger.info("Saving DataFrame to Parquet: path=%s, rows=%d, columns=%d", path, len(df), len(df.columns))
+ logger.info("Registered dataset: name=%s, instrument=%s, rows=%s", name, instrument, row_count)
+ ```
+
+ **Characteristics:**
+ - High-level operations
+ - Always relevant to users
+ - Should appear in production logs
+ - Includes key metrics (rows, columns, file sizes)
+
+ #### DEBUG Level (Developer Details)
+ Logged at DEBUG for:
+ - Low-level details about operations
+ - File sizes after writing
+ - Applied filters and transformations
+ - Directories that do not exist (e.g., when listing files)
+
+ **Examples:**
+ ```python
+ logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+ logger.debug("Applied date filter: start=%s, end=%s, resulting_rows=%d", start_date, end_date, len(df))
+ ```
+
+ **Characteristics:**
+ - Implementation details
+ - Useful for debugging
+ - Can be verbose
+ - Typically disabled in production
+
+ ### 3. **Structured Logging with Parameters**
+
+ We use **%-formatting with parameters** instead of f-strings in log statements:
+
+ ```python
+ # ✅ CORRECT: Lazy evaluation, structured logging
+ logger.info("Loaded %d rows from %s", len(df), path)
+
+ # ❌ AVOID: Eager evaluation - the string is formatted even when the level is disabled
+ logger.info(f"Loaded {len(df)} rows from {path}")
+ ```
+
+ **Benefits:**
+ - **Performance**: String formatting only happens if the log level is enabled
+ - **Structured logging**: Log aggregation tools can parse parameters
+ - **Consistency**: Standard Python logging best practice
+
+ ### 4. **Warning Level for Recoverable Errors**
+
+ ```python
+ logger.warning("Failed to extract stats from %s: %s", file_path, str(e))
+ ```
+
+ Used when:
+ - An operation fails but execution continues
+ - Non-critical errors occur
+ - The user should be aware, but no exception is raised
+
+ ## What We Log
+
+ ### Parquet I/O (`parquet_io.py`)
+
+ | Operation | Level | Information Logged |
+ |-----------|-------|--------------------|
+ | `save_parquet()` | INFO | Path, rows, columns, compression |
+ | `save_parquet()` | DEBUG | File size after save |
+ | `load_parquet()` | INFO | Path, columns filter |
+ | `load_parquet()` | INFO | Rows and columns loaded |
+ | `load_parquet()` | DEBUG | Date filter details, resulting rows |
+ | `list_parquet_files()` | INFO | Number of files found, directory, pattern |
+ | `list_parquet_files()` | DEBUG | Directory does not exist |
+
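+ To make the table concrete, here is a minimal sketch of how `save_parquet()` could emit these messages; the signature and the `compression` default are illustrative assumptions, not the actual implementation:
+
+ ```python
+ import logging
+ from pathlib import Path
+
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ def save_parquet(df: pd.DataFrame, path: Path, compression: str = "snappy") -> None:
+     """Illustrative only: save a DataFrame and log at the levels described above."""
+     # INFO: high-level operation with key metrics
+     logger.info(
+         "Saving DataFrame to Parquet: path=%s, rows=%d, columns=%d, compression=%s",
+         path, len(df), len(df.columns), compression,
+     )
+     df.to_parquet(path, compression=compression)
+     # DEBUG: low-level detail (file size after the write)
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+ ```
+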
+ ### JSON I/O (`json_io.py`)
+
+ | Operation | Level | Information Logged |
+ |-----------|-------|--------------------|
+ | `save_json()` | INFO | Path, number of top-level keys |
+ | `save_json()` | DEBUG | File size after save |
+ | `load_json()` | INFO | Path |
+ | `load_json()` | DEBUG | Number of top-level keys loaded |
+
+ ### Registry (`registry.py`)
+
+ | Operation | Level | Information Logged |
+ |-----------|-------|--------------------|
+ | `__init__()` | INFO | Registry path, number of datasets (loaded/created) |
+ | `register_dataset()` | INFO | Name, instrument, row count |
+ | `register_dataset()` | WARNING | Failed to extract stats from file |
+ | `register_dataset()` | DEBUG | Registering non-existent file |
+ | `update_dataset_stats()` | INFO | Name, rows, date range after update |
+ | `remove_dataset()` | INFO | Name removed, file deleted (if applicable) |
+
+ ## User Configuration Examples
+
+ ### Application Setup (Demo/Scripts)
+
+ ```python
+ import logging
+
+ # Basic configuration for scripts
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     datefmt="%H:%M:%S",
+ )
+ ```
+
+ ### Fine-Grained Control
+
+ ```python
+ import logging
+
+ # Enable DEBUG for parquet_io only
+ logging.getLogger("aponyx.persistence.parquet_io").setLevel(logging.DEBUG)
+
+ # Disable INFO for json_io
+ logging.getLogger("aponyx.persistence.json_io").setLevel(logging.WARNING)
+
+ # Enable all persistence layer at INFO
+ logging.getLogger("aponyx.persistence").setLevel(logging.INFO)
+ ```
+
+ ### Production Configuration
+
+ ```python
+ import logging
+
+ # Production: INFO to file, WARNING to console
+ file_handler = logging.FileHandler("aponyx.log")
+ file_handler.setLevel(logging.INFO)
+
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(logging.WARNING)
+
+ # The "aponyx" logger must allow INFO through, or the file handler never receives it
+ logging.getLogger("aponyx").setLevel(logging.INFO)
+ logging.getLogger("aponyx").addHandler(file_handler)
+ logging.getLogger("aponyx").addHandler(console_handler)
+ ```
+
+ ### Testing (Silence Logs)
+
+ ```python
+ # In conftest.py or test setup
+ import logging
+
+ logging.getLogger("aponyx").setLevel(logging.CRITICAL)
+ ```
+
+ Or use pytest's `caplog` fixture:
+ ```python
+ import logging
+
+ def test_something(caplog):
+     with caplog.at_level(logging.INFO, logger="aponyx.persistence"):
+         # Test code
+         pass
+
+     assert "Saving DataFrame to Parquet" in caplog.text
+ ```
+
+ ## Benefits of This Design
+
+ ### 1. **Non-Invasive**
+ - Library doesn't force logging configuration
+ - Users control what they see
+ - Tests run silently by default
+
+ ### 2. **Observable**
+ - Users can track data operations
+ - Debugging is easier with detailed logs
+ - Production monitoring is straightforward
+
+ ### 3. **Performance**
+ - Lazy evaluation (%-formatting)
+ - No overhead when logging is disabled
+ - Structured data for log aggregation
+
+ ### 4. **Maintainable**
+ - Consistent logging pattern across modules
+ - Clear signal-to-noise ratio (INFO vs DEBUG)
+ - Easy to add more logging as code evolves
+
+ ### 5. **Standards-Compliant**
+ - Follows Python logging best practices
+ - Compatible with enterprise logging infrastructure
+ - Works with popular logging frameworks (loguru, structlog)
+
+ ## Example Output
+
+ With `logging.basicConfig(level=logging.INFO)`:
+
+ ```
+ 00:08:40 - aponyx.persistence.parquet_io - INFO - Saving DataFrame to Parquet: path=data/cdx_ig_5y.parquet, rows=215, columns=2, compression=snappy
+ 00:08:41 - aponyx.data.registry - INFO - Loaded existing registry: path=data/.registries/registry.json, datasets=4
+ 00:08:41 - aponyx.persistence.parquet_io - INFO - Loading Parquet file: path=data/cdx_ig_5y.parquet, columns=all
+ 00:08:41 - aponyx.persistence.parquet_io - INFO - Loaded 215 rows, 2 columns from data/cdx_ig_5y.parquet
+ ```
+
+ Clean, informative, and actionable.
+
+ ## Future Enhancements
+
+ Potential additions as the project grows:
+
+ 1. **Metrics Integration**: Add timing decorators for performance monitoring (see the sketch below)
+ 2. **Structured Logging**: Migrate to `structlog` for JSON-formatted logs
+ 3. **Audit Trail**: Add UUID tracking for data lineage
+ 4. **Performance Logging**: Track I/O performance metrics
+
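+ To illustrate the first item, a timing decorator could take roughly the following shape (a hypothetical helper, not part of the current package); it logs durations at DEBUG so timing noise stays out of production logs:
+
+ ```python
+ import functools
+ import logging
+ import time
+
+ logger = logging.getLogger(__name__)
+
+
+ def log_duration(func):
+     """Hypothetical decorator: log how long a persistence operation took."""
+     @functools.wraps(func)
+     def wrapper(*args, **kwargs):
+         start = time.perf_counter()
+         try:
+             return func(*args, **kwargs)
+         finally:
+             elapsed_ms = (time.perf_counter() - start) * 1000
+             # DEBUG keeps timing detail out of production logs by default
+             logger.debug("%s completed in %.1f ms", func.__name__, elapsed_ms)
+     return wrapper
+ ```
+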
+ ---
+
+ **Summary**: The logging design follows Python best practices for library code, providing visibility without imposing configuration, and maintaining clean separation between library and application concerns.
+
+ **Maintained by:** stabilefrisur
+ **Last Updated:** December 13, 2025
@@ -0,0 +1,265 @@
+ # Performance Evaluation Design — aponyx
+
+ **Status:** ✅ **IMPLEMENTED** (November 9, 2025)
+
+ ## Objective
+
+ The second-stage evaluation feature provides **comprehensive performance analysis of backtest results**. This stage turns simulation outputs into structured analytical insights. It sits between backtesting and visualization, enabling consistent, extensible, and comparable performance interpretation.
+
+ ---
+
+ ## Implementation Status
+
+ ### ✅ Completed (November 9, 2025)
+
+ **Core modules:**
+ - ✅ `analyzer.py` - Performance orchestration and evaluation
+ - ✅ `decomposition.py` - Return attribution (directional, signal strength, win/loss)
+ - ✅ `risk_metrics.py` - Extended metrics (stability, profit factor, tail ratio, consistency)
+ - ✅ `registry.py` - JSON-based metadata catalog with CRUD operations
+ - ✅ `report.py` - Markdown report generation with comprehensive formatting
+ - ✅ `config.py` - PerformanceConfig dataclass with validation
+
+ **Key features:**
+ - Extended performance metrics beyond basic Sharpe/drawdown
+ - Rolling Sharpe analysis with configurable windows
+ - Subperiod stability assessment (quarterly by default)
+ - Return attribution by direction, signal strength, and win/loss
+ - Comprehensive markdown reports
+ - Registry-based metadata tracking
+ - Integration with backtest layer via BacktestResult objects
+
+ **Notebook:**
+ - ✅ `05_performance_analysis.ipynb` - Complete workflow notebook (13 cells)
+   - Loads backtest results from Step 4
+   - Reconstructs BacktestResult objects
+   - Runs comprehensive performance analysis
+   - Displays extended metrics tables
+   - Visualizes rolling Sharpe and attribution
+   - Generates reports for all signal-strategy pairs
+   - Registers evaluations in PerformanceRegistry
+
+ **Testing:**
+ - ✅ Unit tests in `tests/evaluation/performance/`
+ - ✅ Registry integration tests
+ - ✅ Attribution decomposition tests
+ - ✅ Report generation tests
+
+ ---
+
+ ## Conceptual Placement
+
+ The feature is implemented under a dedicated subpackage:
+
+ ```
+ src/aponyx/evaluation/performance/
+ ```
+
+ This aligns with the project’s layered architecture, keeping the evaluation domain consistent:
+ - `evaluation.suitability` → pre-backtest screening
+ - `evaluation.performance` → post-backtest analysis
+
+ ---
+
+ ## Core Responsibilities
+
+ | Module | Purpose |
+ |--------|---------|
+ | `analyzer.py` | Orchestrates performance evaluation for one or more backtests |
+ | `decomposition.py` | Provides return attribution by signal component, trade side, or regime |
+ | `risk_metrics.py` | Computes advanced risk and stability metrics beyond base statistics |
+ | `registry.py` | Manages metadata catalog of evaluation runs (JSON-based) |
+ | `report.py` | Generates summary reports (Markdown or JSON) |
+ | `config.py` | Defines configuration dataclasses and evaluation parameters |
+
+ This mirrors the structure of the existing suitability evaluation package for consistency.
+
+ ---
+
+ ## Architectural Integration
+
+ **Inputs:**
+ - One or more `BacktestResult` objects from the backtest layer
+ - Optional contextual metadata such as signal name, strategy ID, and market regime labels
+
+ **Outputs:**
+ - Structured `PerformanceResult` data container
+ - Optional Markdown or JSON report
+ - Registered entry in a `PerformanceRegistry` catalog for traceability
+
+ **Dependencies:**
+ - Imports allowed from `aponyx.backtest` and `aponyx.persistence`
+ - Must not import from `data`, `models`, or `visualization`
+
+ ---
+
+ ## Core Components
+
+ ### Configuration ✅
+ A frozen `PerformanceConfig` dataclass defines the evaluation scope, including minimum observations, subperiods, risk-free rate, rolling window, and attribution quantiles.
+
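+ As a rough illustration, the configuration could look like the sketch below. The defaults `min_obs=252`, `n_subperiods=4`, and `rolling_window=63` appear in the data flow later in this document; all other names and defaults are assumptions, not the shipped definition:
+
+ ```python
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True)
+ class PerformanceConfig:
+     """Illustrative sketch of the evaluation scope (not the actual class)."""
+
+     min_obs: int = 252              # minimum observations required to evaluate
+     n_subperiods: int = 4           # subperiods for stability assessment (quarterly)
+     risk_free_rate: float = 0.0     # annualized risk-free rate used in Sharpe
+     rolling_window: int = 63        # trading days per rolling-Sharpe window
+     attribution_quantiles: int = 3  # terciles for signal-strength attribution
+
+     def __post_init__(self) -> None:
+         # Basic validation, mirroring the "dataclass with validation" note above
+         if self.min_obs <= 0 or self.rolling_window <= 0:
+             raise ValueError("min_obs and rolling_window must be positive")
+ ```
+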
+ ### Evaluation Result Container ✅
+ The `PerformanceResult` dataclass stores:
+ - Extended metrics (stability, profit factor, tail ratio, rolling Sharpe stats)
+ - Attribution results (directional, signal strength, win/loss decomposition)
+ - Subperiod stability scores
+ - Comprehensive metadata (timestamps, configuration, signal/strategy IDs)
+
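+ A minimal sketch of such a container is shown below; the field names and types are assumptions for illustration, not the shipped definition:
+
+ ```python
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any
+
+
+ @dataclass(frozen=True)
+ class PerformanceResult:
+     """Illustrative shape of the evaluation output (not the actual class)."""
+
+     extended_metrics: dict[str, float]        # stability, profit factor, tail ratio, ...
+     attribution: dict[str, dict[str, float]]  # directional / signal-strength / win-loss
+     subperiod_stability: list[float]          # one score per subperiod
+     metadata: dict[str, Any] = field(default_factory=dict)  # signal/strategy IDs, config
+     created_at: datetime = field(default_factory=datetime.utcnow)
+ ```
+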
+ ### Analyzer Module ✅
+ The `analyze_backtest_performance` function orchestrates:
+ - Loading BacktestResult objects
+ - Computing extended metrics via risk_metrics module
+ - Assessing temporal stability across subperiods
+ - Running attribution decomposition
+ - Returning structured PerformanceResult
+
+ ### Registry Pattern ✅
+ The `PerformanceRegistry` class provides:
+ - JSON-based catalog of evaluation runs
+ - CRUD operations: register, get, list (with filters)
+ - Metadata tracking (signal, strategy, timestamps, stability scores)
+ - Report path management
+
+ ### Decomposition and Attribution ✅
+ The `decomposition.py` module computes:
+ - **Directional attribution:** Long vs short P&L contribution
+ - **Signal strength attribution:** Quantile-based decomposition (terciles by default)
+ - **Win/loss decomposition:** Positive vs negative day breakdown
+
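+ For intuition, a tercile-based signal-strength attribution of this kind can be sketched with pandas; this is a generic illustration of the idea, not the module's actual code:
+
+ ```python
+ import pandas as pd
+
+
+ def signal_strength_attribution(signal: pd.Series, pnl: pd.Series) -> pd.Series:
+     """Sketch: attribute P&L to weak/medium/strong terciles of absolute signal size."""
+     strength = pd.qcut(signal.abs(), q=3, labels=["weak", "medium", "strong"])
+     # Sum P&L within each tercile; the shares show where the strategy earns its return
+     return pnl.groupby(strength).sum()
+ ```
+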
+ ### Reporting ✅
+ The `report.py` module generates:
+ - Comprehensive markdown reports with metrics tables
+ - Attribution breakdowns with visual formatting
+ - Stability analysis and interpretation
+ - Timestamped file persistence in `data/workflows/{signal}_{strategy}_{timestamp}/reports/`
+
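+ Taken together, the components above imply a usage pattern roughly like the following. The function and class names come from this document and the package exports listed in the directory layout below; the argument names, registry path, and keyword arguments are assumptions for illustration, and `backtest_result` is assumed to come from the backtest layer (Step 4):
+
+ ```python
+ from aponyx.evaluation.performance import (
+     PerformanceConfig,
+     PerformanceRegistry,
+     analyze_backtest_performance,
+     generate_performance_report,  # assumed to be exported alongside the names above
+ )
+
+ # `backtest_result` is a BacktestResult produced by the backtest layer (Step 4)
+ config = PerformanceConfig(min_obs=252, n_subperiods=4, rolling_window=63)
+ result = analyze_backtest_performance(backtest_result, config)
+
+ # Persist a Markdown report and register the evaluation for traceability
+ report_path = generate_performance_report(result)
+ registry = PerformanceRegistry("data/.registries/performance.json")
+ registry.register_evaluation(result, report_path=report_path)
+ ```
+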
+ ---
+
+ ## Modular Design Principles
+
+ | Design Choice | Rationale |
+ |---------------|-----------|
+ | Protocol-based interfaces | Allow integration with third-party analytics libraries such as QuantStats or vectorbt without code rewrites |
+ | Functional orchestration with data containers | Simplify testing, serialization, and reproducibility |
+ | Registry pattern | Maintain consistent governance across evaluation layers |
+ | Separation of computation and interpretation | Enable flexible decision logic and visualization reuse |
+ | Optional external adapters | Support gradual integration of external toolkits |
+
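+ The first row of the table can be made concrete with a small `typing.Protocol`; the interface name and method below are illustrative assumptions rather than the package's actual contract:
+
+ ```python
+ from typing import Protocol
+
+ import pandas as pd
+
+
+ class PerformanceAnalytics(Protocol):
+     """Anything that can turn a daily P&L series into named metrics."""
+
+     def compute_metrics(self, pnl: pd.Series) -> dict[str, float]:
+         ...
+
+
+ def evaluate_with(adapter: PerformanceAnalytics, pnl: pd.Series) -> dict[str, float]:
+     # The orchestrator depends only on the Protocol, so a QuantStats- or
+     # vectorbt-backed adapter can be swapped in without rewriting core logic.
+     return adapter.compute_metrics(pnl)
+ ```
+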
+ ---
+
+ ## Implementation Status Summary
+
+ ### ✅ Must-haves (Completed)
+ - ✅ Single-signal evaluation of backtest results
+ - ✅ Extended performance metrics (stability, profit factor, tail ratio, rolling Sharpe)
+ - ✅ Subperiod stability analysis (quarterly by default)
+ - ✅ Markdown report generation
+ - ✅ Registry-based metadata tracking
+
+ ### ✅ Should-haves (Completed)
+ - ✅ Comparative evaluation across strategies or signals
+ - ✅ Basic attribution by trade direction and signal quantile
+ - ✅ Rolling performance diagnostics (rolling Sharpe analysis)
+
+ ### 🔄 Nice-to-haves (Future Work)
+ - ⏳ Optional adapters for external analytics libraries (QuantStats, vectorbt)
+ - ⏳ Multi-strategy or portfolio-level aggregation
+ - ⏳ Advanced attribution by risk source or regime
+ - ⏳ Streamlit dashboard integration for interactive review
+
+ ---
+
+ ## Directory Layout (Implemented)
+
+ ```
+ src/aponyx/evaluation/
+ ├── suitability/              # Pre-backtest evaluation
+ └── performance/              # Post-backtest analysis ✅
+     ├── __init__.py           # Exports: analyze_backtest_performance, PerformanceRegistry, etc.
+     ├── analyzer.py           # Core orchestration ✅
+     ├── decomposition.py      # Attribution logic ✅
+     ├── risk_metrics.py       # Extended metric computations ✅
+     ├── report.py             # Markdown summaries ✅
+     ├── registry.py           # Metadata catalog ✅
+     └── config.py             # Configuration dataclasses ✅
+
+ data/.registries/
+ └── performance.json          # Evaluation tracking (runtime) ✅
+ ```
+
+ **Tests:**
+ ```
+ tests/evaluation/performance/
+ ├── test_analyzer.py       # Core evaluation tests ✅
+ ├── test_decomposition.py  # Attribution tests ✅
+ ├── test_risk_metrics.py   # Extended metrics tests ✅
+ ├── test_report.py         # Report generation tests ✅
+ └── test_registry.py       # Registry CRUD tests ✅
+ ```
+
+ ---
+
+ ## Data Flow (Implemented)
+
+ ```
+ BacktestResult(s) from backtest execution
+
+ PerformanceConfig (min_obs=252, n_subperiods=4, rolling_window=63)
+
+ analyze_backtest_performance(backtest_result, config)
+ ├─ compute_extended_metrics() → stability, profit factor, tail ratio
+ ├─ compute_rolling_sharpe() → mean/std of rolling window
+ ├─ compute_attribution() → directional, signal strength, win/loss
+ └─ assess_subperiod_stability() → quarterly stability scores
+
+ PerformanceResult (extended metrics + attribution + metadata)
+
+ generate_performance_report() → Markdown file
+
+ PerformanceRegistry.register_evaluation() → JSON catalog entry
+
+ Visualization Layer (optional)
+ ```
+
+ **Workflow integration:**
+ ```
+ Step 1: Data Download
+ Step 2: Signal Computation
+ Step 3: Suitability Evaluation
+ Step 4: Backtest Execution
+ Step 5: Performance Analysis ← NEW
+ ```
+
+ ---
+
+ ## Key Metrics Implemented
+
+ ### Extended Performance Metrics
+ - **Stability Score:** Temporal consistency across subperiods (0-1 scale)
+ - **Profit Factor:** Gross wins / gross losses
+ - **Tail Ratio:** 95th percentile / 5th percentile return
+ - **Rolling Sharpe:** Mean and standard deviation of rolling window Sharpe ratios
+ - **Consistency Score:** Percentage of positive subperiods
+ - **Recovery Statistics:** Average days to recover from drawdowns, drawdown count
+
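+ For reference, the profit factor, tail ratio, and consistency score are typically computed from a daily return series as in the generic sketch below (the tail ratio here divides by the magnitude of the 5th percentile, which is an assumption about the convention used):
+
+ ```python
+ import numpy as np
+ import pandas as pd
+
+
+ def profit_factor(returns: pd.Series) -> float:
+     """Gross wins divided by gross losses."""
+     gross_wins = returns[returns > 0].sum()
+     gross_losses = -returns[returns < 0].sum()
+     return float(gross_wins / gross_losses) if gross_losses > 0 else float("inf")
+
+
+ def tail_ratio(returns: pd.Series) -> float:
+     """95th percentile return over the magnitude of the 5th percentile return."""
+     return float(np.percentile(returns, 95) / abs(np.percentile(returns, 5)))
+
+
+ def consistency_score(returns: pd.Series, n_subperiods: int = 4) -> float:
+     """Share of subperiods with a positive cumulative return."""
+     chunks = np.array_split(returns.to_numpy(), n_subperiods)
+     return float(np.mean([chunk.sum() > 0 for chunk in chunks]))
+ ```
+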
+ ### Attribution Components
+ - **Directional:** Long vs short P&L contribution and percentages
+ - **Signal Strength:** Tercile-based decomposition (weak/medium/strong)
+ - **Win/Loss:** Positive vs negative day breakdown with contribution percentages
+
+ ---
+
+ ## Future Extensibility (Unchanged)
+
+ - **Adapters:** Optional plugin interfaces for third-party analytics libraries.
+ - **Metrics registry:** Dynamically register new performance metrics without changing core logic.
+ - **Comparison engine:** Evaluate relative performance across strategies or signals.
+ - **Dashboard integration:** Connect with Streamlit for interactive result exploration.
+ - **Portfolio extensions:** Aggregate results for multi-strategy analysis.
+
+ ---
+
+ ## Design Intent
+
+ This design preserves the modular, layered philosophy of the existing system. It introduces a standardized framework for interpreting backtest outputs, leaving room for scalability in analytics, visualization, and external integrations.
+