aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,558 @@
+ # Signal–Product Suitability Evaluation Layer
+
+ **Version:** 2.0
+ **Date:** December 2025
+ **Module Path:** `src/aponyx/evaluation/suitability/`
+ **Purpose:** Pre-backtest screening of signal-product pairs for statistical credibility, economic relevance, and temporal stability.
+
+ ---
+
+ ## 1. Purpose
+
+ The suitability evaluation layer provides **quantitative assessment of signal-product relationships** before committing resources to full backtesting. It answers four fundamental questions:
+
+ 1. **Data Health** – Is there sufficient, clean data for reliable inference?
+ 2. **Predictive Credibility** – Does the signal predict the target with statistically significant evidence?
+ 3. **Economic Relevance** – Is the predicted effect large enough to be tradeable?
+ 4. **Temporal Stability** – Is the relationship persistent across market regimes?
+
+ **Assessment Framework:**
+
+ | Decision | Meaning | Interpretation |
+ |----------|---------|----------------|
+ | PASS | Signal meets suitability criteria | High confidence for backtesting |
+ | HOLD | Signal shows promise but has marginal scores | Review before backtesting |
+ | FAIL | Signal lacks predictive evidence | Low confidence; consider improvements |
+
+ **Workflow Position:**
+
+ ```
+ Data → Signal Generation → Suitability Evaluation → Backtest → Performance Analysis
+                                 (assessment)
+ ```
+
+ ---
+
+ ## 2. Design Principles
+
+ 1. **Deterministic:** Evaluations produce identical results given identical inputs (no randomness).
+ 2. **Transparent:** All metrics have clear statistical and economic interpretation.
+ 3. **Lightweight:** Minimal dependencies; fast computation suitable for batch evaluation.
+ 4. **Reproducible:** Full metadata logging with versioned outputs and configuration tracking.
+ 5. **Functional:** Pure functions for all scoring logic; class-based registry for tracking.
+
+ ---
+
+ ## 3. Evaluation Components
+
+ ### 3.1 Data Health
+
+ **Purpose:** Verify data quality and sufficiency for reliable statistical inference.
+
+ **Metrics:**
+
+ - **Valid Observations:** Count of aligned signal-target pairs after dropna
+ - **Missing Percentage:** Proportion of data lost during alignment
+ - **Minimum Sample Requirement:** Configurable threshold (default: 500 observations, matching `min_obs` in `SuitabilityConfig`)
+
+ **Scoring Function:**
+
+ ```python
+ def score_data_health(
+     valid_obs: int,
+     missing_pct: float,
+     min_obs: int,
+ ) -> float:
+     """
+     Score data quality on [0, 1] scale.
+
+     Returns 0.0 if valid_obs < min_obs.
+     Penalizes high missing_pct linearly.
+     """
+ ```
+
+ **Outcome:** Score ∈ [0, 1] with interpretation (excellent/adequate/poor).
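+
+ A minimal sketch of one way to implement this stub, assuming `missing_pct` is a fraction in [0, 1]; the hard floor at `min_obs` follows the docstring above, while the exact linear penalty slope is an assumption:
+
+ ```python
+ def score_data_health(valid_obs: int, missing_pct: float, min_obs: int) -> float:
+     """Score data quality on [0, 1]; hard-fail below the minimum sample size."""
+     if valid_obs < min_obs:
+         return 0.0  # insufficient data for reliable inference
+     return max(0.0, 1.0 - missing_pct)  # linear penalty for data lost in alignment
+ ```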
+
+ ---
+
+ ### 3.2 Predictive Credibility
+
+ **Purpose:** Quantify statistical evidence that signal predicts target movements.
+
+ **Statistical Tests:**
+
+ - **Correlation:** Pearson correlation between signal and forward returns across multiple lags
+ - **Regression:** OLS regression β coefficients and t-statistics
+ - **Multi-Horizon:** Evaluate at configured lags (e.g., 1-day, 5-day, 10-day forward)
+
+ **Key Metric:** Mean absolute t-statistic across all lag horizons.
+
+ **Scoring Function:**
+
+ ```python
+ def score_predictive(mean_abs_tstat: float) -> float:
+     """
+     Map t-statistic strength to [0, 1] score.
+
+     |t| < 2.0: Low credibility (< 0.5)
+     |t| ≥ 3.0: High credibility (> 0.8)
+     """
+ ```
+
+ **Outcome:** Score ∈ [0, 1] based on statistical significance strength.
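+
+ The forward-alignment mechanics are the core of this test. Below is a minimal sketch assuming daily data and cumulative forward changes per horizon; `compute_lag_tstats` is a hypothetical helper shown for illustration (the module's fixed API is `compute_regression_stats`, listed in Section 7):
+
+ ```python
+ import numpy as np
+ import pandas as pd
+
+ def compute_lag_tstats(
+     signal: pd.Series, target_change: pd.Series, lags: list[int]
+ ) -> dict[int, float]:
+     """t-statistic of the OLS slope of forward target change on signal, per lag."""
+     t_stats: dict[int, float] = {}
+     for lag in lags:
+         # Cumulative change over the next `lag` periods, aligned to today.
+         forward = target_change.rolling(lag).sum().shift(-lag)
+         pair = pd.concat([signal, forward], axis=1).dropna()
+         x = pair.iloc[:, 0].to_numpy() - pair.iloc[:, 0].mean()
+         y = pair.iloc[:, 1].to_numpy() - pair.iloc[:, 1].mean()
+         beta = (x @ y) / (x @ x)  # OLS slope; intercept absorbed by centering
+         resid = y - beta * x
+         se = np.sqrt(resid @ resid / (len(x) - 2) / (x @ x))
+         t_stats[lag] = beta / se
+     return t_stats
+ ```
+
+ The key metric then follows as `np.mean([abs(t) for t in t_stats.values()])`.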
+
+ ---
+
+ ### 3.3 Economic Relevance
+
+ **Purpose:** Translate statistical relationship into economically meaningful impact.
+
+ **Computation:**
+
+ 1. Compute average β across all lag horizons
+ 2. Scale by signal volatility: `effect_size = |β| × σ_signal`
+ 3. Express in basis points for spread products (see the sketch below)
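+
+ A sketch of these three steps, assuming `betas` maps lag → OLS slope with the target change measured in basis points, so that a one-standard-deviation signal move translates directly into bps:
+
+ ```python
+ import numpy as np
+
+ # Assumed in scope: betas (dict[int, float]) from the regression step,
+ # and signal (pd.Series), the evaluated signal.
+ beta_avg = float(np.mean(list(betas.values())))   # step 1: average across lags
+ effect_size_bps = abs(beta_avg) * signal.std()    # steps 2-3: bps per 1σ signal move
+ ```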
+
+ **Economic Context:**
+
+ - **CDX spreads:** Effect size < 1 bps likely too small to trade
+ - **CDX spreads:** Effect size > 5 bps economically significant
+
+ **Scoring Function:**
+
+ ```python
+ def score_economic(effect_size_bps: float) -> float:
+     """
+     Map economic impact to [0, 1] score.
+
+     < 1 bps: Low relevance (< 0.3)
+     > 5 bps: High relevance (> 0.8)
+     """
+ ```
+
+ **Outcome:** Score ∈ [0, 1] with bps-denominated effect size.
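+
+ A hedged sketch of the mapping, interpolating linearly between the documented anchor points; the interior shape and the 10 bps saturation point are assumptions:
+
+ ```python
+ import numpy as np
+
+ def score_economic(effect_size_bps: float) -> float:
+     """Map effect size (bps) to [0, 1]: < 1 bps stays below 0.3, > 5 bps exceeds 0.8."""
+     anchors_bps = [0.0, 1.0, 5.0, 10.0]  # effect-size anchor points
+     scores = [0.0, 0.3, 0.8, 1.0]        # corresponding suitability scores
+     return float(np.interp(abs(effect_size_bps), anchors_bps, scores))
+ ```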
+
+ ---
+
+ ### 3.4 Temporal Stability
+
+ **Purpose:** Verify relationship persistence across market regimes.
+
+ **Method:**
+
+ 1. Compute rolling β coefficients using sliding window (default: 252 observations)
+ 2. Calculate coefficient of variation (CV) of rolling betas
+ 3. Check sign consistency (proportion of windows with same sign as aggregate)
+
+ **Stability Indicators:**
+
+ - **Sign Consistency Ratio:** Proportion of rolling windows with same sign as aggregate β
+ - **Magnitude Stability:** Coefficient of variation of rolling betas (lower = more stable)
+
+ **Scoring Function:**
+
+ ```python
+ def score_stability(
+     sign_consistency_ratio: float,
+     beta_cv: float,
+ ) -> float:
+     """
+     Score stability on [0, 1] scale using two components.
+
+     Sign consistency: ≥80% same sign = high (0.5 contribution)
+     Magnitude stability: CV < 0.5 = high (0.5 contribution)
+
+     Returns weighted average of both stability measures.
+     """
+ ```
+
+ **Outcome:** Score ∈ [0, 1] based on both sign persistence and magnitude stability.
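+
+ A minimal sketch consistent with the cutoffs above; the exact shape of each half-credit curve is an assumption:
+
+ ```python
+ def score_stability(sign_consistency_ratio: float, beta_cv: float) -> float:
+     """Each component contributes up to 0.5, per the docstring above."""
+     # Full half-credit once ≥ 80% of windows share the aggregate sign.
+     sign_score = 0.5 * min(sign_consistency_ratio / 0.8, 1.0)
+     # Full half-credit while CV < 0.5; decays as magnitude variation grows.
+     magnitude_score = 0.5 * (0.5 / max(beta_cv, 0.5))
+     return sign_score + magnitude_score
+ ```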
+
+ ---
+
+ ### 3.5 Composite Score
+
+ **Aggregation:** Weighted average of component scores.
+
+ **Default Weights:**
+
+ | Component | Weight | Rationale |
+ |-----------|--------|-----------|
+ | Data Health | 0.20 | Foundation requirement |
+ | Predictive Credibility | 0.40 | Core screening criterion |
+ | Economic Relevance | 0.20 | Practical tradability |
+ | Temporal Stability | 0.20 | Regime robustness |
+
+ **Decision Thresholds:**
+
+ | Composite Score | Decision |
+ |-----------------|----------|
+ | ≥ 0.70 | PASS |
+ | 0.40 ≤ score < 0.70 | HOLD |
+ | < 0.40 | FAIL |
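+
+ The aggregation and gate are fully determined by the weights and thresholds on `SuitabilityConfig` (Section 7); a minimal sketch:
+
+ ```python
+ def compute_composite_score(
+     data_health_score: float,
+     predictive_score: float,
+     economic_score: float,
+     stability_score: float,
+     config: SuitabilityConfig,
+ ) -> float:
+     """Weighted average of the four component scores (weights sum to 1.0)."""
+     return (
+         config.data_health_weight * data_health_score
+         + config.predictive_weight * predictive_score
+         + config.economic_weight * economic_score
+         + config.stability_weight * stability_score
+     )
+
+ def assign_decision(composite_score: float, config: SuitabilityConfig) -> str:
+     """Map composite score to a decision using the configured thresholds."""
+     if composite_score >= config.pass_threshold:
+         return "PASS"
+     if composite_score >= config.hold_threshold:
+         return "HOLD"
+     return "FAIL"
+ ```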
+
+ ---
+
+ ## 4. Evaluation Methodology (Conceptual)
+
+ ### 4.1 Data Health
+
+ **Intention:** Only signals with reliable underlying data should be considered.
+
+ **Considerations:**
+
+ * Minimum sample size for statistical inference
+ * Missing data proportion and alignment with target variable
+ * Identification of extreme outliers or discontinuities
+
+ **Outcome:** A normalized score representing data reliability, along with interpretation text.
+
+ ---
+
+ ### 4.2 Predictive Credibility
+
+ **Intention:** Quantify the signal’s ability to predict the target in a statistically meaningful way.
+
+ **Conceptual Steps:**
+
+ * Measure directional association between signal and future target moves
+ * Adjust for residual correlation or autocorrelation effects
+ * Aggregate predictive strength across relevant horizons or lags
+
+ **Outcome:** A score representing statistical credibility, accompanied by key metrics and narrative interpretation.
+
+ ---
+
+ ### 4.3 Economic Relevance
+
+ **Intention:** Convert statistical association into a meaningful economic effect.
+
+ **Conceptual Steps:**
+
+ * Scale estimated impact by the variability of the signal and the target
+ * Present effect in units familiar to the researcher (e.g., bps, %, or σ-scaled moves)
+ * Contextualize whether the magnitude is practically relevant for trading or risk allocation
+
+ **Outcome:** Score reflecting the economic significance, and a short textual assessment.
+
+ ---
+
+ ### 4.4 Temporal Stability
+
+ **Intention:** Ensure the signal’s predictive effect is persistent and robust across different market conditions.
+
+ **Conceptual Steps:**
+
+ * Perform rolling or windowed evaluation of predictive metrics
+ * Track variability (CV), directional consistency, and frequency of sign flips
+ * Identify potential regime dependence or intermittent predictability
+
+ **Outcome:** Score reflecting stability, along with interpretive summary and optional visualization of temporal trends.
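+
+ One way to realize the rolling evaluation, sketched against the `compute_rolling_betas` and `compute_stability_metrics` signatures in Section 7; the covariance/variance shortcut for the per-window OLS slope is an implementation assumption:
+
+ ```python
+ import numpy as np
+ import pandas as pd
+
+ def compute_rolling_betas(signal: pd.Series, target: pd.Series, window: int) -> pd.Series:
+     """Rolling OLS slope of target on signal (rolling cov / rolling var)."""
+     aligned = pd.concat([signal, target], axis=1, keys=["x", "y"]).dropna()
+     cov = aligned["x"].rolling(window).cov(aligned["y"])
+     var = aligned["x"].rolling(window).var()
+     return (cov / var).dropna()
+
+ def compute_stability_metrics(rolling_betas: pd.Series, aggregate_beta: float) -> dict[str, float]:
+     """Sign consistency vs. the aggregate beta, plus magnitude variation (CV)."""
+     same_sign = np.sign(rolling_betas) == np.sign(aggregate_beta)
+     return {
+         "sign_consistency_ratio": float(same_sign.mean()),
+         "beta_cv": float(rolling_betas.std() / abs(rolling_betas.mean())),
+         "n_windows": float(len(rolling_betas)),
+     }
+ ```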
+
+ ---
+
+ ## 5. Composite Evaluation
+
+ * Combine the four dimension scores into a **composite score**.
+ * Default weighting can be adjusted based on research priorities:
+
+ | Dimension | Default Weight |
+ |-----------|----------------|
+ | Data Health | 0.2 |
+ | Predictive Credibility | 0.4 |
+ | Economic Relevance | 0.2 |
+ | Temporal Stability | 0.2 |
+
+ * Composite score interpretation:
+   - **≥ 0.70:** High confidence (strong candidate for backtesting)
+   - **0.40 to < 0.70:** Marginal (requires judgment and further review)
+   - **< 0.40:** Low confidence (consider signal improvements)
+
+ ---
+
+ ## 6. Report Structure
+
+ **Format:** Markdown with structured sections for human readability and git versioning.
+
+ **Standard Sections:**
+
+ 1. **Header**
+    - Signal ID, Product ID, Evaluation timestamp
+    - Composite score and assessment category (PASS/HOLD/FAIL)
+
+ 2. **Summary Table**
+    - Component scores with weights
+    - Composite calculation breakdown
+
+ 3. **Component Details**
+    - **Data Health:** Valid observations, missing %, alignment quality
+    - **Predictive Credibility:** Correlations, betas, t-stats by lag horizon
+    - **Economic Relevance:** Effect size (bps), practical context
+    - **Temporal Stability:** Rolling beta statistics, sign consistency ratio, coefficient of variation, number of windows
+
+ 4. **Assessment Notes**
+    - Threshold comparison
+    - Marginal components (if applicable)
+    - Areas for potential improvement
+
+ 5. **Footer**
+    - Configuration snapshot (lags, rolling_window, thresholds, weights)
+    - Timestamp
+    - Reproducibility notice
+
+ **File Naming:** `{signal_id}_{product_id}_{timestamp}.md`
+
+ **Storage:** `data/workflows/{signal}_{strategy}_{timestamp}/reports/`
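+
+ A condensed sketch of the skeleton `generate_suitability_report` emits, assuming the `SuitabilityResult` fields documented in Section 7; the real report renders considerably more detail per section:
+
+ ```python
+ def generate_suitability_report(result: SuitabilityResult, signal_id: str, product_id: str) -> str:
+     """Render the header and summary table of the Markdown report."""
+     lines = [
+         f"# Suitability Report: {signal_id} / {product_id}",
+         f"**Decision:** {result.decision} | **Composite Score:** {result.composite_score:.2f}",
+         "",
+         "| Component | Score | Weight |",
+         "|-----------|-------|--------|",
+         f"| Data Health | {result.data_health_score:.2f} | {result.config.data_health_weight} |",
+         f"| Predictive Credibility | {result.predictive_score:.2f} | {result.config.predictive_weight} |",
+         f"| Economic Relevance | {result.economic_score:.2f} | {result.config.economic_weight} |",
+         f"| Temporal Stability | {result.stability_score:.2f} | {result.config.stability_weight} |",
+         "",
+         f"_Generated {result.timestamp}_",
+     ]
+     return "\n".join(lines)
+ ```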
+
+ ---
+
+ ## 7. Module Architecture
+
+ ```
+ aponyx/evaluation/suitability/
+ ├── config.py       # SuitabilityConfig dataclass (frozen)
+ ├── evaluator.py    # evaluate_signal_suitability() orchestration
+ ├── tests.py        # Statistical test functions (pure)
+ ├── scoring.py      # Score computation functions (pure)
+ ├── report.py       # generate_suitability_report() function
+ ├── registry.py     # SuitabilityRegistry class
+ └── __init__.py     # Public API exports
+
+ data/.registries/
+ └── suitability.json    # Evaluation metadata catalog (runtime)
+ ```
+
+ **Design Patterns:**
+
+ | Module | Pattern | State | Purpose |
+ |--------|---------|-------|---------|
+ | `config.py` | Frozen dataclass | Immutable | Configuration container |
+ | `evaluator.py` | Pure function | Stateless | Orchestration |
+ | `tests.py` | Pure functions | Stateless | Statistical computations |
+ | `scoring.py` | Pure functions | Stateless | Score mapping |
+ | `report.py` | Pure function | Stateless | Markdown generation |
+ | `registry.py` | Class-based | Mutable | Metadata tracking |
+
+ **Key Function Signatures:**
+
+ ```python
+ # Orchestration (evaluator.py)
+ def evaluate_signal_suitability(
+     signal: pd.Series,
+     target_change: pd.Series,
+     config: SuitabilityConfig | None = None,
+ ) -> SuitabilityResult
+
+ # Statistical tests (tests.py)
+ def compute_correlation(signal: pd.Series, target: pd.Series) -> float
+ def compute_regression_stats(signal: pd.Series, target: pd.Series) -> dict[str, float]
+ def compute_rolling_betas(signal: pd.Series, target: pd.Series, window: int) -> pd.Series
+ def compute_stability_metrics(rolling_betas: pd.Series, aggregate_beta: float) -> dict[str, float]
+
+ # Scoring (scoring.py)
+ def score_data_health(valid_obs: int, missing_pct: float, min_obs: int) -> float
+ def score_predictive(mean_abs_tstat: float) -> float
+ def score_economic(effect_size_bps: float) -> float
+ def score_stability(sign_consistency_ratio: float, beta_cv: float) -> float
+ def compute_composite_score(
+     data_health_score: float,
+     predictive_score: float,
+     economic_score: float,
+     stability_score: float,
+     config: SuitabilityConfig,
+ ) -> float
+ def assign_decision(composite_score: float, config: SuitabilityConfig) -> str
+
+ # Reporting (report.py)
+ def generate_suitability_report(
+     result: SuitabilityResult,
+     signal_id: str,
+     product_id: str,
+ ) -> str
+ ```
+
+ **Configuration Parameters:**
+
+ ```python
+ from dataclasses import dataclass, field
+
+ @dataclass(frozen=True)
+ class SuitabilityConfig:
+     lags: list[int] = field(default_factory=lambda: [1, 3, 5])  # Forecast horizons (days ahead)
+     min_obs: int = 500               # Minimum valid observations
+     rolling_window: int = 252        # Stability analysis window (~1 year)
+     pass_threshold: float = 0.7      # Score for PASS decision
+     hold_threshold: float = 0.4      # Score for HOLD decision
+     data_health_weight: float = 0.2  # Component weights (must sum to 1.0)
+     predictive_weight: float = 0.4
+     economic_weight: float = 0.2
+     stability_weight: float = 0.2
+ ```
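+
+ Because the dataclass is frozen, variations are constructed per evaluation rather than mutated in place; for example (hypothetical override values):
+
+ ```python
+ config = SuitabilityConfig(lags=[1, 5, 10], min_obs=250)
+ result = evaluate_signal_suitability(signal, target_change, config)
+ ```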
+
+ **Result Container:**
+
+ ```python
+ @dataclass
+ class SuitabilityResult:
+     decision: str                   # PASS/HOLD/FAIL
+     composite_score: float          # Weighted average (0-1)
+     data_health_score: float        # Component scores (0-1)
+     predictive_score: float
+     economic_score: float
+     stability_score: float
+     valid_obs: int                  # Diagnostic metrics
+     missing_pct: float
+     correlations: dict[int, float]  # By lag horizon
+     betas: dict[int, float]
+     t_stats: dict[int, float]
+     effect_size_bps: float
+     sign_consistency_ratio: float   # Stability metrics
+     beta_cv: float
+     n_windows: int
+     timestamp: str
+     config: SuitabilityConfig
+ ```
+
+ ---
+
+ ## 8. Governance Integration
+
+ **Registry Pattern:** Class-based with mutable state.
+
+ **Lifecycle:**
+
+ ```python
+ from aponyx.config import SUITABILITY_REGISTRY_PATH
+ from aponyx.evaluation.suitability import SuitabilityRegistry, evaluate_signal_suitability
+
+ # 1. LOAD: Instantiate registry
+ registry = SuitabilityRegistry(SUITABILITY_REGISTRY_PATH)
+
+ # 2. EVALUATE: Run suitability check
+ result = evaluate_signal_suitability(signal, target, config)
+
+ # 3. REGISTER: Store evaluation metadata
+ eval_id = registry.register_evaluation(
+     suitability_result=result,
+     signal_id="cdx_etf_basis",
+     product_id="cdx_ig_5y",
+     report_path=report_path,
+ )
+
+ # 4. QUERY: Retrieve evaluations
+ all_evals = registry.list_evaluations()
+ cdx_evals = registry.list_evaluations(signal_id="cdx_etf_basis")
+ passed = registry.list_evaluations(decision="PASS")
+
+ # 5. SAVE: Auto-saves on register
+ registry.save_catalog()  # Manual save also available
+ ```
+
+ **Metadata Schema:**
+
+ ```json
+ {
+   "evaluation_id": "eval_20251113_143022_a1b2c3",
+   "signal_id": "cdx_etf_basis",
+   "product_id": "cdx_ig_5y",
+   "timestamp": "2025-11-13T14:30:22",
+   "decision": "PASS",
+   "composite_score": 0.78,
+   "component_scores": {
+     "data_health": 0.85,
+     "predictive": 0.82,
+     "economic": 0.70,
+     "stability": 1.0
+   },
+   "stability_metrics": {
+     "sign_consistency_ratio": 0.95,
+     "beta_cv": 0.25,
+     "n_windows": 475
+   },
+   "report_path": "data/workflows/cdx_etf_basis_balanced_20251113_120000/reports/cdx_etf_basis_cdx_ig_5y_20251113.md",
+   "config": {
+     "lags": [1, 3, 5],
+     "rolling_window": 252,
+     "pass_threshold": 0.7,
+     "hold_threshold": 0.4
+   }
+ }
+ ```
+
+ ---
+
+ ## 9. Workflow Integration
+
+ **Research Sequence:**
+
+ ```
+ 1. Data → Signal Generation
+
+ 2. Suitability Evaluation
+    - Produces: SuitabilityResult + Markdown report
+    - Registry: Stores evaluation metadata
+
+ 3. Decision Gate
+    - PASS → Proceed to strategy design
+    - HOLD → Review marginal components
+    - FAIL → Archive signal
+
+ 4. Backtest (PASS only)
+
+ 5. Performance Analysis
+ ```
+
+ **Batch Evaluation:**
+
+ ```python
+ # Evaluate all signals against a product.
+ # Assumes SIGNAL_CATALOG_PATH, the market data frames (cdx_df, etf_df, vix_df),
+ # and signal_config are defined earlier in the research session.
+ from aponyx.models import SignalRegistry, compute_registered_signals
+ from aponyx.evaluation.suitability import evaluate_signal_suitability
+
+ signal_registry = SignalRegistry(SIGNAL_CATALOG_PATH)
+ market_data = {"cdx": cdx_df, "etf": etf_df, "vix": vix_df}
+ signals = compute_registered_signals(signal_registry, market_data, signal_config)
+
+ for signal_name, signal_series in signals.items():
+     # Pass the spread *change* to match the target_change parameter.
+     result = evaluate_signal_suitability(signal_series, cdx_df["spread"].diff())
+     if result.decision == "PASS":
+         # Proceed to backtest
+         pass
+ ```
+
+ ---
+
+ ## 10. Design Rationale
+
+ **Why Four Components?**
+
+ - **Data Health:** Foundation check prevents garbage-in-garbage-out
+ - **Predictive:** Core criterion for signal validity
+ - **Economic:** Bridges statistics to trading reality
+ - **Stability:** Guards against overfitting and regime dependence
+
+ **Why These Weights?**
+
+ - Predictive credibility (0.40) carries double weight because statistical significance is the primary screening criterion
+ - Other components (0.20 each) are equally important secondary checks
+
+ **Why Rolling Window for Stability?**
+
+ - Rolling statistics capture regime transitions better than fixed temporal splits
+ - Provides continuous stability metrics rather than discrete subperiod comparisons
+ - Default window of 252 observations (~1 year for daily data) balances statistical power with regime sensitivity
+ - Minimum window of 50 observations ensures reliable beta estimation per window
+ - Dual-metric approach (sign consistency + coefficient of variation) captures both directional stability and magnitude consistency
+ - Sign consistency ratio ≥ 0.8 indicates persistent directional relationship across regimes
+ - Beta CV < 0.5 indicates stable effect size (low magnitude variation)
+
+ **Why Multi-Horizon Testing?**
+
+ - Different strategies may use different holding periods
+ - Signal should be robust across reasonable trading horizons
+ - Reduces risk of overfitting to single lag specification
+
+ ---
+
+ **Document Status:** Active design specification for `aponyx.evaluation.suitability` layer.
+ **Last Updated:** December 2025
+ **Maintainer:** stabilefrisur
+