aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,367 @@
+"""
+Component scoring logic for suitability evaluation.
+
+Provides scoring functions for the four evaluation components and
+composite score computation.
+"""
+
+import logging
+
+from aponyx.evaluation.suitability.config import SuitabilityConfig
+
+logger = logging.getLogger(__name__)
+
+
+def score_data_health(
+    valid_obs: int,
+    missing_pct: float,
+    min_obs: int,
+) -> float:
+    """
+    Score data quality and sufficiency.
+
+    Parameters
+    ----------
+    valid_obs : int
+        Number of valid observations after alignment.
+    missing_pct : float
+        Percentage of missing data (0-100).
+    min_obs : int
+        Minimum required observations threshold.
+
+    Returns
+    -------
+    float
+        Data health score on 0-1 scale.
+        Returns 0.0 if insufficient observations.
+        Otherwise, penalizes missing data with tolerance up to 20%.
+
+    Notes
+    -----
+    Scoring logic:
+    - If valid_obs < min_obs: score = 0.0 (insufficient data)
+    - Else: score = max(0, 1 - missing_pct / 20),
+      capped at 0% missing (score=1.0) and 20% missing (score=0.0).
+
+    Examples
+    --------
+    >>> score_data_health(600, 5.0, 500)  # 600 obs, 5% missing
+    0.75
+    >>> score_data_health(400, 0.0, 500)  # Below threshold
+    0.0
+    """
+    if valid_obs < min_obs:
+        logger.debug(
+            "Insufficient observations: %d < %d, score=0.0",
+            valid_obs,
+            min_obs,
+        )
+        return 0.0
+
+    # Penalize missing data, tolerating up to 20%
+    score = max(0.0, 1.0 - missing_pct / 20.0)
+
+    logger.debug(
+        "Data health: valid_obs=%d, missing_pct=%.2f%%, score=%.3f",
+        valid_obs,
+        missing_pct,
+        score,
+    )
+
+    return score
+
+
+def score_predictive(mean_abs_tstat: float) -> float:
+    """
+    Score predictive association strength.
+
+    Parameters
+    ----------
+    mean_abs_tstat : float
+        Mean absolute t-statistic across lags.
+
+    Returns
+    -------
+    float
+        Predictive score on 0-1 scale.
+        Normalized by dividing by 3.0 (capped at 1.0).
+
+    Notes
+    -----
+    Scoring logic:
+    - score = min(1.0, mean_abs_tstat / 3.0)
+    - t-stat > 2.0: Statistically significant at conventional levels
+    - t-stat > 3.0: Strong evidence (score capped at 1.0)
+
+    Examples
+    --------
+    >>> round(score_predictive(2.5), 3)  # Significant
+    0.833
+    >>> score_predictive(4.0)  # Strong (capped)
+    1.0
+    """
+    score = min(1.0, mean_abs_tstat / 3.0)
+
+    logger.debug(
+        "Predictive association: mean |t-stat|=%.3f, score=%.3f",
+        mean_abs_tstat,
+        score,
+    )
+
+    return score
+
+
+def score_economic(effect_size_bps: float) -> float:
+    """
+    Score economic relevance based on effect size.
+
+    Parameters
+    ----------
+    effect_size_bps : float
+        Estimated impact in basis points per 1σ signal change.
+
+    Returns
+    -------
+    float
+        Economic score on 0-1 scale.
+
+    Notes
+    -----
+    Scoring thresholds:
+    - effect_size < 0.5 bps: Negligible → score = 0.2
+    - 0.5 ≤ effect_size < 2.0 bps: Moderate → score = 0.6
+    - effect_size ≥ 2.0 bps: Meaningful → score = 1.0
+
+    For CDX spreads, 2 bps is ~0.4% spread change (economically relevant).
+
+    Examples
+    --------
+    >>> score_economic(0.3)  # Negligible
+    0.2
+    >>> score_economic(1.5)  # Moderate
+    0.6
+    >>> score_economic(3.0)  # Meaningful
+    1.0
+    """
+    if effect_size_bps < 0.5:
+        score = 0.2
+        category = "negligible"
+    elif effect_size_bps < 2.0:
+        score = 0.6
+        category = "moderate"
+    else:
+        score = 1.0
+        category = "meaningful"
+
+    logger.debug(
+        "Economic relevance: effect_size=%.3f bps (%s), score=%.3f",
+        effect_size_bps,
+        category,
+        score,
+    )
+
+    return score
+
+
+def score_stability(
+    sign_consistency_ratio: float,
+    beta_cv: float,
+) -> float:
+    """
+    Score temporal stability based on rolling window statistics.
+
+    Parameters
+    ----------
+    sign_consistency_ratio : float
+        Proportion of rolling windows with same sign as aggregate beta (0-1).
+    beta_cv : float
+        Coefficient of variation of rolling betas (std / |mean|).
+
+    Returns
+    -------
+    float
+        Stability score on 0-1 scale.
+        Weighted average of sign consistency and magnitude stability scores.
+
+    Notes
+    -----
+    Scoring logic:
+
+    Sign Consistency Component (50% weight):
+    - ratio ≥ 0.8: score = 1.0 (highly consistent)
+    - 0.6 ≤ ratio < 0.8: score = 0.5 (moderately consistent)
+    - ratio < 0.6: score = 0.0 (inconsistent)
+
+    Magnitude Stability Component (50% weight):
+    - CV < 0.5: score = 1.0 (stable magnitude)
+    - 0.5 ≤ CV < 1.0: score = 0.5 (moderate variation)
+    - CV ≥ 1.0: score = 0.0 (high variation)
+
+    Final score = 0.5 × sign_score + 0.5 × magnitude_score
+
+    Examples
+    --------
+    >>> score_stability(0.85, 0.3)  # High consistency, low CV
+    1.0
+    >>> score_stability(0.75, 0.6)  # Moderate both
+    0.5
+    >>> score_stability(0.5, 1.2)  # Low consistency, high CV
+    0.0
+    """
+    # Score sign consistency
+    if sign_consistency_ratio >= 0.8:
+        sign_score = 1.0
+        sign_category = "highly consistent"
+    elif sign_consistency_ratio >= 0.6:
+        sign_score = 0.5
+        sign_category = "moderately consistent"
+    else:
+        sign_score = 0.0
+        sign_category = "inconsistent"
+
+    # Score magnitude stability (lower CV = more stable)
+    if beta_cv < 0.5:
+        magnitude_score = 1.0
+        magnitude_category = "stable"
+    elif beta_cv < 1.0:
+        magnitude_score = 0.5
+        magnitude_category = "moderate variation"
+    else:
+        magnitude_score = 0.0
+        magnitude_category = "high variation"
+
+    # Weighted average (equal weights)
+    score = 0.5 * sign_score + 0.5 * magnitude_score
+
+    logger.debug(
+        "Temporal stability: sign_ratio=%.3f (%s, score=%.1f), "
+        "CV=%.3f (%s, score=%.1f), final_score=%.3f",
+        sign_consistency_ratio,
+        sign_category,
+        sign_score,
+        beta_cv,
+        magnitude_category,
+        magnitude_score,
+        score,
+    )
+
+    return score
+
+
+def compute_composite_score(
+    data_health_score: float,
+    predictive_score: float,
+    economic_score: float,
+    stability_score: float,
+    config: SuitabilityConfig,
+) -> float:
+    """
+    Compute weighted composite score from component scores.
+
+    Parameters
+    ----------
+    data_health_score : float
+        Data quality score (0-1).
+    predictive_score : float
+        Predictive association score (0-1).
+    economic_score : float
+        Economic relevance score (0-1).
+    stability_score : float
+        Temporal stability score (0-1).
+    config : SuitabilityConfig
+        Configuration with component weights.
+
+    Returns
+    -------
+    float
+        Composite score on 0-1 scale (weighted average).
+
+    Notes
+    -----
+    Default weights:
+    - Data health: 20%
+    - Predictive: 40%
+    - Economic: 20%
+    - Stability: 20%
+
+    Examples
+    --------
+    >>> config = SuitabilityConfig()
+    >>> round(compute_composite_score(0.8, 0.9, 0.6, 1.0, config), 2)
+    0.84
+    """
+    composite = (
+        config.data_health_weight * data_health_score
+        + config.predictive_weight * predictive_score
+        + config.economic_weight * economic_score
+        + config.stability_weight * stability_score
+    )
+
+    logger.debug(
+        "Composite score: %.3f = %.2f×%.3f + %.2f×%.3f + %.2f×%.3f + %.2f×%.3f",
+        composite,
+        config.data_health_weight,
+        data_health_score,
+        config.predictive_weight,
+        predictive_score,
+        config.economic_weight,
+        economic_score,
+        config.stability_weight,
+        stability_score,
+    )
+
+    return composite
+
+
+def assign_decision(
+    composite_score: float,
+    config: SuitabilityConfig,
+) -> str:
+    """
+    Assign decision based on composite score and thresholds.
+
+    Parameters
+    ----------
+    composite_score : float
+        Composite score (0-1).
+    config : SuitabilityConfig
+        Configuration with decision thresholds.
+
+    Returns
+    -------
+    str
+        Decision: "PASS", "HOLD", or "FAIL".
+
+    Notes
+    -----
+    Decision logic:
+    - score ≥ pass_threshold (0.7): PASS → proceed to backtest
+    - pass_threshold > score ≥ hold_threshold (0.4): HOLD → marginal, requires judgment
+    - score < hold_threshold: FAIL → do not backtest
+
+    Examples
+    --------
+    >>> config = SuitabilityConfig()
+    >>> assign_decision(0.75, config)
+    'PASS'
+    >>> assign_decision(0.55, config)
+    'HOLD'
+    >>> assign_decision(0.35, config)
+    'FAIL'
+    """
+    if composite_score >= config.pass_threshold:
+        decision = "PASS"
+    elif composite_score >= config.hold_threshold:
+        decision = "HOLD"
+    else:
+        decision = "FAIL"
+
+    logger.debug(
+        "Decision assignment: score=%.3f, pass_threshold=%.2f, hold_threshold=%.2f → %s",
+        composite_score,
+        config.pass_threshold,
+        config.hold_threshold,
+        decision,
+    )
+
+    return decision
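The functions above are designed to compose: the four component scorers feed compute_composite_score, whose output feeds assign_decision. A minimal end-to-end sketch, assuming SuitabilityConfig() carries the default weights (0.2/0.4/0.2/0.2) and thresholds (pass 0.7, hold 0.4) documented in the docstrings; the inputs are illustrative, not package defaults:

    from aponyx.evaluation.suitability.config import SuitabilityConfig
    from aponyx.evaluation.suitability.scoring import (
        assign_decision,
        compute_composite_score,
        score_data_health,
        score_economic,
        score_predictive,
        score_stability,
    )

    config = SuitabilityConfig()

    # Hypothetical evaluation inputs
    dh = score_data_health(valid_obs=600, missing_pct=5.0, min_obs=500)  # 0.75
    pred = score_predictive(mean_abs_tstat=2.5)                          # ~0.833
    econ = score_economic(effect_size_bps=1.5)                           # 0.6
    stab = score_stability(sign_consistency_ratio=0.85, beta_cv=0.3)     # 1.0

    composite = compute_composite_score(dh, pred, econ, stab, config)
    # 0.2*0.75 + 0.4*0.833 + 0.2*0.6 + 0.2*1.0 ≈ 0.80
    print(assign_decision(composite, config))  # 'PASS' under the default thresholds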
@@ -0,0 +1,303 @@
+"""
+Statistical tests for signal-target relationships.
+
+Provides correlation, regression, and temporal stability tests for
+evaluating predictive associations.
+"""
+
+import logging
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+
+logger = logging.getLogger(__name__)
+
+
+def compute_correlation(
+    signal: pd.Series,
+    target: pd.Series,
+) -> float:
+    """
+    Compute Pearson correlation between signal and target.
+
+    Parameters
+    ----------
+    signal : pd.Series
+        Signal time series.
+    target : pd.Series
+        Target time series (must be aligned with signal).
+
+    Returns
+    -------
+    float
+        Pearson correlation coefficient (-1 to 1).
+
+    Notes
+    -----
+    Returns 0.0 if either series is empty or has zero variance, or if the correlation is undefined (NaN).
+
+    Examples
+    --------
+    >>> signal = pd.Series([1, 2, 3, 4, 5])
+    >>> target = pd.Series([2, 4, 6, 8, 10])
+    >>> compute_correlation(signal, target)
+    1.0
+    """
+    if len(signal) == 0 or len(target) == 0:
+        logger.warning("Empty series provided, returning correlation=0.0")
+        return 0.0
+
+    if signal.std() == 0 or target.std() == 0:
+        logger.warning("Zero variance in series, returning correlation=0.0")
+        return 0.0
+
+    corr = signal.corr(target)
+    if pd.isna(corr):
+        logger.warning("NaN correlation (insufficient data), returning 0.0")
+        return 0.0
+
+    logger.debug("Computed correlation: %.4f", corr)
+    return float(corr)
+
+
+def compute_regression_stats(
+    signal: pd.Series,
+    target: pd.Series,
+) -> dict[str, float]:
+    """
+    Compute OLS regression statistics for signal predicting target.
+
+    Runs ordinary least squares regression: target ~ signal
+
+    Parameters
+    ----------
+    signal : pd.Series
+        Independent variable (predictor).
+    target : pd.Series
+        Dependent variable (response).
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary with keys:
+        - 'beta': regression coefficient
+        - 't_stat': t-statistic for beta
+        - 'p_value': p-value for beta
+        - 'r_squared': coefficient of determination
+
+    Notes
+    -----
+    Uses statsmodels OLS with constant term (intercept).
+    Returns neutral statistics (beta=0.0, t_stat=0.0, p_value=1.0, r_squared=0.0)
+    if regression fails due to insufficient data or numerical issues.
+
+    Examples
+    --------
+    >>> signal = pd.Series([1, 2, 3, 4, 5])
+    >>> target = pd.Series([2, 4, 6, 8, 10])
+    >>> stats = compute_regression_stats(signal, target)
+    >>> stats['beta']
+    2.0
+    """
+    if len(signal) < 3 or len(target) < 3:
+        logger.warning(
+            "Insufficient observations for regression (n=%d), returning zeros",
+            len(signal),
+        )
+        return {"beta": 0.0, "t_stat": 0.0, "p_value": 1.0, "r_squared": 0.0}
+
+    try:
+        # Add constant for intercept
+        X = sm.add_constant(signal.values)
+        y = target.values
+
+        # Fit OLS model
+        model = sm.OLS(y, X).fit()
+
+        # Extract statistics for signal coefficient (index 1, after constant)
+        beta = float(model.params[1])
+        t_stat = float(model.tvalues[1])
+        p_value = float(model.pvalues[1])
+        r_squared = float(model.rsquared)
+
+        logger.debug(
+            "Regression: beta=%.4f, t=%.4f, p=%.4f, R²=%.4f",
+            beta,
+            t_stat,
+            p_value,
+            r_squared,
+        )
+
+        return {
+            "beta": beta,
+            "t_stat": t_stat,
+            "p_value": p_value,
+            "r_squared": r_squared,
+        }
+
+    except Exception as e:
+        logger.warning("Regression failed: %s, returning zeros", e)
+        return {"beta": 0.0, "t_stat": 0.0, "p_value": 1.0, "r_squared": 0.0}
+
+
+def compute_rolling_betas(
+    signal: pd.Series,
+    target: pd.Series,
+    window: int,
+) -> pd.Series:
+    """
+    Compute rolling regression betas using sliding window.
+
+    Parameters
+    ----------
+    signal : pd.Series
+        Signal time series with DatetimeIndex.
+    target : pd.Series
+        Target time series with DatetimeIndex (aligned with signal).
+    window : int
+        Rolling window size in observations (e.g., 252 for ~1 year daily data).
+
+    Returns
+    -------
+    pd.Series
+        Time series of rolling beta coefficients.
+        Index matches input series; first (window-1) values are NaN.
+
+    Notes
+    -----
+    Uses OLS regression in each window: target ~ signal + constant.
+    A window of at least 50 observations is recommended for reliable estimation.
+
+    The implementation iterates over each full window, fitting an OLS model
+    and recording its beta coefficient.
+
+    Examples
+    --------
+    >>> signal = pd.Series([...], index=date_range)
+    >>> target = pd.Series([...], index=date_range)
+    >>> rolling_betas = compute_rolling_betas(signal, target, window=252)
+    >>> rolling_betas.mean()  # Average beta across all windows
+    """
+    if len(signal) < window:
+        logger.warning(
+            "Insufficient data for rolling window (n=%d < window=%d), returning empty series",
+            len(signal),
+            window,
+        )
+        return pd.Series([], dtype=float, index=signal.index[:0])
+
+    # Preallocate result array
+    betas = np.full(len(signal), np.nan)
+
+    # Compute beta for each window
+    for i in range(window - 1, len(signal)):
+        window_signal = signal.iloc[i - window + 1 : i + 1]
+        window_target = target.iloc[i - window + 1 : i + 1]
+
+        if len(window_signal) < 3:
+            continue
+
+        try:
+            X = sm.add_constant(window_signal.values)
+            y = window_target.values
+            model = sm.OLS(y, X).fit()
+            betas[i] = float(model.params[1])
+        except Exception:
+            pass  # Keep as NaN
+
+    rolling_betas = pd.Series(betas, index=signal.index, name=signal.name)
+
+    logger.debug(
+        "Computed %d rolling betas (window=%d, valid=%d)",
+        len(rolling_betas),
+        window,
+        rolling_betas.notna().sum(),
+    )
+
+    return rolling_betas
+
+
+def compute_stability_metrics(
+    rolling_betas: pd.Series,
+    aggregate_beta: float,
+) -> dict[str, float]:
+    """
+    Compute stability metrics from rolling beta coefficients.
+
+    Parameters
+    ----------
+    rolling_betas : pd.Series
+        Time series of rolling beta coefficients from compute_rolling_betas().
+    aggregate_beta : float
+        Overall beta coefficient from full-sample regression.
+        Used as reference for sign consistency check.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary with keys:
+        - 'sign_consistency_ratio': Proportion of windows matching aggregate sign
+        - 'beta_cv': Coefficient of variation (std / |mean|)
+        - 'n_windows': Number of valid rolling windows
+
+    Notes
+    -----
+    Sign consistency ratio ≥ 0.8 indicates stable directional relationship.
+    Beta CV < 0.5 indicates low magnitude variation (stable effect size).
+
+    Windows with beta ≈ 0 (|beta| < 0.01) are excluded from sign consistency
+    to avoid spurious sign flips in noise.
+
+    Examples
+    --------
+    >>> rolling_betas = pd.Series([1.5, 1.8, 1.6, 1.7, 1.9])
+    >>> aggregate_beta = 1.7
+    >>> metrics = compute_stability_metrics(rolling_betas, aggregate_beta)
+    >>> metrics['sign_consistency_ratio']  # All windows share the aggregate sign
+    1.0
+    >>> round(metrics['beta_cv'], 2)  # Low variation
+    0.09
+    """
+    # Remove NaN values
+    valid_betas = rolling_betas.dropna()
+
+    if len(valid_betas) == 0:
+        logger.warning("No valid rolling betas, returning zero metrics")
+        return {
+            "sign_consistency_ratio": 0.0,
+            "beta_cv": 0.0,
+            "n_windows": 0,
+        }
+
+    # Sign consistency: proportion of windows with same sign as aggregate
+    aggregate_sign = np.sign(aggregate_beta)
+
+    # Filter out near-zero betas (|beta| < 0.01) to avoid noise
+    non_zero_mask = np.abs(valid_betas) >= 0.01
+    if non_zero_mask.sum() == 0:
+        sign_consistency_ratio = 0.0
+    else:
+        same_sign = (np.sign(valid_betas[non_zero_mask]) == aggregate_sign).sum()
+        sign_consistency_ratio = float(same_sign / non_zero_mask.sum())
+
+    # Coefficient of variation: std / |mean|
+    beta_mean = valid_betas.mean()
+    beta_std = valid_betas.std()
+
+    if abs(beta_mean) < 1e-10:
+        beta_cv = 0.0
+    else:
+        beta_cv = float(beta_std / abs(beta_mean))
+
+    logger.debug(
+        "Stability metrics: sign_ratio=%.3f, CV=%.3f, n_windows=%d",
+        sign_consistency_ratio,
+        beta_cv,
+        len(valid_betas),
+    )
+
+    return {
+        "sign_consistency_ratio": sign_consistency_ratio,
+        "beta_cv": beta_cv,
+        "n_windows": len(valid_betas),
+    }
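These helpers chain naturally: compute_regression_stats supplies the aggregate beta, compute_rolling_betas the window-level betas, and compute_stability_metrics the inputs that score_stability in scoring.py consumes. A minimal sketch on synthetic data; the window of 60 and the noise scale are illustrative choices, not package defaults:

    import numpy as np
    import pandas as pd

    from aponyx.evaluation.suitability.tests import (
        compute_regression_stats,
        compute_rolling_betas,
        compute_stability_metrics,
    )

    rng = np.random.default_rng(0)
    idx = pd.date_range("2020-01-01", periods=500, freq="B")
    signal = pd.Series(rng.normal(size=500), index=idx)
    noise = pd.Series(rng.normal(scale=0.5, size=500), index=idx)
    target = 2.0 * signal + noise  # true beta = 2.0

    stats = compute_regression_stats(signal, target)          # beta near 2.0, large t-stat
    betas = compute_rolling_betas(signal, target, window=60)  # first 59 values are NaN
    metrics = compute_stability_metrics(betas, stats["beta"])
    # A stable relationship should give sign_consistency_ratio near 1.0 and a small beta_cv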
@@ -0,0 +1,53 @@
+"""
+Generate synthetic market data for all instruments.
+
+Prerequisites
+-------------
+None — this is the first step in the workflow.
+
+Outputs
+-------
+Raw synthetic data files in data/raw/synthetic/:
+- cdx_ig_5y_{hash}.parquet (CDX spread data)
+- cdx_ig_10y_{hash}.parquet (CDX spread data)
+- cdx_hy_5y_{hash}.parquet (CDX spread data)
+- itrx_xover_5y_{hash}.parquet (iTraxx spread data)
+- itrx_eur_5y_{hash}.parquet (iTraxx spread data)
+- vix_{hash}.parquet (VIX volatility index)
+- hyg_{hash}.parquet (High yield ETF spreads)
+- lqd_{hash}.parquet (Investment grade ETF spreads)
+
+Each file includes metadata JSON with generation parameters.
+
+Examples
+--------
+Run from project root:
+    python -m aponyx.examples.01_generate_synthetic_data
+
+Expected output: 8 parquet files with 5 years of daily data (~1260 rows each).
+"""
+
+from aponyx.config import RAW_DIR
+from aponyx.data.sample_data import generate_for_fetch_interface
+
+
+def main() -> None:
+    """
+    Generate synthetic market data for testing and demonstrations.
+
+    Creates realistic time series data for all instruments defined in
+    bloomberg_securities.json, using hash-based naming compatible with
+    the data fetch interface.
+    """
+    output_dir = RAW_DIR / "synthetic"
+
+    generate_for_fetch_interface(
+        output_dir=output_dir,
+        start_date="2020-01-01",
+        end_date="2025-01-01",
+        seed=42,
+    )
+
+
+if __name__ == "__main__":
+    main()
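Once the script has run, each instrument is written under data/raw/synthetic/ with a run-time hash suffix, so outputs are easiest to locate by glob. A quick inspection sketch; the exact file name depends on the generated hash:

    import pandas as pd

    from aponyx.config import RAW_DIR

    output_dir = RAW_DIR / "synthetic"
    sample = next(output_dir.glob("cdx_ig_5y_*.parquet"))  # first matching output file
    df = pd.read_parquet(sample)
    print(len(df))  # ~1260 business-day rows, per the docstring above
    print(df.head())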