aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/cache.py ADDED
@@ -0,0 +1,294 @@
1
+ """
2
+ Transparent caching layer for fetched data.
3
+
4
+ Caches API/provider responses to local Parquet files with staleness tracking.
5
+ """
6
+
7
+ import hashlib
8
+ import logging
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ import pandas as pd
14
+
15
+ from ..persistence.parquet_io import save_parquet, load_parquet
16
+ from .registry import DataRegistry
17
+ from .sources import DataSource, resolve_provider
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def _generate_cache_key(
    source: DataSource,
    security: str,
    start_date: str | None,
    end_date: str | None,
    **params: Any,
) -> str:
    """
    Generate unique cache key from fetch parameters.

    Parameters
    ----------
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    start_date : str or None
        Start date.
    end_date : str or None
        End date.
    **params : Any
        Additional parameters.

    Returns
    -------
    str
        Hash-based cache key (first 16 hex digits of a SHA-256 digest).
    """
    # Deterministic fingerprint: provider|security|start|end|sorted-params.
    # Sorting the keyword items makes the key independent of call order.
    fingerprint = "|".join(
        [
            resolve_provider(source),
            security,
            start_date or "none",
            end_date or "none",
            str(sorted(params.items())),
        ]
    )

    # 16 hex chars (64 bits) of SHA-256 keeps filenames short while making
    # accidental collisions vanishingly unlikely for this key space.
    return hashlib.sha256(fingerprint.encode()).hexdigest()[:16]
63
+
64
+
65
def get_cache_path(
    cache_dir: Path,
    provider: str,
    security: str,
    cache_key: str,
) -> Path:
    """
    Generate file path for cached data.

    Parameters
    ----------
    cache_dir : Path
        Base cache directory.
    provider : str
        Provider type (file, bloomberg, api).
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_key : str
        Unique cache key.

    Returns
    -------
    Path
        Path to cache file.

    Notes
    -----
    Creates the per-provider subdirectory eagerly so callers can write to
    the returned path without any extra setup.
    """
    target_dir = cache_dir / provider
    target_dir.mkdir(parents=True, exist_ok=True)

    # Dots and slashes would break the filename / escape the directory,
    # so map them to underscores.
    sanitized = security.replace(".", "_").replace("/", "_")
    return target_dir / f"{sanitized}_{cache_key}.parquet"
98
+
99
+
100
+ def is_cache_stale(
101
+ cache_path: Path,
102
+ ttl_days: int | None = None,
103
+ ) -> bool:
104
+ """
105
+ Check if cached data is stale based on TTL.
106
+
107
+ Parameters
108
+ ----------
109
+ cache_path : Path
110
+ Path to cached file.
111
+ ttl_days : int or None
112
+ Time-to-live in days. None means cache never expires.
113
+
114
+ Returns
115
+ -------
116
+ bool
117
+ True if cache is stale or doesn't exist.
118
+ """
119
+ if not cache_path.exists():
120
+ return True
121
+
122
+ if ttl_days is None:
123
+ return False
124
+
125
+ # Check file modification time
126
+ mtime = datetime.fromtimestamp(cache_path.stat().st_mtime)
127
+ age = datetime.now() - mtime
128
+
129
+ is_stale = age > timedelta(days=ttl_days)
130
+
131
+ if is_stale:
132
+ logger.debug("Cache stale: age=%s, ttl=%d days", age, ttl_days)
133
+
134
+ return is_stale
135
+
136
+
137
def get_cached_data(
    source: DataSource,
    security: str,
    cache_dir: Path,
    start_date: str | None = None,
    end_date: str | None = None,
    ttl_days: int | None = None,
    **params: Any,
) -> pd.DataFrame | None:
    """
    Retrieve data from cache if available and fresh.

    Parameters
    ----------
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_dir : Path
        Cache directory.
    start_date : str or None
        Start date filter.
    end_date : str or None
        End date filter.
    ttl_days : int or None
        Cache TTL in days.
    **params : Any
        Additional fetch parameters.

    Returns
    -------
    pd.DataFrame or None
        Cached data if available and fresh, None otherwise.
    """
    key = _generate_cache_key(source, security, start_date, end_date, **params)
    path = get_cache_path(cache_dir, resolve_provider(source), security, key)

    # A stale entry is treated exactly like a miss: the caller falls back
    # to a fresh fetch and overwrites the cache afterwards.
    if is_cache_stale(path, ttl_days):
        logger.debug("Cache miss or stale: %s", path.name)
        return None

    logger.info("Cache hit: %s", path.name)
    return load_parquet(path)
181
+
182
+
183
def save_to_cache(
    df: pd.DataFrame,
    source: DataSource,
    security: str,
    cache_dir: Path,
    registry: DataRegistry | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> Path:
    """
    Save fetched data to cache.

    Parameters
    ----------
    df : pd.DataFrame
        Data to cache.
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_dir : Path
        Cache directory.
    registry : DataRegistry or None
        Optional registry to register cached dataset.
    start_date : str or None
        Start date (for cache key).
    end_date : str or None
        End date (for cache key).
    **params : Any
        Additional parameters (for cache key).

    Returns
    -------
    Path
        Path to cached file.
    """
    provider = resolve_provider(source)
    key = _generate_cache_key(source, security, start_date, end_date, **params)
    target = get_cache_path(cache_dir, provider, security, key)

    save_parquet(df, target)
    logger.info("Cached data: path=%s, rows=%d", target, len(df))

    # Optionally record the cached dataset in the catalog so it is
    # discoverable alongside non-cached datasets.
    if registry is not None:
        registry.register_dataset(
            name=f"cache_{security}_{key}",
            file_path=target,
            instrument=security,
            metadata={
                "provider": provider,
                "cached_at": datetime.now().isoformat(),
                "cache_key": key,
                "params": {"security": security, **params},
            },
        )

    return target
243
+
244
+
245
def update_current_day(
    cached_df: pd.DataFrame,
    current_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Update cached data with current day's data point.

    Parameters
    ----------
    cached_df : pd.DataFrame
        Historical cached data with DatetimeIndex.
    current_df : pd.DataFrame
        Current day's data (single row with today's date).

    Returns
    -------
    pd.DataFrame
        Updated DataFrame with current day's data merged/replaced.

    Notes
    -----
    If today's date already exists in cached_df, it will be replaced.
    Otherwise, current_df is appended. Result is sorted by date.
    """
    # Degenerate cases: nothing to merge on one side, return the other.
    if cached_df.empty:
        return current_df
    if current_df.empty:
        return cached_df

    # current_df is expected to carry a single row; its first index entry
    # identifies the date being refreshed.
    today = current_df.index[0]

    # Drop any stale row(s) for today, append the fresh data, and restore
    # chronological order in one pass.
    merged = pd.concat([cached_df[cached_df.index != today], current_df]).sort_index()

    logger.debug(
        "Updated cache: removed %d existing rows for %s, total rows=%d",
        len(cached_df) - len(merged) + len(current_df),
        today,
        len(merged),
    )

    return merged