aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/cache.py
ADDED
@@ -0,0 +1,294 @@

```python
"""
Transparent caching layer for fetched data.

Caches API/provider responses to local Parquet files with staleness tracking.
"""

import hashlib
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any

import pandas as pd

from ..persistence.parquet_io import save_parquet, load_parquet
from .registry import DataRegistry
from .sources import DataSource, resolve_provider

logger = logging.getLogger(__name__)

def _generate_cache_key(
    source: DataSource,
    security: str,
    start_date: str | None,
    end_date: str | None,
    **params: Any,
) -> str:
    """
    Generate unique cache key from fetch parameters.

    Parameters
    ----------
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    start_date : str or None
        Start date.
    end_date : str or None
        End date.
    **params : Any
        Additional parameters.

    Returns
    -------
    str
        Hash-based cache key.
    """
    # Create stable string representation
    key_parts = [
        resolve_provider(source),
        security,
        start_date or "none",
        end_date or "none",
        str(sorted(params.items())),
    ]
    key_string = "|".join(key_parts)

    # Generate hash
    hash_obj = hashlib.sha256(key_string.encode())
    return hash_obj.hexdigest()[:16]
```
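The key is a truncated SHA-256 over a pipe-joined string, so identical fetch parameters always resolve to the same cache file. A minimal standalone sketch of the same construction (`sketch_key` is illustrative, not part of the package; the example arguments are invented):

```python
import hashlib

def sketch_key(provider: str, security: str, start: str | None,
               end: str | None, **params) -> str:
    # Mirrors _generate_cache_key: provider|security|start|end|sorted params,
    # hashed with SHA-256 and truncated to 16 hex characters.
    key_string = "|".join([provider, security, start or "none",
                           end or "none", str(sorted(params.items()))])
    return hashlib.sha256(key_string.encode()).hexdigest()[:16]

# Identical inputs give identical keys; any change in dates or params gives a new one.
k1 = sketch_key("bloomberg", "cdx_ig_5y", "2024-01-01", "2024-06-30", field="PX_LAST")
k2 = sketch_key("bloomberg", "cdx_ig_5y", "2024-01-01", "2024-06-30", field="PX_LAST")
k3 = sketch_key("bloomberg", "cdx_ig_5y", "2024-01-01", None, field="PX_LAST")
assert k1 == k2 and k1 != k3
```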
```python
def get_cache_path(
    cache_dir: Path,
    provider: str,
    security: str,
    cache_key: str,
) -> Path:
    """
    Generate file path for cached data.

    Parameters
    ----------
    cache_dir : Path
        Base cache directory.
    provider : str
        Provider type (file, bloomberg, api).
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_key : str
        Unique cache key.

    Returns
    -------
    Path
        Path to cache file.
    """
    provider_dir = cache_dir / provider
    provider_dir.mkdir(parents=True, exist_ok=True)

    # Sanitize security name for filename
    safe_security = security.replace(".", "_").replace("/", "_")
    filename = f"{safe_security}_{cache_key}.parquet"

    return provider_dir / filename
```
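On disk, caches are namespaced by provider directory, with the sanitized security name and the 16-character key embedded in the filename. A small usage sketch (the key value is invented for illustration; the import path follows the module layout in the file list above):

```python
from pathlib import Path

from aponyx.data.cache import get_cache_path

# Directories are created on demand: cache/<provider>/<security>_<key>.parquet
path = get_cache_path(Path("cache"), "bloomberg", "cdx_ig_5y", "3f2a9c1b8d4e6f07")
print(path)  # cache/bloomberg/cdx_ig_5y_3f2a9c1b8d4e6f07.parquet
```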
```python
def is_cache_stale(
    cache_path: Path,
    ttl_days: int | None = None,
) -> bool:
    """
    Check if cached data is stale based on TTL.

    Parameters
    ----------
    cache_path : Path
        Path to cached file.
    ttl_days : int or None
        Time-to-live in days. None means cache never expires.

    Returns
    -------
    bool
        True if cache is stale or doesn't exist.
    """
    if not cache_path.exists():
        return True

    if ttl_days is None:
        return False

    # Check file modification time
    mtime = datetime.fromtimestamp(cache_path.stat().st_mtime)
    age = datetime.now() - mtime

    is_stale = age > timedelta(days=ttl_days)

    if is_stale:
        logger.debug("Cache stale: age=%s, ttl=%d days", age, ttl_days)

    return is_stale
```
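Staleness is purely mtime-based: a missing file is always stale, `ttl_days=None` disables expiry entirely, and otherwise the file's modification age is compared against the TTL. For example (same hypothetical cache path as above):

```python
from pathlib import Path

from aponyx.data.cache import is_cache_stale

p = Path("cache/bloomberg/cdx_ig_5y_3f2a9c1b8d4e6f07.parquet")

is_cache_stale(p)                 # True whenever the file does not exist
is_cache_stale(p, ttl_days=None)  # False once the file exists: never expires
is_cache_stale(p, ttl_days=7)     # True only if the file is older than 7 days
```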
```python
def get_cached_data(
    source: DataSource,
    security: str,
    cache_dir: Path,
    start_date: str | None = None,
    end_date: str | None = None,
    ttl_days: int | None = None,
    **params: Any,
) -> pd.DataFrame | None:
    """
    Retrieve data from cache if available and fresh.

    Parameters
    ----------
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_dir : Path
        Cache directory.
    start_date : str or None
        Start date filter.
    end_date : str or None
        End date filter.
    ttl_days : int or None
        Cache TTL in days.
    **params : Any
        Additional fetch parameters.

    Returns
    -------
    pd.DataFrame or None
        Cached data if available and fresh, None otherwise.
    """
    provider = resolve_provider(source)
    cache_key = _generate_cache_key(source, security, start_date, end_date, **params)
    cache_path = get_cache_path(cache_dir, provider, security, cache_key)

    if is_cache_stale(cache_path, ttl_days):
        logger.debug("Cache miss or stale: %s", cache_path.name)
        return None

    logger.info("Cache hit: %s", cache_path.name)
    return load_parquet(cache_path)
```
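The return contract is DataFrame-or-None: a miss and a stale hit look identical to the caller, which is what lets the fetch layer fall through to the provider transparently. A caller-side sketch, assuming `source` is an already-configured `DataSource` (its construction lives in `aponyx/data/sources.py`, not shown here):

```python
from pathlib import Path

from aponyx.data.cache import get_cached_data

cache_dir = Path("cache")
df = get_cached_data(
    source, "cdx_ig_5y", cache_dir,
    start_date="2024-01-01", end_date="2024-06-30",
    ttl_days=1,  # tolerate day-old data; not part of the cache key
)
if df is None:
    ...  # fall through to the provider, then save_to_cache (next function)
```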
```python
def save_to_cache(
    df: pd.DataFrame,
    source: DataSource,
    security: str,
    cache_dir: Path,
    registry: DataRegistry | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> Path:
    """
    Save fetched data to cache.

    Parameters
    ----------
    df : pd.DataFrame
        Data to cache.
    source : DataSource
        Data source configuration.
    security : str
        Security identifier (e.g., 'cdx_ig_5y', 'vix', 'hyg').
    cache_dir : Path
        Cache directory.
    registry : DataRegistry or None
        Optional registry to register cached dataset.
    start_date : str or None
        Start date (for cache key).
    end_date : str or None
        End date (for cache key).
    **params : Any
        Additional parameters (for cache key).

    Returns
    -------
    Path
        Path to cached file.
    """
    provider = resolve_provider(source)
    cache_key = _generate_cache_key(source, security, start_date, end_date, **params)
    cache_path = get_cache_path(cache_dir, provider, security, cache_key)

    # Save to Parquet
    save_parquet(df, cache_path)
    logger.info("Cached data: path=%s, rows=%d", cache_path, len(df))

    # Register in catalog if provided
    if registry is not None:
        registry.register_dataset(
            name=f"cache_{security}_{cache_key}",
            file_path=cache_path,
            instrument=security,
            metadata={
                "provider": provider,
                "cached_at": datetime.now().isoformat(),
                "cache_key": cache_key,
                "params": {"security": security, **params},
            },
        )

    return cache_path
```
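Together with `get_cached_data`, this supports a read-through pattern; because both functions derive the key from the same `(source, security, start_date, end_date, **params)`, the lookup and the save must pass matching values. A sketch under the same assumptions, with a hypothetical `fetch_from_provider` standing in for the real fetch layer (`aponyx/data/fetch.py`):

```python
df = get_cached_data(source, "cdx_ig_5y", cache_dir, ttl_days=1)
if df is None:
    df = fetch_from_provider(source, "cdx_ig_5y")   # hypothetical fetch call
    # Same source/security/dates/params as the lookup -> same key, same path.
    save_to_cache(df, source, "cdx_ig_5y", cache_dir)
```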
```python
def update_current_day(
    cached_df: pd.DataFrame,
    current_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Update cached data with current day's data point.

    Parameters
    ----------
    cached_df : pd.DataFrame
        Historical cached data with DatetimeIndex.
    current_df : pd.DataFrame
        Current day's data (single row with today's date).

    Returns
    -------
    pd.DataFrame
        Updated DataFrame with current day's data merged/replaced.

    Notes
    -----
    If today's date already exists in cached_df, it will be replaced.
    Otherwise, current_df is appended. Result is sorted by date.
    """
    if cached_df.empty:
        return current_df

    if current_df.empty:
        return cached_df

    # Get today's date from current_df
    today = current_df.index[0]

    # Remove today's data if it exists in cache
    updated_df = cached_df[cached_df.index != today]

    # Append current day's data
    updated_df = pd.concat([updated_df, current_df])

    # Sort by date
    updated_df = updated_df.sort_index()

    logger.debug(
        "Updated cache: removed %d existing rows for %s, total rows=%d",
        len(cached_df) - len(updated_df) + len(current_df),
        today,
        len(updated_df),
    )

    return updated_df
```
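The intraday helper replaces, rather than appends, any cached row sharing today's timestamp, so repeated refreshes within a trading day never duplicate the date. A self-contained demonstration (spread values invented):

```python
import pandas as pd

from aponyx.data.cache import update_current_day

hist = pd.DataFrame(
    {"spread": [62.1, 63.4]},
    index=pd.to_datetime(["2024-06-27", "2024-06-28"]),
)
# Fresher print for a date already present in the cache.
today = pd.DataFrame({"spread": [63.9]}, index=pd.to_datetime(["2024-06-28"]))

merged = update_current_day(hist, today)
# 2024-06-28 now carries 63.9 once: the stale row was replaced, not duplicated.
print(merged)
```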