aponyx-0.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/persistence/json_io.py ADDED
@@ -0,0 +1,132 @@
+ """
+ JSON I/O utilities for metadata, parameters, and run logs.
+
+ Handles serialization of dictionaries with support for common data types
+ including datetime, Path, and numpy arrays.
+ """
+
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+ from datetime import datetime, date
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+
+
+ class EnhancedJSONEncoder(json.JSONEncoder):
+     """
+     JSON encoder with support for datetime, Path, and numpy types.
+
+     Extends standard JSONEncoder to handle common scientific computing types
+     that appear in metadata and parameter dictionaries.
+     """
+
+     def default(self, obj: Any) -> Any:
+         """Convert non-serializable objects to JSON-compatible types."""
+         if isinstance(obj, (datetime, date)):
+             return obj.isoformat()
+         elif isinstance(obj, Path):
+             return str(obj)
+         elif isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return obj.tolist()
+         return super().default(obj)
+
+
+ def save_json(
+     data: dict[str, Any],
+     path: str | Path,
+     indent: int = 2,
+     sort_keys: bool = True,
+ ) -> Path:
+     """
+     Save dictionary to JSON file with enhanced type support.
+
+     Parameters
+     ----------
+     data : dict
+         Dictionary to serialize. Supports datetime, Path, and numpy types.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     indent : int, default 2
+         Number of spaces for indentation (for readability).
+     sort_keys : bool, default True
+         Whether to sort dictionary keys alphabetically.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Examples
+     --------
+     >>> metadata = {
+     ... 'timestamp': datetime.now(),
+     ... 'params': {'window': 5, 'threshold': 0.5},
+     ... 'version': '0.1.0'
+     ... }
+     >>> save_json(metadata, 'logs/run_20241025.json')
+     """
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info("Saving JSON to %s (%d top-level keys)", path, len(data))
+
+     with path.open("w", encoding="utf-8") as f:
+         json.dump(
+             data,
+             f,
+             cls=EnhancedJSONEncoder,
+             indent=indent,
+             sort_keys=sort_keys,
+             ensure_ascii=False,
+         )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_json(path: str | Path) -> dict[str, Any]:
+     """
+     Load dictionary from JSON file.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+
+     Returns
+     -------
+     dict
+         Deserialized dictionary.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     json.JSONDecodeError
+         If the file contains invalid JSON.
+
+     Examples
+     --------
+     >>> metadata = load_json('logs/run_20241025.json')
+     >>> print(metadata['timestamp'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"JSON file not found: {path}")
+
+     logger.info("Loading JSON from %s", path)
+
+     with path.open("r", encoding="utf-8") as f:
+         data = json.load(f)
+
+     logger.debug(
+         "Loaded JSON with %d top-level keys", len(data) if isinstance(data, dict) else 0
+     )
+     return data
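
A minimal usage sketch of the two public helpers in this module (illustrative only: the payload and the output path are made up, and the import targets the aponyx.persistence.json_io module shown in the file list above):

# Illustrative sketch: round-trip metadata through save_json / load_json.
from datetime import datetime
import numpy as np
from aponyx.persistence.json_io import save_json, load_json

metadata = {
    "timestamp": datetime.now(),        # encoded as an ISO-8601 string
    "weights": np.array([0.25, 0.75]),  # numpy array becomes a plain list
    "params": {"window": 5, "threshold": 0.5},
}

saved_path = save_json(metadata, "logs/example_run.json")  # hypothetical path
restored = load_json(saved_path)
print(restored["timestamp"])  # comes back as a string, not a datetime

Note that EnhancedJSONEncoder is lossy by design: datetimes and arrays deserialize as strings and lists, so callers that need the original types must convert after load_json.
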
aponyx/persistence/parquet_io.py ADDED
@@ -0,0 +1,378 @@
+ """
+ Parquet I/O utilities for time series data and indicator cache persistence.
+
+ Handles efficient storage and retrieval of market data (CDX spreads, VIX, ETF prices)
+ and computed indicators with metadata preservation and validation.
+ """
+
+ import hashlib
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ def save_parquet(
+     df: pd.DataFrame,
+     path: str | Path,
+     compression: str = "snappy",
+     index: bool = True,
+ ) -> Path:
+     """
+     Save DataFrame to Parquet with optimized settings for time series data.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame to persist. For time series, index should be DatetimeIndex.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     compression : str, default "snappy"
+         Compression algorithm. Options: "snappy", "gzip", "brotli", "zstd".
+     index : bool, default True
+         Whether to write DataFrame index to file.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Raises
+     ------
+     ValueError
+         If DataFrame is empty or path is invalid.
+
+     Examples
+     --------
+     >>> df = pd.DataFrame({'spread': [100, 105, 98]},
+     ... index=pd.date_range('2024-01-01', periods=3))
+     >>> save_parquet(df, 'data/cdx_ig_5y.parquet')
+     """
+     if df.empty:
+         raise ValueError("Cannot save empty DataFrame")
+
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info(
+         "Saving DataFrame to Parquet: path=%s, rows=%d, columns=%d, compression=%s",
+         path,
+         len(df),
+         len(df.columns),
+         compression,
+     )
+
+     df.to_parquet(
+         path,
+         engine="pyarrow",
+         compression=compression,
+         index=index,
+     )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_parquet(
+     path: str | Path,
+     columns: list[str] | None = None,
+     start_date: pd.Timestamp | None = None,
+     end_date: pd.Timestamp | None = None,
+ ) -> pd.DataFrame:
+     """
+     Load DataFrame from Parquet with optional filtering.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+     columns : list of str, optional
+         Subset of columns to load. If None, loads all columns.
+     start_date : pd.Timestamp, optional
+         Filter data from this date (inclusive). Requires DatetimeIndex.
+     end_date : pd.Timestamp, optional
+         Filter data to this date (inclusive). Requires DatetimeIndex.
+
+     Returns
+     -------
+     pd.DataFrame
+         Loaded and optionally filtered DataFrame.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     ValueError
+         If date filtering is requested but index is not DatetimeIndex.
+
+     Examples
+     --------
+     >>> df = load_parquet('data/cdx_ig_5y.parquet',
+     ... start_date=pd.Timestamp('2024-01-01'))
+     >>> df = load_parquet('data/vix.parquet', columns=['close'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"Parquet file not found: {path}")
+
+     logger.info("Loading Parquet file: path=%s, columns=%s", path, columns or "all")
+
+     df = pd.read_parquet(path, engine="pyarrow", columns=columns)
+
+     # Apply date filtering if requested
+     if start_date is not None or end_date is not None:
+         if not isinstance(df.index, pd.DatetimeIndex):
+             raise ValueError(
+                 f"Date filtering requires DatetimeIndex. Got {type(df.index).__name__}"
+             )
+
+         if start_date is not None:
+             df = df[df.index >= start_date]
+         if end_date is not None:
+             df = df[df.index <= end_date]
+
+         logger.debug(
+             "Applied date filter: start=%s, end=%s, resulting_rows=%d",
+             start_date,
+             end_date,
+             len(df),
+         )
+
+     logger.info("Loaded %d rows, %d columns from %s", len(df), len(df.columns), path)
+     return df
+
+
+ def list_parquet_files(directory: str | Path, pattern: str = "*.parquet") -> list[Path]:
+     """
+     List all Parquet files in a directory matching a pattern.
+
+     Parameters
+     ----------
+     directory : str or Path
+         Directory to search.
+     pattern : str, default "*.parquet"
+         Glob pattern for file matching.
+
+     Returns
+     -------
+     list of Path
+         Sorted list of matching file paths.
+
+     Examples
+     --------
+     >>> files = list_parquet_files('data/', pattern='cdx_*.parquet')
+     >>> files = list_parquet_files('data/raw/')
+     """
+     directory = Path(directory)
+     if not directory.exists():
+         logger.debug("Directory does not exist: %s", directory)
+         return []
+
+     files = sorted(directory.glob(pattern))
+     logger.info(
+         "Found %d Parquet files in %s (pattern=%s)", len(files), directory, pattern
+     )
+     return files
+
+
+ def generate_indicator_cache_key(
+     indicator_name: str,
+     parameters: dict[str, Any],
+     input_data: dict[str, pd.DataFrame],
+ ) -> str:
+     """
+     Generate deterministic cache key for indicator computation.
+
+     Cache key format: {indicator_name}_{params_hash}_{data_hash}
+
+     Parameters
+     ----------
+     indicator_name : str
+         Name of the indicator.
+     parameters : dict[str, Any]
+         Indicator computation parameters.
+     input_data : dict[str, pd.DataFrame]
+         Input market data DataFrames.
+
+     Returns
+     -------
+     str
+         Cache key string.
+
+     Examples
+     --------
+     >>> key = generate_indicator_cache_key(
+     ... "cdx_etf_spread_diff",
+     ... {"lookback": 5},
+     ... {"cdx": cdx_df, "etf": etf_df}
+     ... )
+     >>> key
+     'cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8'
+     """
+     # Hash parameters
+     params_str = json.dumps(parameters, sort_keys=True)
+     params_hash = hashlib.sha256(params_str.encode()).hexdigest()[:8]
+
+     # Hash input data (concatenate all DataFrame hashes)
+     data_hashes = []
+     for key in sorted(input_data.keys()):
+         df_hash = hashlib.sha256(
+             pd.util.hash_pandas_object(input_data[key]).values
+         ).hexdigest()[:8]
+         data_hashes.append(df_hash)
+     data_hash = hashlib.sha256("".join(data_hashes).encode()).hexdigest()[:8]
+
+     cache_key = f"{indicator_name}_{params_hash}_{data_hash}"
+     logger.debug("Generated cache key: %s", cache_key)
+     return cache_key
+
+
+ def save_indicator_to_cache(
+     indicator_series: pd.Series,
+     cache_key: str,
+     cache_dir: Path,
+ ) -> Path:
+     """
+     Save computed indicator to cache.
+
+     Parameters
+     ----------
+     indicator_series : pd.Series
+         Computed indicator time series.
+     cache_key : str
+         Cache key from generate_indicator_cache_key().
+     cache_dir : Path
+         Root cache directory (e.g., data/cache/indicators/).
+
+     Returns
+     -------
+     Path
+         Path to saved cache file.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> cache_path = save_indicator_to_cache(
+     ... indicator_series,
+     ... "cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8",
+     ... INDICATOR_CACHE_DIR
+     ... )
+     """
+     cache_path = cache_dir / f"{cache_key}.parquet"
+     cache_dir.mkdir(parents=True, exist_ok=True)
+
+     # Convert Series to DataFrame for parquet storage
+     df = indicator_series.to_frame(name="value")
+
+     save_parquet(df, cache_path, compression="snappy", index=True)
+
+     logger.info("Cached indicator: key=%s, rows=%d", cache_key, len(indicator_series))
+     return cache_path
+
+
+ def load_indicator_from_cache(
+     cache_key: str,
+     cache_dir: Path,
+ ) -> pd.Series | None:
+     """
+     Load indicator from cache if available.
+
+     Parameters
+     ----------
+     cache_key : str
+         Cache key from generate_indicator_cache_key().
+     cache_dir : Path
+         Root cache directory (e.g., data/cache/indicators/).
+
+     Returns
+     -------
+     pd.Series or None
+         Cached indicator series if found, None otherwise.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> indicator = load_indicator_from_cache(
+     ... "cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8",
+     ... INDICATOR_CACHE_DIR
+     ... )
+     """
+     cache_path = cache_dir / f"{cache_key}.parquet"
+
+     if not cache_path.exists():
+         logger.debug("Cache miss: key=%s", cache_key)
+         return None
+
+     try:
+         df = load_parquet(cache_path)
+         indicator_series = df["value"]
+         logger.info("Cache hit: key=%s, rows=%d", cache_key, len(indicator_series))
+         return indicator_series
+     except Exception as e:
+         logger.warning("Failed to load cache: key=%s, error=%s", cache_key, e)
+         return None
+
+
+ def invalidate_indicator_cache(
+     indicator_name: str | None = None,
+     cache_dir: Path | None = None,
+ ) -> int:
+     """
+     Invalidate indicator cache by deleting cache files.
+
+     Parameters
+     ----------
+     indicator_name : str or None
+         Specific indicator to invalidate. If None, invalidates all indicators.
+     cache_dir : Path or None
+         Cache directory. If None, uses default from config.
+
+     Returns
+     -------
+     int
+         Number of cache files deleted.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> # Invalidate specific indicator
+     >>> deleted = invalidate_indicator_cache("cdx_etf_spread_diff", INDICATOR_CACHE_DIR)
+     >>> # Invalidate all indicators
+     >>> deleted = invalidate_indicator_cache(None, INDICATOR_CACHE_DIR)
+     """
+     if cache_dir is None:
+         from ..config import INDICATOR_CACHE_DIR
+
+         cache_dir = INDICATOR_CACHE_DIR
+
+     if not cache_dir.exists():
+         logger.debug("Cache directory does not exist: %s", cache_dir)
+         return 0
+
+     # Determine pattern for deletion
+     if indicator_name:
+         pattern = f"{indicator_name}_*.parquet"
+     else:
+         pattern = "*.parquet"
+
+     # Delete matching files
+     cache_files = list(cache_dir.glob(pattern))
+     deleted_count = 0
+     for cache_file in cache_files:
+         try:
+             cache_file.unlink()
+             deleted_count += 1
+             logger.debug("Deleted cache file: %s", cache_file)
+         except Exception as e:
+             logger.warning("Failed to delete cache file %s: %s", cache_file, e)
+
+     logger.info(
+         "Invalidated indicator cache: pattern=%s, deleted=%d",
+         pattern,
+         deleted_count,
+     )
+     return deleted_count
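
Taken together, the cache helpers above support a compute-if-missing pattern. A minimal sketch (illustrative only: the cache directory is a local stand-in for the INDICATOR_CACHE_DIR constant referenced in the docstrings, and the spread-difference indicator is a toy example):

# Illustrative sketch: cache-aware indicator computation using parquet_io helpers.
from pathlib import Path
import pandas as pd
from aponyx.persistence.parquet_io import (
    generate_indicator_cache_key,
    load_indicator_from_cache,
    save_indicator_to_cache,
)

cache_dir = Path("data/cache/indicators")  # stand-in for aponyx.config.INDICATOR_CACHE_DIR
dates = pd.date_range("2024-01-01", periods=5, freq="B")
cdx = pd.DataFrame({"spread": [100.0, 102.0, 99.0, 101.0, 103.0]}, index=dates)
etf = pd.DataFrame({"spread": [98.0, 101.0, 97.0, 100.0, 102.0]}, index=dates)

key = generate_indicator_cache_key(
    "cdx_etf_spread_diff", {"lookback": 5}, {"cdx": cdx, "etf": etf}
)
indicator = load_indicator_from_cache(key, cache_dir)
if indicator is None:
    # Cache miss: compute the (toy) indicator and persist it under the derived key.
    indicator = cdx["spread"] - etf["spread"]
    save_indicator_to_cache(indicator, key, cache_dir)

Because the key hashes both the parameters and the input data, any change to either yields a new key and a cache miss, so stale entries are never read back; invalidate_indicator_cache only reclaims disk space occupied by superseded files.
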
aponyx/py.typed ADDED
File without changes
aponyx/reporting/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Report generation for systematic macro credit research.
+
+ Provides functionality for aggregating workflow results into
+ comprehensive analysis reports in multiple formats.
+ """
+
+ from .generator import generate_report, ReportFormat
+
+ __all__ = ["generate_report", "ReportFormat"]