aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,682 @@
# Adding Data Providers

## Overview

The data layer uses a **provider pattern** to support multiple data sources (files, Bloomberg, APIs) through a common interface. This guide shows how to add a new data provider to the framework.

**Goal:** Extend data sources without modifying existing code—add new providers as separate modules.

## Provider Architecture

### Current Providers

| Provider | Module | Status | Use Case |
|----------|--------|--------|----------|
| `FileSource` | `providers/file.py` | ✅ Implemented | Local Parquet/CSV files |
| `BloombergSource` | `providers/bloomberg.py` | ✅ Implemented | Bloomberg Terminal data (requires `xbbg` and manual `blpapi` install) |
| `APISource` | `sources.py` (dataclass only) | ⚠️ Defined but no fetch implementation | REST API endpoints |

### Bloomberg Provider Setup

The Bloomberg provider requires manual installation of the `blpapi` library:

1. Download `blpapi` from Bloomberg's developer portal
2. Install it manually: `pip install path/to/blpapi-*.whl`
3. Install aponyx with Bloomberg support: `uv pip install aponyx[bloomberg]`

The `xbbg` wrapper is included in the `bloomberg` optional dependency, but `blpapi` itself must be installed separately due to Bloomberg's proprietary distribution.

**Intraday Updates:** The Bloomberg provider supports efficient current-day updates via BDP.

---

## Data Storage Architecture

The project uses a three-tier storage structure:

| Directory | Purpose | Lifecycle | Regenerable |
|-----------|---------|-----------|-------------|
| `data/raw/` | Original source data (Bloomberg downloads, synthetic generation) | **Permanent** — Never auto-deleted | ❌ No |
| `data/cache/` | Performance optimization for repeated reads | **Temporary** — TTL-based expiration | ✅ Yes |
| `data/workflows/` | Timestamped workflow outputs (signals, backtests, reports, visualizations) | **Temporary** — Recomputable from raw | ✅ Yes |

**Data Flow:**
```
Raw Storage (Bloomberg/Synthetic)
        ↓
Cache Layer (TTL-based, automatic)
        ↓
Models/Signals
        ↓
Processed Storage (Results)
```

**Key Principle:** Raw data is the source of truth. Cache and processed data can always be regenerated from raw.

### Raw Data Storage

**File Naming:** `{instrument}_{security}_{hash}.parquet`

**Examples:**
```
cdx_cdx_ig_5y_b1f849bfe3a1.parquet
vix_vix_00252a34df0f.parquet
etf_hyg_108d48a6a616.parquet
```

**Hash Generation:**
- 12-character SHA256 hash prefix
- Computed from: provider, instrument, date range, row count, metadata
- Ensures uniqueness across different data pulls (see the sketch below)

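To make the naming concrete, here is a minimal sketch of such a hash. The exact payload aponyx serializes is an assumption; only the 12-character SHA256 prefix and the listed input fields come from the description above.

```python
# Hypothetical sketch of the content hash: a 12-character SHA256 prefix
# over the identifying fields of a data pull.
import hashlib
import json


def content_hash(
    provider: str,
    instrument: str,
    start: str,
    end: str,
    row_count: int,
    metadata: dict | None = None,
) -> str:
    """Return a 12-character hash identifying one data pull."""
    payload = json.dumps(
        {
            "provider": provider,
            "instrument": instrument,
            "date_range": [start, end],
            "row_count": row_count,
            "metadata": metadata or {},
        },
        sort_keys=True,  # canonical key order: identical pulls hash identically
    )
    return hashlib.sha256(payload.encode()).hexdigest()[:12]


print(content_hash("synthetic", "cdx", "2020-11-17", "2025-11-16", 1304))
```
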
72
**Metadata Sidecar:** Each `.parquet` file has a corresponding `.json` metadata file:

```json
{
  "provider": "synthetic",
  "instrument": "cdx",
  "security": "cdx_ig_5y",
  "stored_at": "2025-11-16T20:32:53.953000",
  "date_range": {
    "start": "2020-11-17",
    "end": "2025-11-16"
  },
  "row_count": 1304,
  "columns": ["spread", "security"],
  "hash": "b1f849bfe3a1",
  "generation_params": {
    "base_spread": 100.0,
    "volatility": 5.0
  }
}
```

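The sidecar makes raw files self-describing. A minimal sketch of loading a file together with its metadata (the filename is the example from above):

```python
# Load a raw Parquet file alongside its JSON metadata sidecar.
import json
from pathlib import Path

import pandas as pd

raw = Path("data/raw/cdx_cdx_ig_5y_b1f849bfe3a1.parquet")
df = pd.read_parquet(raw)
meta = json.loads(raw.with_suffix(".json").read_text())

# Cross-check the sidecar against the data it describes
assert len(df) == meta["row_count"]
print(meta["date_range"], meta["hash"])
```
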
94
### Cache Layer

**Purpose:** Transparent time-to-live (TTL) based caching for data fetching operations.

**Cache Location:**
```
data/
  cache/
    file/                      # Temporary cache from FileSource loads
      cdx_ig_5y_abc123.parquet
```

**Configuration:**
```python
from aponyx.config import CACHE_ENABLED, CACHE_TTL_DAYS

# Default: enabled, 1 day TTL
# Control per fetch call:
df = fetch_cdx(source, security="cdx_ig_5y", use_cache=True)   # Use cache
df = fetch_cdx(source, security="cdx_ig_5y", use_cache=False)  # Skip cache
```

**Automatic Invalidation** (see the sketch below):
1. **TTL expiration:** Entry older than `CACHE_TTL_DAYS`
2. **Source modification:** Source file modified after cache creation

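The two rules combine into a simple validity check. A sketch, assuming hypothetical `cache_path`/`source_path` arguments; aponyx's actual cache internals live in `data/cache.py` and may differ:

```python
# Sketch of the two invalidation rules above.
import time
from pathlib import Path


def is_cache_entry_valid(cache_path: Path, source_path: Path, ttl_days: int) -> bool:
    """Return True if a cache entry may be served instead of refetching."""
    if not cache_path.exists():
        return False
    cache_mtime = cache_path.stat().st_mtime
    # Rule 1: TTL expiration
    if (time.time() - cache_mtime) > ttl_days * 86400:
        return False
    # Rule 2: source file modified after the cache entry was written
    if source_path.exists() and source_path.stat().st_mtime > cache_mtime:
        return False
    return True
```
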
120
**Intraday Updates (Bloomberg only):**
```python
# Morning: Full history
cdx_df = fetch_cdx(BloombergSource(), security="cdx_ig_5y")

# Afternoon: Update only today (~10x faster)
cdx_df = fetch_cdx(BloombergSource(), security="cdx_ig_5y", update_current_day=True)
```

**Benefits:**
- ~10x faster than full refetch
- 500x less data transfer (1 point vs 1800 days)
- Preserves historical data in cache

### Provider Interface

Providers are defined as dataclasses and used by fetch functions:

```python
from dataclasses import dataclass
from pathlib import Path

import pandas as pd


@dataclass(frozen=True)
class FileSource:
    """File-based data source (Parquet or CSV)."""

    path: Path


@dataclass(frozen=True)
class BloombergSource:
    """Bloomberg Terminal data source."""


# Fetch functions handle provider-specific logic
def fetch_from_file(
    file_path: str | Path,
    instrument: str,
    start_date: str | None = None,
    end_date: str | None = None,
    **params,
) -> pd.DataFrame:
    """Fetch data from local file."""
    ...
```

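Putting the two halves together, a caller constructs a source dataclass and hands it to an instrument fetch function. A usage sketch (the file path is illustrative):

```python
# The source dataclass selects the provider; the fetch function
# returns a DataFrame. The path below is illustrative.
from pathlib import Path

from aponyx.data import fetch_cdx
from aponyx.data.sources import FileSource

source = FileSource(path=Path("data/raw/cdx_cdx_ig_5y_b1f849bfe3a1.parquet"))
df = fetch_cdx(source, security="cdx_ig_5y")
print(df.head())
```
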
165
## Adding a New Provider

### Step 1: Define Data Source

Add to `src/aponyx/data/sources.py`:

```python
"""Data source configuration for pluggable data providers."""

from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class MyCustomSource:
    """
    Custom data source for [your provider].

    Attributes
    ----------
    endpoint : str
        API endpoint or connection string.
    params : dict[str, Any] | None
        Additional connection parameters.
    """

    endpoint: str
    params: dict[str, Any] | None = None


# Update DataSource union type
DataSource = FileSource | BloombergSource | MyCustomSource
```

197
### Step 2: Create Provider Fetch Function

Create `src/aponyx/data/providers/my_provider.py`:

```python
"""
Custom data provider fetch implementation.

Fetches data from [describe your source].
"""

import logging
from typing import Any

import pandas as pd

logger = logging.getLogger(__name__)


def fetch_from_mycustom(
    endpoint: str,
    instrument: str,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> pd.DataFrame:
    """
    Fetch data from custom source.

    Parameters
    ----------
    endpoint : str
        API endpoint or data source URL.
    instrument : str
        Instrument identifier.
    start_date : str | None
        Optional start date filter (ISO format).
    end_date : str | None
        Optional end date filter (ISO format).
    **params : Any
        Additional provider-specific parameters.

    Returns
    -------
    pd.DataFrame
        Raw data with datetime index.

    Notes
    -----
    Caching is handled by the fetch layer, not the provider implementation.
    """
    logger.info("Fetching %s from endpoint: %s", instrument, endpoint)

    # Provider-specific fetching logic goes here (API call, database
    # query, etc.); this skeleton delegates to a module-level helper.

    # Build query parameters
    query_params = {"instrument": instrument}
    if start_date:
        query_params["start_date"] = start_date
    if end_date:
        query_params["end_date"] = end_date
    query_params.update(params)

    # Fetch data (example - implement actual logic)
    df = _make_request(endpoint, query_params)

    logger.info("Loaded %d rows from custom source", len(df))
    return df


def _make_request(endpoint: str, params: dict[str, Any]) -> pd.DataFrame:
    """Make the actual request to the data source."""
    raise NotImplementedError("Implement provider-specific request logic")
```

272
### Step 3: Update Provider Init

Add to `src/aponyx/data/providers/__init__.py`:

```python
"""Data provider implementations."""

from .file import fetch_from_file
from .bloomberg import fetch_from_bloomberg
from .my_provider import fetch_from_mycustom  # Add new provider

__all__ = [
    "fetch_from_file",
    "fetch_from_bloomberg",
    "fetch_from_mycustom",  # Export new fetch function
]
```

### Step 4: Integrate with Fetch Layer

Update `src/aponyx/data/fetch.py` to support the new provider:

```python
from .sources import MyCustomSource, resolve_provider
from .providers.my_provider import fetch_from_mycustom


def _get_provider_fetch_function(source: DataSource):
    """Get fetch function for data source."""
    provider_type = resolve_provider(source)

    if provider_type == "file":
        return fetch_from_file
    elif provider_type == "bloomberg":
        return fetch_from_bloomberg
    elif provider_type == "mycustom":  # Add new provider
        return fetch_from_mycustom
    else:
        raise ValueError(f"Unsupported provider: {provider_type}")


# Then use in instrument fetch functions:
def fetch_cdx(
    source: DataSource | None = None,
    security: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
) -> pd.DataFrame:
    """Fetch CDX data from any provider."""
    # ... caching logic ...

    fetch_fn = _get_provider_fetch_function(source)

    if isinstance(source, MyCustomSource):
        df = fetch_fn(
            endpoint=source.endpoint,
            instrument="cdx",
            start_date=start_date,
            end_date=end_date,
            **(source.params or {}),
        )
    # ... other providers ...
```

### Step 5: Update Provider Resolution

Add to `src/aponyx/data/sources.py`:

```python
def resolve_provider(source: DataSource) -> str:
    """Resolve data source to provider type identifier."""
    if isinstance(source, FileSource):
        return "file"
    elif isinstance(source, BloombergSource):
        return "bloomberg"
    elif isinstance(source, MyCustomSource):  # Add new provider
        return "mycustom"
    else:
        raise ValueError(f"Unknown source type: {type(source)}")
```

353
### Step 6: Add Schema Validation (Optional)

If your data has a specific structure, add a schema in `src/aponyx/data/schemas.py`:

```python
from dataclasses import dataclass, field


@dataclass
class MyCustomSchema:
    """Schema for custom data provider."""

    required_columns: list[str] = field(
        default_factory=lambda: ["date", "value", "volume"]
    )
    date_column: str = "date"
    numeric_columns: list[str] = field(
        default_factory=lambda: ["value", "volume"]
    )
```

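The actual check belongs in the fetch layer (see Best Practices below). A hypothetical helper against this schema, not an existing aponyx function:

```python
# Hypothetical fetch-layer validation against MyCustomSchema (defined above).
import pandas as pd


def validate_against_schema(df: pd.DataFrame, schema: MyCustomSchema) -> None:
    """Raise if required columns are missing or numeric columns are not numeric."""
    present = set(df.columns) | {df.index.name}
    missing = [c for c in schema.required_columns if c not in present]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    for col in schema.numeric_columns:
        if col in df.columns and not pd.api.types.is_numeric_dtype(df[col]):
            raise TypeError(f"Column {col!r} must be numeric")
```
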
373
### Step 7: Write Tests

Create `tests/data/test_my_provider.py`:

```python
"""Tests for custom data provider."""

import pandas as pd

from aponyx.data import fetch_cdx
from aponyx.data.sources import MyCustomSource


def test_fetch_basic(monkeypatch):
    """Test basic data fetching with custom provider."""
    # Create source
    source = MyCustomSource(
        endpoint="https://api.example.com",
        params={"api_key": "test"},
    )

    # Mock the provider fetch function
    def mock_fetch(*args, **kwargs):
        return pd.DataFrame({
            "date": pd.date_range("2024-01-01", periods=10),
            "spread": range(100, 110),
            "security": ["cdx_ig_5y"] * 10,
        }).set_index("date")

    # Patch the name where fetch.py looks it up: fetch.py imports the
    # function directly, so patching the providers module would not take effect.
    monkeypatch.setattr("aponyx.data.fetch.fetch_from_mycustom", mock_fetch)

    # Fetch data
    df = fetch_cdx(source, security="cdx_ig_5y")

    # Validate
    assert len(df) == 10
    assert "spread" in df.columns
```

413
## Example: REST API Provider

### Source Definition

Add to `src/aponyx/data/sources.py`:

```python
@dataclass(frozen=True)
class APISource:
    """
    Generic REST API data source.

    Attributes
    ----------
    endpoint : str
        API endpoint URL.
    api_key : str | None
        API authentication key.
    params : dict[str, Any] | None
        Additional request parameters.
    """

    endpoint: str
    api_key: str | None = None
    params: dict[str, Any] | None = None


# Update DataSource union
DataSource = FileSource | BloombergSource | APISource
```
441
### Provider Implementation

Create `src/aponyx/data/providers/api.py`:

```python
"""REST API data provider."""

import logging
from typing import Any

import pandas as pd
import requests

logger = logging.getLogger(__name__)


def fetch_from_api(
    endpoint: str,
    instrument: str,
    api_key: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> pd.DataFrame:
    """
    Fetch data from REST API endpoint.

    Parameters
    ----------
    endpoint : str
        API endpoint URL.
    instrument : str
        Instrument identifier.
    api_key : str | None
        Optional API key for authentication.
    start_date : str | None
        Start date filter (ISO format).
    end_date : str | None
        End date filter (ISO format).
    **params : Any
        Additional query parameters.

    Returns
    -------
    pd.DataFrame
        JSON response converted to DataFrame with DatetimeIndex.
    """
    # Build request parameters
    query_params = {"instrument": instrument}
    if start_date:
        query_params["start_date"] = start_date
    if end_date:
        query_params["end_date"] = end_date
    query_params.update(params)

    # Add authentication if available
    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    logger.info("GET %s with params=%s", endpoint, query_params)

    # Make request (bounded timeout so a hung endpoint cannot block forever)
    response = requests.get(endpoint, params=query_params, headers=headers, timeout=30)
    response.raise_for_status()

    # Parse JSON to DataFrame
    data = response.json()
    df = pd.DataFrame(data)

    # Convert date column if present
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])
        df = df.set_index("date")

    logger.info("Fetched %d rows from API", len(df))
    return df
```

521
### Usage

```python
from aponyx.data import fetch_cdx
from aponyx.data.sources import APISource

# Set up the API source defined above
source = APISource(
    endpoint="https://api.example.com/market-data",
    api_key="your-key-here",
)

# Fetch data (caching handled automatically)
df = fetch_cdx(
    source=source,
    security="cdx_ig_5y",
    start_date="2024-01-01",
    end_date="2024-12-31",
)
```
541
## Provider Design Patterns

### Pattern 1: Stateful Connection

```python
# For providers requiring persistent connections,
# manage state in module-level variables.
import logging

import pandas as pd

logger = logging.getLogger(__name__)

_connection = None


def _get_connection():
    """Get or create the connection instance."""
    global _connection
    if _connection is None:
        # initialize_connection() is provider-specific (e.g., a database
        # driver's connect() call) and is left to the implementer.
        _connection = initialize_connection()
        logger.info("Connection established")
    return _connection


def fetch_from_database(
    query: str,
    instrument: str,
    **params,
) -> pd.DataFrame:
    """Fetch using a persistent connection."""
    conn = _get_connection()
    return pd.read_sql(query, conn, params=params)
```

### Pattern 2: Retry Logic

```python
import pandas as pd
import requests
from tenacity import retry, stop_after_attempt, wait_exponential


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(min=1, max=10),
)
def fetch_from_api(
    endpoint: str,
    instrument: str,
    **params,
) -> pd.DataFrame:
    """Fetch with automatic retry on network errors."""
    response = requests.get(endpoint, params=params, timeout=30)
    response.raise_for_status()
    return pd.DataFrame(response.json())
```

### Pattern 3: Batch Fetching

```python
def fetch_from_batch_api(
    endpoint: str,
    instrument: str,
    start_date: str | None = None,
    end_date: str | None = None,
    batch_size: int = 1000,
    **params,
) -> pd.DataFrame:
    """Fetch data in batches for large date ranges."""
    all_data = []

    # Split date range into batches (a sketch of this helper follows below)
    batches = _create_date_batches(start_date, end_date, batch_size)

    for batch_start, batch_end in batches:
        logger.debug("Fetching batch: %s to %s", batch_start, batch_end)
        batch_df = _fetch_single_batch(
            endpoint,
            instrument,
            batch_start,
            batch_end,
            **params,
        )
        all_data.append(batch_df)

    # Combine all batches
    return pd.concat(all_data).sort_index()
```

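`_create_date_batches` and `_fetch_single_batch` are left to the implementer. A sketch of the former, assuming `batch_size` counts calendar days:

```python
# Hypothetical helper for Pattern 3: split [start_date, end_date] into
# consecutive windows of at most `batch_size` calendar days.
import pandas as pd


def _create_date_batches(
    start_date: str,
    end_date: str,
    batch_size: int,
) -> list[tuple[str, str]]:
    dates = pd.date_range(start_date, end_date, freq="D")
    return [
        (chunk[0].date().isoformat(), chunk[-1].date().isoformat())
        for chunk in (
            dates[i : i + batch_size] for i in range(0, len(dates), batch_size)
        )
    ]


# e.g. _create_date_batches("2024-01-01", "2024-03-31", 30)
```
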
622
## Best Practices

1. **Define data sources as frozen dataclasses** for immutability
2. **Implement fetch functions** instead of class methods for simplicity
3. **Let the fetch layer handle caching** - providers should focus on data retrieval
4. **Log all operations** (connections, queries, errors) using %-formatting (see the example below)
5. **Validate output schema** in the fetch layer, not the provider
6. **Handle errors gracefully** with informative messages
7. **Use type hints** for all parameters and return values
8. **Test with mocked data** to avoid external dependencies
9. **Document connection requirements** (credentials, network access)
10. **Follow naming convention**: `fetch_from_*` for provider functions

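On point 4: %-style placeholders defer string formatting to the logging framework, so the cost is only paid when the record is actually emitted. A short illustration:

```python
# Lazy %-formatting: interpolation happens only if the level is enabled.
import logging

logger = logging.getLogger(__name__)

endpoint = "https://api.example.com"
logger.info("Fetching %s from endpoint: %s", "cdx", endpoint)  # preferred
# logger.info(f"Fetching cdx from endpoint: {endpoint}")       # formats eagerly
```
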
635
## Troubleshooting

### Provider Not Found

```python
# Check import
from aponyx.data.providers import fetch_from_mycustom  # Should work

# Verify __init__.py exports
from aponyx.data import providers
print(dir(providers))  # Should list fetch_from_mycustom
```

### Cache Not Working

```python
# Enable debug logging to see cache operations
import logging
logging.basicConfig(level=logging.DEBUG)

# Check if caching is enabled
from aponyx.config import CACHE_ENABLED, CACHE_TTL_DAYS
print(f"Cache enabled: {CACHE_ENABLED}, TTL: {CACHE_TTL_DAYS} days")

# Explicitly control cache usage
df = fetch_cdx(source, security="cdx_ig_5y", use_cache=True)
```

### Authentication Failures

```python
# Don't hardcode credentials in source definitions
import os

api_key = os.environ.get("MY_API_KEY")
if not api_key:
    raise ValueError("MY_API_KEY environment variable not set")

source = APISource(
    endpoint="https://api.example.com",
    api_key=api_key,
)
```

---

**Maintained by:** stabilefrisur
**Last Updated:** December 13, 2025