data-source-manager 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_source_manager-1.4.0/LICENSE +21 -0
- data_source_manager-1.4.0/PKG-INFO +489 -0
- data_source_manager-1.4.0/README.md +451 -0
- data_source_manager-1.4.0/pyproject.toml +111 -0
- data_source_manager-1.4.0/setup.cfg +4 -0
- data_source_manager-1.4.0/setup.py +7 -0
- data_source_manager-1.4.0/src/data_source_manager/__init__.py +93 -0
- data_source_manager-1.4.0/src/data_source_manager/core/__init__.py +5 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/__init__.py +392 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/__init__.py +13 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/binance_funding_rate_client.py +436 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/cache_manager.py +454 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/data_client_interface.py +129 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/rest_data_client.py +516 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/vision_data_client.py +1135 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/vision_path_mapper.py +307 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/okx/__init__.py +22 -0
- data_source_manager-1.4.0/src/data_source_manager/core/providers/okx/okx_rest_client.py +542 -0
- data_source_manager-1.4.0/src/data_source_manager/core/sync/__init__.py +5 -0
- data_source_manager-1.4.0/src/data_source_manager/core/sync/data_source_manager.py +1162 -0
- data_source_manager-1.4.0/src/data_source_manager/core/sync/dsm_lib.py +259 -0
- data_source_manager-1.4.0/src/data_source_manager/core/sync/dsm_types.py +185 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/__init__.py +144 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/api_boundary_validator.py +682 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/app_paths.py +168 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/arrow_cache_reader.py +608 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/__init__.py +58 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/errors.py +35 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/functions.py +108 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/key_manager.py +96 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/memory_map.py +103 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/options.py +50 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/validator.py +391 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache/vision_manager.py +111 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/cache_validator.py +46 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/config.py +403 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/dataframe_types.py +243 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/dataframe_utils.py +515 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/deprecation_rules.py +247 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/dsm_config.py +351 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/__init__.py +4 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_api_utils.py +354 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_cache_utils.py +346 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_date_range_utils.py +215 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_fcp_utils.py +357 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_time_range_utils.py +346 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_utilities.py +341 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_client_utils.py +285 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_data_processing.py +163 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_exceptions.py +110 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_metrics.py +221 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_checksum.py +263 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_constraints.py +322 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_exceptions.py +75 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_file_utils.py +138 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_timestamp.py +202 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/__init__.py +4 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_app_options.py +164 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_cache_utils.py +192 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_clean_logging.py +318 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_cli_utils.py +319 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_data_fetcher.py +177 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_datetime_parser.py +108 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_display_utils.py +335 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_doc_utils.py +351 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_help_content.py +258 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_validation_utils.py +76 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/__init__.py +5 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/console_utils.py +159 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/custom_logger.py +74 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/error_logger.py +198 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/formatters.py +119 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/logger_proxy.py +470 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/logger_setup_utils.py +138 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/session_utils.py +76 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/timeout_logger.py +174 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/gap_detector.py +339 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/logger_setup.py +51 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/loguru_setup.py +342 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market/__init__.py +58 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market/capabilities.py +245 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market/endpoints.py +80 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market/enums.py +317 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market/validation.py +213 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market_constraints.py +52 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/market_utils.py +56 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/__init__.py +58 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/api.py +174 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/client_factory.py +167 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/download.py +292 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/exceptions.py +24 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network/vision_download.py +176 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/network_utils.py +44 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/__init__.py +73 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/bars.py +60 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/conversion.py +245 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/filtering.py +171 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/intervals.py +370 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time/processor.py +220 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/time_utils.py +78 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/validation/__init__.py +63 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/validation/availability_validation.py +151 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/validation/dataframe_validation.py +285 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/validation/file_validation.py +74 -0
- data_source_manager-1.4.0/src/data_source_manager/utils/validation/time_validation.py +403 -0
- data_source_manager-1.4.0/src/data_source_manager/utils_for_debug/__init__.py +7 -0
- data_source_manager-1.4.0/src/data_source_manager/utils_for_debug/timestamp_debug.py +276 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/PKG-INFO +489 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/SOURCES.txt +111 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/dependency_links.txt +1 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/entry_points.txt +4 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/requires.txt +26 -0
- data_source_manager-1.4.0/src/data_source_manager.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Eon Labs Ltd.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-source-manager
|
|
3
|
+
Version: 1.4.0
|
|
4
|
+
Summary: Professional market data integration with clean package architecture
|
|
5
|
+
Author-email: EonLabs <terry@eonlabs.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
9
|
+
Requires-Python: >=3.13
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: attrs>=25.0.0
|
|
13
|
+
Requires-Dist: pyarrow<21.0.0,>=19.0.0
|
|
14
|
+
Requires-Dist: polars<2.0.0,>=1.30.0
|
|
15
|
+
Requires-Dist: pandas<3.0.0,>=2.2.0
|
|
16
|
+
Requires-Dist: numpy<3.0.0,>=1.26.0
|
|
17
|
+
Requires-Dist: fsspec>=2024.6.0
|
|
18
|
+
Requires-Dist: requests>=2.32.0
|
|
19
|
+
Requires-Dist: httpx>=0.28.0
|
|
20
|
+
Requires-Dist: tenacity>=9.0.0
|
|
21
|
+
Requires-Dist: colorlog>=6.8.0
|
|
22
|
+
Requires-Dist: typer>=0.16.0
|
|
23
|
+
Requires-Dist: rich>=13.0.0
|
|
24
|
+
Requires-Dist: pendulum>=3.0.0
|
|
25
|
+
Requires-Dist: platformdirs>=4.0.0
|
|
26
|
+
Requires-Dist: loguru>=0.7.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
29
|
+
Requires-Dist: twine>=6.0.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-xdist>=3.6.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-profiling; extra == "dev"
|
|
34
|
+
Requires-Dist: pyupgrade>=3.20.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.11.0; extra == "dev"
|
|
36
|
+
Requires-Dist: GitPython>=3.1.0; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# Data Source Manager
|
|
40
|
+
|
|
41
|
+
A high-performance, robust package for efficient market data retrieval from multiple data providers, including [Binance Vision](https://data.binance.vision/) and Binance REST ([Spot](https://developers.binance.com/docs/binance-spot-api-docs/rest-api/general-endpoints), [USDS-Margined Futures](https://developers.binance.com/docs/derivatives/usds-margined-futures/general-info), [Coin-Margined Futures](https://developers.binance.com/docs/derivatives/coin-margined-futures/general-info)) using Apache Arrow MMAP for optimal performance.
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- **Failover Control Protocol (FCP)**: Robust data retrieval from multiple sources
|
|
46
|
+
- **Local Cache**: Fast access to previously downloaded data using Apache Arrow
|
|
47
|
+
- **Vision API**: Efficient historical data from Binance Vision API on AWS S3
|
|
48
|
+
- **REST API**: Real-time and recent data from Binance REST API
|
|
49
|
+
- **Automatic Retry**: Built-in retry logic with exponential backoff
|
|
50
|
+
- **Data Validation**: Comprehensive data integrity checks
|
|
51
|
+
- **Rich Logging**: Beautiful, configurable logging with loguru support
|
|
52
|
+
- **Professional Package Structure**: Proper src-layout with clean namespace imports
|
|
53
|
+
|
|
54
|
+
## Package Structure
|
|
55
|
+
|
|
56
|
+
Data Source Manager follows modern Python packaging standards with a clean src-layout structure:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
data-source-manager/
|
|
60
|
+
├── src/
|
|
61
|
+
│ └── data_source_manager/ # Main package namespace
|
|
62
|
+
│ ├── __init__.py # Public API exports (lazy loading)
|
|
63
|
+
│ ├── core/ # Core functionality
|
|
64
|
+
│ │ ├── sync/ # Synchronous data managers
|
|
65
|
+
│ │ │ ├── data_source_manager.py # Main DSM class with FCP
|
|
66
|
+
│ │ │ ├── dsm_types.py # DataSource, DataSourceConfig
|
|
67
|
+
│ │ │ └── dsm_lib.py # High-level fetch functions
|
|
68
|
+
│ │ └── providers/ # Data provider implementations
|
|
69
|
+
│ │ └── binance/ # Binance-specific clients
|
|
70
|
+
│ └── utils/ # Utility modules
|
|
71
|
+
│ ├── market_constraints.py # Enums: DataProvider, MarketType, Interval
|
|
72
|
+
│ └── loguru_setup.py # Logging configuration
|
|
73
|
+
├── examples/ # Usage examples and demos
|
|
74
|
+
├── tests/ # Test suite
|
|
75
|
+
└── docs/ # Documentation
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Note**: The public API returns pandas DataFrames for compatibility with downstream consumers. Polars is used internally for some performance-critical operations.
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
There are two main ways to install Data Source Manager, depending on your needs:
|
|
83
|
+
|
|
84
|
+
### 1. For Development or Running Demos Directly
|
|
85
|
+
|
|
86
|
+
If you want to run the provided demos directly from the cloned repository or use the core library while having the source files available in your workspace, follow these steps:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Clone the repository
|
|
90
|
+
git clone https://github.com/terrylica/data-source-manager.git
|
|
91
|
+
cd data-source-manager
|
|
92
|
+
|
|
93
|
+
# Install with uv (recommended, 10-100x faster than pip)
|
|
94
|
+
uv sync --dev
|
|
95
|
+
|
|
96
|
+
# Or with pip (slower, not recommended)
|
|
97
|
+
pip install -e ".[dev]"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Note**: This project uses [uv](https://docs.astral.sh/uv/) for package management. Install it via `curl -LsSf https://astral.sh/uv/install.sh | sh`.
|
|
101
|
+
|
|
102
|
+
This method keeps all the source files in your workspace and includes necessary tools for development workflows.
|
|
103
|
+
|
|
104
|
+
### 2. As a Dependency in Your Project (`pyproject.toml`)
|
|
105
|
+
|
|
106
|
+
If you want to use Data Source Manager as a library in your own Python project (managed with `pyproject.toml`) without including its entire source code in your project's directory, you can add it as a Git dependency.
|
|
107
|
+
|
|
108
|
+
Add the following to the `dependencies` array in the `[project]` table of your project's `pyproject.toml` file (as per PEP 621):
|
|
109
|
+
|
|
110
|
+
```toml
|
|
111
|
+
[project]
|
|
112
|
+
# ... other project configurations like name, version ...
|
|
113
|
+
dependencies = [
|
|
114
|
+
# ... other dependencies ...
|
|
115
|
+
"data-source-manager @ git+https://github.com/terrylica/data-source-manager.git"
|
|
116
|
+
# You can also specify a particular branch, tag, or commit hash:
|
|
117
|
+
# "data-source-manager @ git+https://github.com/terrylica/data-source-manager.git@main"
|
|
118
|
+
# "data-source-manager @ git+https://github.com/terrylica/data-source-manager.git@<version>"
|
|
119
|
+
]
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
This will install Data Source Manager into your Python environment's `site-packages` directory, keeping your project workspace clean.
|
|
123
|
+
|
|
124
|
+
**Note on CLI Tools:**
|
|
125
|
+
The installation process (through either method) automatically registers the CLI commands (`dsm-demo-cli` and `dsm-demo-module`) as executable scripts in your Python environment. These commands will be available in your terminal after successful installation.
|
|
126
|
+
|
|
127
|
+
## Usage
|
|
128
|
+
|
|
129
|
+
### Basic Usage
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
|
|
133
|
+
from datetime import datetime, timedelta, timezone
|
|
134
|
+
|
|
135
|
+
# Create a manager for USDT-margined futures
|
|
136
|
+
manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_USDT)
|
|
137
|
+
|
|
138
|
+
# Fetch recent BTCUSDT data with automatic failover
|
|
139
|
+
# IMPORTANT: Always use UTC timezone-aware datetimes
|
|
140
|
+
end_time = datetime.now(timezone.utc)
|
|
141
|
+
start_time = end_time - timedelta(days=7)
|
|
142
|
+
|
|
143
|
+
df = manager.get_data(
|
|
144
|
+
symbol="BTCUSDT",
|
|
145
|
+
start_time=start_time,
|
|
146
|
+
end_time=end_time,
|
|
147
|
+
interval=Interval.HOUR_1
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
print(f"Loaded {len(df)} bars of BTCUSDT data")
|
|
151
|
+
print(df.head())
|
|
152
|
+
manager.close() # Always close when done
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Failover Control Protocol (FCP)
|
|
156
|
+
|
|
157
|
+
The DSM automatically handles data retrieval through multiple sources:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
# The FCP follows this sequence automatically:
|
|
161
|
+
# 1. 🚀 Local cache lookup (fastest)
|
|
162
|
+
# 2. 📡 Vision API for historical data (efficient)
|
|
163
|
+
# 3. 🔄 REST API fallback (real-time)
|
|
164
|
+
|
|
165
|
+
# All with automatic retry, data validation, and gap detection
|
|
166
|
+
manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
|
|
167
|
+
|
|
168
|
+
# This single call handles all the complexity:
|
|
169
|
+
data = manager.get_data("ETHUSDT", start_time, end_time, Interval.MINUTE_5)
|
|
170
|
+
# ✅ Cache checked, Vision API queried, REST API fallback - all automatic!
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Advanced Configuration
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
|
|
177
|
+
from data_source_manager.core.sync.data_source_manager import DataSource
|
|
178
|
+
|
|
179
|
+
# Force specific data source (bypass FCP)
|
|
180
|
+
manager = DataSourceManager.create(
|
|
181
|
+
provider=DataProvider.BINANCE,
|
|
182
|
+
market_type=MarketType.FUTURES_USDT,
|
|
183
|
+
enforce_source=DataSource.VISION, # Force Vision API only
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# Multiple market types supported
|
|
187
|
+
spot_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
|
|
188
|
+
futures_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_USDT)
|
|
189
|
+
coin_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_COIN)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Running the Demos
|
|
193
|
+
|
|
194
|
+
Once installed, you can run the demos using the command-line tools:
|
|
195
|
+
|
|
196
|
+
### DSM Demo CLI
|
|
197
|
+
|
|
198
|
+
The CLI demonstration provides an interactive way to explore the Failover Control Protocol:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# Run the DSM Demo CLI with default parameters
|
|
202
|
+
dsm-demo-cli
|
|
203
|
+
|
|
204
|
+
# Run with specific parameters (get BTC data for a 10-day period)
|
|
205
|
+
dsm-demo-cli -s BTCUSDT -i 1m -d 10
|
|
206
|
+
|
|
207
|
+
# Get help and see all available options
|
|
208
|
+
dsm-demo-cli --help
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
The CLI tool will automatically:
|
|
212
|
+
|
|
213
|
+
1. Check for data in local cache
|
|
214
|
+
2. Try to fetch missing data from Binance Vision API
|
|
215
|
+
3. Fall back to REST API for data not available in cache or Vision API
|
|
216
|
+
4. Save retrieved data to cache for future use
|
|
217
|
+
|
|
218
|
+
### DSM Demo Module
|
|
219
|
+
|
|
220
|
+
The module demo provides a programmatic interface:
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
# Run the DSM Demo Module to see examples
|
|
224
|
+
dsm-demo-module
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Using as a Library
|
|
228
|
+
|
|
229
|
+
The core data fetching functionality of Data Source Manager is available for direct import and use in your Python projects after installation.
|
|
230
|
+
|
|
231
|
+
The main function for retrieving market data is `fetch_market_data`.
|
|
232
|
+
|
|
233
|
+
### Example 1: Fetching with Specific Date Range
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
from datetime import datetime, timezone
|
|
237
|
+
from data_source_manager import fetch_market_data, MarketType, DataProvider, Interval, ChartType
|
|
238
|
+
|
|
239
|
+
# Define parameters
|
|
240
|
+
provider = DataProvider.BINANCE
|
|
241
|
+
market_type = MarketType.SPOT
|
|
242
|
+
chart_type = ChartType.KLINES
|
|
243
|
+
symbol = "BTCUSDT"
|
|
244
|
+
interval = Interval.MINUTE_1
|
|
245
|
+
# IMPORTANT: Always use UTC timezone-aware datetimes
|
|
246
|
+
start_time = datetime(2023, 1, 1, tzinfo=timezone.utc)
|
|
247
|
+
end_time = datetime(2023, 1, 10, tzinfo=timezone.utc)
|
|
248
|
+
|
|
249
|
+
# Fetch data (returns pandas DataFrame)
|
|
250
|
+
df, elapsed_time, records_count = fetch_market_data(
|
|
251
|
+
provider=provider,
|
|
252
|
+
market_type=market_type,
|
|
253
|
+
chart_type=chart_type,
|
|
254
|
+
symbol=symbol,
|
|
255
|
+
interval=interval,
|
|
256
|
+
start_time=start_time,
|
|
257
|
+
end_time=end_time,
|
|
258
|
+
use_cache=True,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
# Process results
|
|
262
|
+
print(f"Fetched {records_count} records in {elapsed_time:.2f} seconds")
|
|
263
|
+
if df is not None:
|
|
264
|
+
print(df.head())
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Example 2: Fetching Backward from a Specific End Time
|
|
268
|
+
|
|
269
|
+
This example demonstrates how to fetch data backward from a precise end time in May 2025:
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
import pendulum
|
|
273
|
+
from data_source_manager import fetch_market_data, MarketType, DataProvider, Interval, ChartType
|
|
274
|
+
|
|
275
|
+
# Define parameters
|
|
276
|
+
provider = DataProvider.BINANCE
|
|
277
|
+
market_type = MarketType.SPOT
|
|
278
|
+
chart_type = ChartType.KLINES
|
|
279
|
+
symbol = "BTCUSDT"
|
|
280
|
+
interval = Interval.MINUTE_1
|
|
281
|
+
|
|
282
|
+
# Define a specific end time with precise minutes and seconds
|
|
283
|
+
# Note: Using pendulum for better datetime handling as per project standards
|
|
284
|
+
end_time = pendulum.datetime(2025, 5, 15, 13, 45, 30, tz="UTC") # 2025-05-15 13:45:30 UTC
|
|
285
|
+
days = 7 # Fetch 7 days backward from the end time
|
|
286
|
+
|
|
287
|
+
# Fetch data (no need to specify start_time, it will be calculated)
|
|
288
|
+
df, elapsed_time, records_count = fetch_market_data(
|
|
289
|
+
provider=provider,
|
|
290
|
+
market_type=market_type,
|
|
291
|
+
chart_type=chart_type,
|
|
292
|
+
symbol=symbol,
|
|
293
|
+
interval=interval,
|
|
294
|
+
end_time=end_time,
|
|
295
|
+
days=days,
|
|
296
|
+
use_cache=True,
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
# Process results
|
|
300
|
+
print(f"Fetched {records_count} records in {elapsed_time:.2f} seconds")
|
|
301
|
+
print(f"Date range: {end_time.subtract(days=days).format('YYYY-MM-DD HH:mm:ss.SSS')} to {end_time.format('YYYY-MM-DD HH:mm:ss.SSS')}")
|
|
302
|
+
if df is not None:
|
|
303
|
+
print(df.head())
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
You can import `fetch_market_data` directly from the `data_source_manager` package. The necessary enums (`MarketType`, `DataProvider`, `ChartType`, `Interval`, `DataSource`) and `DataSourceConfig` are also exposed at the top level for easy access.
|
|
307
|
+
|
|
308
|
+
Refer to the source code of `data_source_manager.core.sync.dsm_lib.fetch_market_data` and `data_source_manager.core.sync.data_source_manager.DataSourceConfig` for detailed parameter information and usage.
|
|
309
|
+
|
|
310
|
+
## Data Source Manager (DSM) Demo
|
|
311
|
+
|
|
312
|
+
### Quick Start
|
|
313
|
+
|
|
314
|
+
- **[DSM Demo CLI Documentation](examples/sync/)**: Interactive demonstration of the Failover Control Protocol mechanism, the core data retrieval strategy that ensures robust and efficient data collection from multiple sources.
|
|
315
|
+
- **[DSM Demo Module Documentation](examples/lib_module/)**: Programmatic interface to `src/data_source_manager/core/sync/dsm_lib.py` functions, complementing the CLI tool by providing a library approach to implement the same data retrieval functionality in custom applications.
|
|
316
|
+
|
|
317
|
+
### Understanding Data Sources
|
|
318
|
+
|
|
319
|
+
The DSM implements a Failover Control Protocol (FCP) that follows this sequence:
|
|
320
|
+
|
|
321
|
+
1. **Cache**: First checks local Arrow files for requested data
|
|
322
|
+
2. **Vision API**: For missing data, attempts to download from Binance Vision API
|
|
323
|
+
3. **REST API**: Falls back to Binance REST API for any remaining data gaps
|
|
324
|
+
|
|
325
|
+
Note that recent data (within ~48 hours) is typically not available in the Vision API and will be retrieved from the REST API.
|
|
326
|
+
|
|
327
|
+
## Development Guidelines
|
|
328
|
+
|
|
329
|
+
### Core Principles
|
|
330
|
+
|
|
331
|
+
- **[Focus DSM FCP Demo Rule](.cursor/rules/focus-dsm-fcp-demo.mdc)**: The authoritative instruction file guiding the Cursor Agent to strictly adhere to the demo plan and maintain focus on the Failover Control Protocol demonstration.
|
|
332
|
+
- [scripts/dev](scripts/dev): Contains various scripts for development and maintenance tasks.
|
|
333
|
+
|
|
334
|
+
## API Documentation
|
|
335
|
+
|
|
336
|
+
The `docs/api` folder provides in-depth documentation on data source characteristics and retrieval mechanisms. Refer to the [API Documentation Overview](docs/api/) for a summary of the contents in this directory.
|
|
337
|
+
|
|
338
|
+
## Data Initialization and Shortlisting
|
|
339
|
+
|
|
340
|
+
1. Initialization
|
|
341
|
+
- Execute `scripts/binance_vision_api_aws_s3/fetch_binance_data_availability.sh` to build `scripts/binance_vision_api_aws_s3/reports/spot_synchronal.csv`
|
|
342
|
+
- Here, the archaic word _synchronal_ refers to the crypto base pairs on the Binance exchange that we are interested in monitoring: to qualify, a base pair must be active in the SPOT, UM, and CM markets at the same time.
|
|
343
|
+
- `scripts/binance_vision_api_aws_s3/reports/spot_synchronal.csv` contains only the Binance SPOT market symbols, their earliest available dates, their available intervals (i.e. 1s, 1m, 3m, ..., 1d), and which base pairs (e.g. BTC) are also available on the UM and CM markets.
|
|
344
|
+
|
|
345
|
+
2. Shortlisting
|
|
346
|
+
- To exclude specific symbols from subsequent operations, simply remove their corresponding lines from `spot_synchronal.csv`.
|
|
347
|
+
|
|
348
|
+
## Development Scripts
|
|
349
|
+
|
|
350
|
+
The `scripts/dev` directory contains a collection of utility scripts designed to assist with various development, testing, and maintenance tasks. These scripts leverage modern Python tooling and practices to streamline workflows.
|
|
351
|
+
|
|
352
|
+
Some of the key tools and libraries used across these scripts include:
|
|
353
|
+
|
|
354
|
+
- **Ruff**: For fast linting and code formatting.
|
|
355
|
+
- **Vulture**: To identify dead code.
|
|
356
|
+
- **pytest-xdist**: For parallel test execution.
|
|
357
|
+
- **rope**: For Python code refactoring, used in conjunction with `git mv` for moving files.
|
|
358
|
+
- **fsspec**: For seamless interaction with various filesystems.
|
|
359
|
+
|
|
360
|
+
Explore the scripts and their individual READMEs within the [`scripts/dev`](scripts/dev) directory for more details.
|
|
361
|
+
|
|
362
|
+
## Logging Control
|
|
363
|
+
|
|
364
|
+
DSM now supports **loguru** for much easier log level control:
|
|
365
|
+
|
|
366
|
+
### DSM Logging Suppression for Feature Engineering
|
|
367
|
+
|
|
368
|
+
**Problem**: DSM produces extensive logging that clutters console output during feature engineering workflows.
|
|
369
|
+
|
|
370
|
+
**Solution**: Use `DSM_LOG_LEVEL=CRITICAL` to suppress all non-critical DSM logs:
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
# Clean feature engineering code - no boilerplate needed!
|
|
374
|
+
import os
|
|
375
|
+
os.environ["DSM_LOG_LEVEL"] = "CRITICAL"
|
|
376
|
+
|
|
377
|
+
from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
|
|
378
|
+
|
|
379
|
+
# Create DSM instance - minimal logging
|
|
380
|
+
dsm = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
|
|
381
|
+
|
|
382
|
+
# Fetch data - clean output, only your logs visible
|
|
383
|
+
data = dsm.get_data(
|
|
384
|
+
symbol="SOLUSDT",
|
|
385
|
+
start_time=start_time,
|
|
386
|
+
end_time=end_time,
|
|
387
|
+
interval=Interval.MINUTE_1,
|
|
388
|
+
)
|
|
389
|
+
# ✅ Clean output - no more cluttered DSM logs!
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
**Benefits**:
|
|
393
|
+
|
|
394
|
+
- ✅ **No Boilerplate**: Eliminates 15+ lines of logging suppression code
|
|
395
|
+
- ✅ **Clean Output**: Professional console output for feature engineering
|
|
396
|
+
- ✅ **Easy Control**: Single environment variable controls all DSM logging
|
|
397
|
+
- ✅ **Cleaner Default**: Default ERROR level provides quieter operation
|
|
398
|
+
|
|
399
|
+
### Simple Environment Variable Control
|
|
400
|
+
|
|
401
|
+
```bash
|
|
402
|
+
# Clean output for feature engineering (suppress DSM logs)
|
|
403
|
+
export DSM_LOG_LEVEL=CRITICAL
|
|
404
|
+
|
|
405
|
+
# Normal development with basic info
|
|
406
|
+
export DSM_LOG_LEVEL=INFO
|
|
407
|
+
|
|
408
|
+
# Default behavior (errors and critical only)
|
|
409
|
+
# No need to set anything - ERROR is the default
|
|
410
|
+
|
|
411
|
+
# Detailed debugging
|
|
412
|
+
export DSM_LOG_LEVEL=DEBUG
|
|
413
|
+
|
|
414
|
+
# Optional: Log to file with automatic rotation
|
|
415
|
+
export DSM_LOG_FILE=./logs/dsm.log
|
|
416
|
+
|
|
417
|
+
# Run your application
|
|
418
|
+
python your_script.py
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
### Programmatic Control
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
from data_source_manager.utils.loguru_setup import logger
|
|
425
|
+
|
|
426
|
+
# Set log level
|
|
427
|
+
logger.configure_level("DEBUG")
|
|
428
|
+
|
|
429
|
+
# Enable file logging
|
|
430
|
+
logger.configure_file("./logs/dsm.log")
|
|
431
|
+
|
|
432
|
+
# Use rich formatting
|
|
433
|
+
logger.info("Status: <green>SUCCESS</green>")
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
### Migration from Old Logger
|
|
437
|
+
|
|
438
|
+
The old `data_source_manager.utils.logger_setup` module is deprecated and will emit a deprecation warning. It re-exports from `loguru_setup` for backward compatibility, but you should update your imports:
|
|
439
|
+
|
|
440
|
+
```python
|
|
441
|
+
# Old (deprecated):
|
|
442
|
+
from data_source_manager.utils.logger_setup import logger
|
|
443
|
+
|
|
444
|
+
# New (recommended):
|
|
445
|
+
from data_source_manager.utils.loguru_setup import logger
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
All existing code continues to work without changes, but updating imports is recommended.
|
|
449
|
+
|
|
450
|
+
### Demo
|
|
451
|
+
|
|
452
|
+
Try the logging demos to see the benefits:
|
|
453
|
+
|
|
454
|
+
```bash
|
|
455
|
+
# DSM logging control demo
|
|
456
|
+
python examples/dsm_logging_demo.py
|
|
457
|
+
|
|
458
|
+
# Test different log levels with actual DSM
|
|
459
|
+
python examples/dsm_logging_demo.py --log-level CRITICAL --test-dsm
|
|
460
|
+
python examples/dsm_logging_demo.py --log-level DEBUG --test-dsm
|
|
461
|
+
|
|
462
|
+
# Clean feature engineering example
|
|
463
|
+
python examples/clean_feature_engineering_example.py
|
|
464
|
+
|
|
465
|
+
# General loguru demo
|
|
466
|
+
python examples/loguru_demo.py
|
|
467
|
+
|
|
468
|
+
# Environment variable control
|
|
469
|
+
DSM_LOG_LEVEL=CRITICAL python examples/clean_feature_engineering_example.py
|
|
470
|
+
DSM_LOG_LEVEL=DEBUG python examples/dsm_logging_demo.py --test-dsm
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
## Benefits of Loguru
|
|
474
|
+
|
|
475
|
+
- **🎯 Easy Control**: `DSM_LOG_LEVEL=DEBUG` vs complex logging configuration
|
|
476
|
+
- **🚀 Better Performance**: Loguru is faster than Python's standard logging
|
|
477
|
+
- **🔄 Auto Rotation**: Built-in log file rotation and compression
|
|
478
|
+
- **🎨 Rich Formatting**: Beautiful colored output with module/function info
|
|
479
|
+
- **🔧 Same API**: All existing logging calls work unchanged
|
|
480
|
+
|
|
481
|
+
## Documentation
|
|
482
|
+
|
|
483
|
+
- [API Documentation](docs/api/) - Complete API reference
|
|
484
|
+
- [Examples](examples/) - Usage examples and demos
|
|
485
|
+
- [Troubleshooting](docs/TROUBLESHOOTING.md) - Common issues and solutions
|
|
486
|
+
|
|
487
|
+
## License
|
|
488
|
+
|
|
489
|
+
MIT License - See [LICENSE](LICENSE) file for details.
|