data-source-manager 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. data_source_manager-1.4.0/LICENSE +21 -0
  2. data_source_manager-1.4.0/PKG-INFO +489 -0
  3. data_source_manager-1.4.0/README.md +451 -0
  4. data_source_manager-1.4.0/pyproject.toml +111 -0
  5. data_source_manager-1.4.0/setup.cfg +4 -0
  6. data_source_manager-1.4.0/setup.py +7 -0
  7. data_source_manager-1.4.0/src/data_source_manager/__init__.py +93 -0
  8. data_source_manager-1.4.0/src/data_source_manager/core/__init__.py +5 -0
  9. data_source_manager-1.4.0/src/data_source_manager/core/providers/__init__.py +392 -0
  10. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/__init__.py +13 -0
  11. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/binance_funding_rate_client.py +436 -0
  12. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/cache_manager.py +454 -0
  13. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/data_client_interface.py +129 -0
  14. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/rest_data_client.py +516 -0
  15. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/vision_data_client.py +1135 -0
  16. data_source_manager-1.4.0/src/data_source_manager/core/providers/binance/vision_path_mapper.py +307 -0
  17. data_source_manager-1.4.0/src/data_source_manager/core/providers/okx/__init__.py +22 -0
  18. data_source_manager-1.4.0/src/data_source_manager/core/providers/okx/okx_rest_client.py +542 -0
  19. data_source_manager-1.4.0/src/data_source_manager/core/sync/__init__.py +5 -0
  20. data_source_manager-1.4.0/src/data_source_manager/core/sync/data_source_manager.py +1162 -0
  21. data_source_manager-1.4.0/src/data_source_manager/core/sync/dsm_lib.py +259 -0
  22. data_source_manager-1.4.0/src/data_source_manager/core/sync/dsm_types.py +185 -0
  23. data_source_manager-1.4.0/src/data_source_manager/utils/__init__.py +144 -0
  24. data_source_manager-1.4.0/src/data_source_manager/utils/api_boundary_validator.py +682 -0
  25. data_source_manager-1.4.0/src/data_source_manager/utils/app_paths.py +168 -0
  26. data_source_manager-1.4.0/src/data_source_manager/utils/arrow_cache_reader.py +608 -0
  27. data_source_manager-1.4.0/src/data_source_manager/utils/cache/__init__.py +58 -0
  28. data_source_manager-1.4.0/src/data_source_manager/utils/cache/errors.py +35 -0
  29. data_source_manager-1.4.0/src/data_source_manager/utils/cache/functions.py +108 -0
  30. data_source_manager-1.4.0/src/data_source_manager/utils/cache/key_manager.py +96 -0
  31. data_source_manager-1.4.0/src/data_source_manager/utils/cache/memory_map.py +103 -0
  32. data_source_manager-1.4.0/src/data_source_manager/utils/cache/options.py +50 -0
  33. data_source_manager-1.4.0/src/data_source_manager/utils/cache/validator.py +391 -0
  34. data_source_manager-1.4.0/src/data_source_manager/utils/cache/vision_manager.py +111 -0
  35. data_source_manager-1.4.0/src/data_source_manager/utils/cache_validator.py +46 -0
  36. data_source_manager-1.4.0/src/data_source_manager/utils/config.py +403 -0
  37. data_source_manager-1.4.0/src/data_source_manager/utils/dataframe_types.py +243 -0
  38. data_source_manager-1.4.0/src/data_source_manager/utils/dataframe_utils.py +515 -0
  39. data_source_manager-1.4.0/src/data_source_manager/utils/deprecation_rules.py +247 -0
  40. data_source_manager-1.4.0/src/data_source_manager/utils/dsm_config.py +351 -0
  41. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/__init__.py +4 -0
  42. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_api_utils.py +354 -0
  43. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_cache_utils.py +346 -0
  44. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_date_range_utils.py +215 -0
  45. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_fcp_utils.py +357 -0
  46. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_time_range_utils.py +346 -0
  47. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/dsm_utilities.py +341 -0
  48. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_client_utils.py +285 -0
  49. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_data_processing.py +163 -0
  50. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_exceptions.py +110 -0
  51. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/rest_metrics.py +221 -0
  52. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_checksum.py +263 -0
  53. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_constraints.py +322 -0
  54. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_exceptions.py +75 -0
  55. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_file_utils.py +138 -0
  56. data_source_manager-1.4.0/src/data_source_manager/utils/for_core/vision_timestamp.py +202 -0
  57. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/__init__.py +4 -0
  58. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_app_options.py +164 -0
  59. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_cache_utils.py +192 -0
  60. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_clean_logging.py +318 -0
  61. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_cli_utils.py +319 -0
  62. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_data_fetcher.py +177 -0
  63. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_datetime_parser.py +108 -0
  64. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_display_utils.py +335 -0
  65. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_doc_utils.py +351 -0
  66. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_help_content.py +258 -0
  67. data_source_manager-1.4.0/src/data_source_manager/utils/for_demo/dsm_validation_utils.py +76 -0
  68. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/__init__.py +5 -0
  69. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/console_utils.py +159 -0
  70. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/custom_logger.py +74 -0
  71. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/error_logger.py +198 -0
  72. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/formatters.py +119 -0
  73. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/logger_proxy.py +470 -0
  74. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/logger_setup_utils.py +138 -0
  75. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/session_utils.py +76 -0
  76. data_source_manager-1.4.0/src/data_source_manager/utils/for_logger/timeout_logger.py +174 -0
  77. data_source_manager-1.4.0/src/data_source_manager/utils/gap_detector.py +339 -0
  78. data_source_manager-1.4.0/src/data_source_manager/utils/logger_setup.py +51 -0
  79. data_source_manager-1.4.0/src/data_source_manager/utils/loguru_setup.py +342 -0
  80. data_source_manager-1.4.0/src/data_source_manager/utils/market/__init__.py +58 -0
  81. data_source_manager-1.4.0/src/data_source_manager/utils/market/capabilities.py +245 -0
  82. data_source_manager-1.4.0/src/data_source_manager/utils/market/endpoints.py +80 -0
  83. data_source_manager-1.4.0/src/data_source_manager/utils/market/enums.py +317 -0
  84. data_source_manager-1.4.0/src/data_source_manager/utils/market/validation.py +213 -0
  85. data_source_manager-1.4.0/src/data_source_manager/utils/market_constraints.py +52 -0
  86. data_source_manager-1.4.0/src/data_source_manager/utils/market_utils.py +56 -0
  87. data_source_manager-1.4.0/src/data_source_manager/utils/network/__init__.py +58 -0
  88. data_source_manager-1.4.0/src/data_source_manager/utils/network/api.py +174 -0
  89. data_source_manager-1.4.0/src/data_source_manager/utils/network/client_factory.py +167 -0
  90. data_source_manager-1.4.0/src/data_source_manager/utils/network/download.py +292 -0
  91. data_source_manager-1.4.0/src/data_source_manager/utils/network/exceptions.py +24 -0
  92. data_source_manager-1.4.0/src/data_source_manager/utils/network/vision_download.py +176 -0
  93. data_source_manager-1.4.0/src/data_source_manager/utils/network_utils.py +44 -0
  94. data_source_manager-1.4.0/src/data_source_manager/utils/time/__init__.py +73 -0
  95. data_source_manager-1.4.0/src/data_source_manager/utils/time/bars.py +60 -0
  96. data_source_manager-1.4.0/src/data_source_manager/utils/time/conversion.py +245 -0
  97. data_source_manager-1.4.0/src/data_source_manager/utils/time/filtering.py +171 -0
  98. data_source_manager-1.4.0/src/data_source_manager/utils/time/intervals.py +370 -0
  99. data_source_manager-1.4.0/src/data_source_manager/utils/time/processor.py +220 -0
  100. data_source_manager-1.4.0/src/data_source_manager/utils/time_utils.py +78 -0
  101. data_source_manager-1.4.0/src/data_source_manager/utils/validation/__init__.py +63 -0
  102. data_source_manager-1.4.0/src/data_source_manager/utils/validation/availability_validation.py +151 -0
  103. data_source_manager-1.4.0/src/data_source_manager/utils/validation/dataframe_validation.py +285 -0
  104. data_source_manager-1.4.0/src/data_source_manager/utils/validation/file_validation.py +74 -0
  105. data_source_manager-1.4.0/src/data_source_manager/utils/validation/time_validation.py +403 -0
  106. data_source_manager-1.4.0/src/data_source_manager/utils_for_debug/__init__.py +7 -0
  107. data_source_manager-1.4.0/src/data_source_manager/utils_for_debug/timestamp_debug.py +276 -0
  108. data_source_manager-1.4.0/src/data_source_manager.egg-info/PKG-INFO +489 -0
  109. data_source_manager-1.4.0/src/data_source_manager.egg-info/SOURCES.txt +111 -0
  110. data_source_manager-1.4.0/src/data_source_manager.egg-info/dependency_links.txt +1 -0
  111. data_source_manager-1.4.0/src/data_source_manager.egg-info/entry_points.txt +4 -0
  112. data_source_manager-1.4.0/src/data_source_manager.egg-info/requires.txt +26 -0
  113. data_source_manager-1.4.0/src/data_source_manager.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Eon Labs Ltd.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,489 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-source-manager
3
+ Version: 1.4.0
4
+ Summary: Professional market data integration with clean package architecture
5
+ Author-email: EonLabs <terry@eonlabs.com>
6
+ License-Expression: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.13
9
+ Requires-Python: >=3.13
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: attrs>=25.0.0
13
+ Requires-Dist: pyarrow<21.0.0,>=19.0.0
14
+ Requires-Dist: polars<2.0.0,>=1.30.0
15
+ Requires-Dist: pandas<3.0.0,>=2.2.0
16
+ Requires-Dist: numpy<3.0.0,>=1.26.0
17
+ Requires-Dist: fsspec>=2024.6.0
18
+ Requires-Dist: requests>=2.32.0
19
+ Requires-Dist: httpx>=0.28.0
20
+ Requires-Dist: tenacity>=9.0.0
21
+ Requires-Dist: colorlog>=6.8.0
22
+ Requires-Dist: typer>=0.16.0
23
+ Requires-Dist: rich>=13.0.0
24
+ Requires-Dist: pendulum>=3.0.0
25
+ Requires-Dist: platformdirs>=4.0.0
26
+ Requires-Dist: loguru>=0.7.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: build>=1.2.0; extra == "dev"
29
+ Requires-Dist: twine>=6.0.0; extra == "dev"
30
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
31
+ Requires-Dist: pytest-xdist>=3.6.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
33
+ Requires-Dist: pytest-profiling; extra == "dev"
34
+ Requires-Dist: pyupgrade>=3.20.0; extra == "dev"
35
+ Requires-Dist: ruff>=0.11.0; extra == "dev"
36
+ Requires-Dist: GitPython>=3.1.0; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ # Data Source Manager
40
+
41
+ A high-performance, robust package for efficient market data retrieval from multiple data providers, including [Binance Vision](https://data.binance.vision/) and Binance REST ([Spot](https://developers.binance.com/docs/binance-spot-api-docs/rest-api/general-endpoints), [USDS-Margined Futures](https://developers.binance.com/docs/derivatives/usds-margined-futures/general-info), [Coin-Margined Futures](https://developers.binance.com/docs/derivatives/coin-margined-futures/general-info)) using Apache Arrow MMAP for optimal performance.
42
+
43
+ ## Features
44
+
45
+ - **Failover Control Protocol (FCP)**: Robust data retrieval from multiple sources
46
+ - **Local Cache**: Fast access to previously downloaded data using Apache Arrow
47
+ - **Vision API**: Efficient historical data from Binance Vision API on AWS S3
48
+ - **REST API**: Real-time and recent data from Binance REST API
49
+ - **Automatic Retry**: Built-in retry logic with exponential backoff
50
+ - **Data Validation**: Comprehensive data integrity checks
51
+ - **Rich Logging**: Beautiful, configurable logging with loguru support
52
+ - **Professional Package Structure**: Proper src-layout with clean namespace imports
53
+
54
+ ## Package Structure
55
+
56
+ Data Source Manager follows modern Python packaging standards with a clean src-layout structure:
57
+
58
+ ```
59
+ data-source-manager/
60
+ ├── src/
61
+ │ └── data_source_manager/ # Main package namespace
62
+ │ ├── __init__.py # Public API exports (lazy loading)
63
+ │ ├── core/ # Core functionality
64
+ │ │ ├── sync/ # Synchronous data managers
65
+ │ │ │ ├── data_source_manager.py # Main DSM class with FCP
66
+ │ │ │ ├── dsm_types.py # DataSource, DataSourceConfig
67
+ │ │ │ └── dsm_lib.py # High-level fetch functions
68
+ │ │ └── providers/ # Data provider implementations
69
+ │ │ └── binance/ # Binance-specific clients
70
+ │ └── utils/ # Utility modules
71
+ │ ├── market_constraints.py # Enums: DataProvider, MarketType, Interval
72
+ │ └── loguru_setup.py # Logging configuration
73
+ ├── examples/ # Usage examples and demos
74
+ ├── tests/ # Test suite
75
+ └── docs/ # Documentation
76
+ ```
77
+
78
+ **Note**: The public API returns pandas DataFrames for compatibility with downstream consumers. Polars is used internally for some performance-critical operations.
79
+
80
+ ## Installation
81
+
82
+ There are two main ways to install Data Source Manager, depending on your needs:
83
+
84
+ ### 1. For Development or Running Demos Directly
85
+
86
+ If you want to run the provided demos directly from the cloned repository or use the core library while having the source files available in your workspace, follow these steps:
87
+
88
+ ```bash
89
+ # Clone the repository
90
+ git clone https://github.com/terrylica/data-source-manager.git
91
+ cd data-source-manager
92
+
93
+ # Install with uv (recommended, 10-100x faster than pip)
94
+ uv sync --dev
95
+
96
+ # Or with pip (slower, not recommended)
97
+ pip install -e ".[dev]"
98
+ ```
99
+
100
+ **Note**: This project uses [uv](https://docs.astral.sh/uv/) for package management. Install it via `curl -LsSf https://astral.sh/uv/install.sh | sh`.
101
+
102
+ This method keeps all the source files in your workspace and includes necessary tools for development workflows.
103
+
104
+ ### 2. As a Dependency in Your Project (`pyproject.toml`)
105
+
106
+ If you want to use Data Source Manager as a library in your own Python project (managed with `pyproject.toml`) without including its entire source code in your project's directory, you can add it as a Git dependency.
107
+
108
+ Add the following to the `dependencies` array in the `[project]` table of your project's `pyproject.toml` file (as per PEP 621):
109
+
110
+ ```toml
111
+ [project]
112
+ # ... other project configurations like name, version ...
113
+ dependencies = [
114
+ # ... other dependencies ...
115
+ "data-source-manager @ git+https://github.com/terrylica/data-source-manager.git"
116
+ # You can also specify a particular branch, tag, or commit hash:
117
+ # "data-source-manager @ git+https://github.com/terrylica/data-source-manager.git@main"
118
+ # "data-source-manager @ git+https://github.com/terrylica/data-source-manager.git@<version>"
119
+ ]
120
+ ```
121
+
122
+ This will install Data Source Manager into your Python environment's `site-packages` directory, keeping your project workspace clean.
123
+
124
+ **Note on CLI Tools:**
125
+ The installation process (through either method) automatically registers the CLI commands (`dsm-demo-cli` and `dsm-demo-module`) as executable scripts in your Python environment. These commands will be available in your terminal after successful installation.
126
+
127
+ ## Usage
128
+
129
+ ### Basic Usage
130
+
131
+ ```python
132
+ from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
133
+ from datetime import datetime, timedelta, timezone
134
+
135
+ # Create a manager for USDT-margined futures
136
+ manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_USDT)
137
+
138
+ # Fetch recent BTCUSDT data with automatic failover
139
+ # IMPORTANT: Always use UTC timezone-aware datetimes
140
+ end_time = datetime.now(timezone.utc)
141
+ start_time = end_time - timedelta(days=7)
142
+
143
+ df = manager.get_data(
144
+ symbol="BTCUSDT",
145
+ start_time=start_time,
146
+ end_time=end_time,
147
+ interval=Interval.HOUR_1
148
+ )
149
+
150
+ print(f"Loaded {len(df)} bars of BTCUSDT data")
151
+ print(df.head())
152
+ manager.close() # Always close when done
153
+ ```
154
+
155
+ ### Failover Control Protocol (FCP)
156
+
157
+ The DSM automatically handles data retrieval through multiple sources:
158
+
159
+ ```python
160
+ # The FCP follows this sequence automatically:
161
+ # 1. 🚀 Local cache lookup (fastest)
162
+ # 2. 📡 Vision API for historical data (efficient)
163
+ # 3. 🔄 REST API fallback (real-time)
164
+
165
+ # All with automatic retry, data validation, and gap detection
166
+ manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
167
+
168
+ # This single call handles all the complexity:
169
+ data = manager.get_data("ETHUSDT", start_time, end_time, Interval.MINUTE_5)
170
+ # ✅ Cache checked, Vision API queried, REST API fallback - all automatic!
171
+ ```
172
+
173
+ ### Advanced Configuration
174
+
175
+ ```python
176
+ from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
177
+ from data_source_manager.core.sync.data_source_manager import DataSource
178
+
179
+ # Force specific data source (bypass FCP)
180
+ manager = DataSourceManager.create(
181
+ provider=DataProvider.BINANCE,
182
+ market_type=MarketType.FUTURES_USDT,
183
+ enforce_source=DataSource.VISION, # Force Vision API only
184
+ )
185
+
186
+ # Multiple market types supported
187
+ spot_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
188
+ futures_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_USDT)
189
+ coin_manager = DataSourceManager.create(DataProvider.BINANCE, MarketType.FUTURES_COIN)
190
+ ```
191
+
192
+ ## Running the Demos
193
+
194
+ Once installed, you can run the demos using the command-line tools:
195
+
196
+ ### DSM Demo CLI
197
+
198
+ The CLI demonstration provides an interactive way to explore the Failover Control Protocol:
199
+
200
+ ```bash
201
+ # Run the DSM Demo CLI with default parameters
202
+ dsm-demo-cli
203
+
204
+ # Run with specific parameters (get BTC data for a 10-day period)
205
+ dsm-demo-cli -s BTCUSDT -i 1m -d 10
206
+
207
+ # Get help and see all available options
208
+ dsm-demo-cli --help
209
+ ```
210
+
211
+ The CLI tool will automatically:
212
+
213
+ 1. Check for data in local cache
214
+ 2. Try to fetch missing data from Binance Vision API
215
+ 3. Fall back to REST API for data not available in cache or Vision API
216
+ 4. Save retrieved data to cache for future use
217
+
218
+ ### DSM Demo Module
219
+
220
+ The module demo provides a programmatic interface:
221
+
222
+ ```bash
223
+ # Run the DSM Demo Module to see examples
224
+ dsm-demo-module
225
+ ```
226
+
227
+ ## Using as a Library
228
+
229
+ The core data fetching functionality of Data Source Manager is available for direct import and use in your Python projects after installation.
230
+
231
+ The main function for retrieving market data is `fetch_market_data`.
232
+
233
+ ### Example 1: Fetching with Specific Date Range
234
+
235
+ ```python
236
+ from datetime import datetime, timezone
237
+ from data_source_manager import fetch_market_data, MarketType, DataProvider, Interval, ChartType
238
+
239
+ # Define parameters
240
+ provider = DataProvider.BINANCE
241
+ market_type = MarketType.SPOT
242
+ chart_type = ChartType.KLINES
243
+ symbol = "BTCUSDT"
244
+ interval = Interval.MINUTE_1
245
+ # IMPORTANT: Always use UTC timezone-aware datetimes
246
+ start_time = datetime(2023, 1, 1, tzinfo=timezone.utc)
247
+ end_time = datetime(2023, 1, 10, tzinfo=timezone.utc)
248
+
249
+ # Fetch data (returns pandas DataFrame)
250
+ df, elapsed_time, records_count = fetch_market_data(
251
+ provider=provider,
252
+ market_type=market_type,
253
+ chart_type=chart_type,
254
+ symbol=symbol,
255
+ interval=interval,
256
+ start_time=start_time,
257
+ end_time=end_time,
258
+ use_cache=True,
259
+ )
260
+
261
+ # Process results
262
+ print(f"Fetched {records_count} records in {elapsed_time:.2f} seconds")
263
+ if df is not None:
264
+ print(df.head())
265
+ ```
266
+
267
+ ### Example 2: Fetching Backward from a Specific End Time
268
+
269
+ This example demonstrates how to fetch data backward from a precise end time in May 2025:
270
+
271
+ ```python
272
+ import pendulum
273
+ from data_source_manager import fetch_market_data, MarketType, DataProvider, Interval, ChartType
274
+
275
+ # Define parameters
276
+ provider = DataProvider.BINANCE
277
+ market_type = MarketType.SPOT
278
+ chart_type = ChartType.KLINES
279
+ symbol = "BTCUSDT"
280
+ interval = Interval.MINUTE_1
281
+
282
+ # Define a specific end time with precise minutes and seconds
283
+ # Note: Using pendulum for better datetime handling as per project standards
284
+ end_time = pendulum.datetime(2025, 5, 15, 13, 45, 30, tz="UTC") # 2025-05-15 13:45:30 UTC
285
+ days = 7 # Fetch 7 days backward from the end time
286
+
287
+ # Fetch data (no need to specify start_time, it will be calculated)
288
+ df, elapsed_time, records_count = fetch_market_data(
289
+ provider=provider,
290
+ market_type=market_type,
291
+ chart_type=chart_type,
292
+ symbol=symbol,
293
+ interval=interval,
294
+ end_time=end_time,
295
+ days=days,
296
+ use_cache=True,
297
+ )
298
+
299
+ # Process results
300
+ print(f"Fetched {records_count} records in {elapsed_time:.2f} seconds")
301
+ print(f"Date range: {end_time.subtract(days=days).format('YYYY-MM-DD HH:mm:ss.SSS')} to {end_time.format('YYYY-MM-DD HH:mm:ss.SSS')}")
302
+ if df is not None:
303
+ print(df.head())
304
+ ```
305
+
306
+ You can import `fetch_market_data` directly from the `data_source_manager` package. The necessary enums (`MarketType`, `DataProvider`, `ChartType`, `Interval`, `DataSource`) and `DataSourceConfig` are also exposed at the top level for easy access.
307
+
308
+ Refer to the source code of `data_source_manager.core.sync.dsm_lib.fetch_market_data` and `data_source_manager.core.sync.data_source_manager.DataSourceConfig` for detailed parameter information and usage.
309
+
310
+ ## Data Source Manager (DSM) Demo
311
+
312
+ ### Quick Start
313
+
314
+ - **[DSM Demo CLI Documentation](examples/sync/)**: Interactive demonstration of the Failover Control Protocol mechanism, the core data retrieval strategy that ensures robust and efficient data collection from multiple sources.
315
+ - **[DSM Demo Module Documentation](examples/lib_module/)**: Programmatic interface to `src/data_source_manager/core/sync/dsm_lib.py` functions, complementing the CLI tool by providing a library approach to implement the same data retrieval functionality in custom applications.
316
+
317
+ ### Understanding Data Sources
318
+
319
+ The DSM implements a Failover Control Protocol (FCP) that follows this sequence:
320
+
321
+ 1. **Cache**: First checks local Arrow files for requested data
322
+ 2. **VISION API**: For missing data, attempts to download from Binance Vision API
323
+ 3. **REST API**: Falls back to Binance REST API for any remaining data gaps
324
+
325
+ Note that recent data (within ~48 hours) is typically not available in the Vision API and will be retrieved from the REST API.
326
+
327
+ ## Development Guidelines
328
+
329
+ ### Core Principles
330
+
331
+ - **[Focus DSM FCP Demo Rule](.cursor/rules/focus-dsm-fcp-demo.mdc)**: The authoritative instruction file guiding the Cursor Agent to strictly adhere to the demo plan and maintain focus on the Failover Control Protocol demonstration.
332
+ - [scripts/dev](scripts/dev): Contains various scripts for development and maintenance tasks.
333
+
334
+ ## API Documentation
335
+
336
+ The `docs/api` folder provides in-depth documentation on data source characteristics and retrieval mechanisms. Refer to the [API Documentation Overview](docs/api/) for a summary of the contents in this directory.
337
+
338
+ ## Data Initialization and Shortlisting
339
+
340
+ 1. Initialization
341
+ - Execute `scripts/binance_vision_api_aws_s3/fetch_binance_data_availability.sh` to build `scripts/binance_vision_api_aws_s3/reports/spot_synchronal.csv`
342
+ - The archaic word _synchronal_ is used here to mean the Binance crypto base pairs that we're interested in monitoring, i.e. those that are active simultaneously in the SPOT, UM and CM markets of the Binance Exchange.
343
+ - `scripts/binance_vision_api_aws_s3/reports/spot_synchronal.csv` contains only the Binance SPOT market symbols, their earliest available dates, their available intervals (i.e. 1s, 1m, 3m, ..., 1d), and which base pairs (e.g. BTC) are also on the UM and CM markets.
344
+
345
+ 2. Shortlisting
346
+ - To exclude specific symbols from subsequent operations below, simply remove their corresponding lines from `spot_synchronal.csv`
347
+
348
+ ## Development Scripts
349
+
350
+ The `scripts/dev` directory contains a collection of utility scripts designed to assist with various development, testing, and maintenance tasks. These scripts leverage modern Python tooling and practices to streamline workflows.
351
+
352
+ Some of the key tools and libraries used across these scripts include:
353
+
354
+ - **Ruff**: For fast linting and code formatting.
355
+ - **Vulture**: To identify dead code.
356
+ - **pytest-xdist**: For parallel test execution.
357
+ - **rope**: For Python code refactoring, used in conjunction with `git mv` for moving files.
358
+ - **fsspec**: For seamless interaction with various filesystems.
359
+
360
+ Explore the scripts and their individual READMEs within the [`scripts/dev`](scripts/dev) directory for more details.
361
+
362
+ ## Logging Control
363
+
364
+ DSM now supports **loguru** for much easier log level control:
365
+
366
+ ### DSM Logging Suppression for Feature Engineering
367
+
368
+ **Problem**: DSM produces extensive logging that clutters console output during feature engineering workflows.
369
+
370
+ **Solution**: Use `DSM_LOG_LEVEL=CRITICAL` to suppress all non-critical DSM logs:
371
+
372
+ ```python
373
+ # Clean feature engineering code - no boilerplate needed!
374
+ import os
375
+ os.environ["DSM_LOG_LEVEL"] = "CRITICAL"
376
+
377
+ from data_source_manager import DataSourceManager, DataProvider, MarketType, Interval
378
+
379
+ # Create DSM instance - minimal logging
380
+ dsm = DataSourceManager.create(DataProvider.BINANCE, MarketType.SPOT)
381
+
382
+ # Fetch data - clean output, only your logs visible
383
+ data = dsm.get_data(
384
+ symbol="SOLUSDT",
385
+ start_time=start_time,
386
+ end_time=end_time,
387
+ interval=Interval.MINUTE_1,
388
+ )
389
+ # ✅ Clean output - no more cluttered DSM logs!
390
+ ```
391
+
392
+ **Benefits**:
393
+
394
+ - ✅ **No Boilerplate**: Eliminates 15+ lines of logging suppression code
395
+ - ✅ **Clean Output**: Professional console output for feature engineering
396
+ - ✅ **Easy Control**: Single environment variable controls all DSM logging
397
+ - ✅ **Cleaner Default**: Default ERROR level provides quieter operation
398
+
399
+ ### Simple Environment Variable Control
400
+
401
+ ```bash
402
+ # Clean output for feature engineering (suppress DSM logs)
403
+ export DSM_LOG_LEVEL=CRITICAL
404
+
405
+ # Normal development with basic info
406
+ export DSM_LOG_LEVEL=INFO
407
+
408
+ # Default behavior (errors and critical only)
409
+ # No need to set anything - ERROR is the default
410
+
411
+ # Detailed debugging
412
+ export DSM_LOG_LEVEL=DEBUG
413
+
414
+ # Optional: Log to file with automatic rotation
415
+ export DSM_LOG_FILE=./logs/dsm.log
416
+
417
+ # Run your application
418
+ python your_script.py
419
+ ```
420
+
421
+ ### Programmatic Control
422
+
423
+ ```python
424
+ from data_source_manager.utils.loguru_setup import logger
425
+
426
+ # Set log level
427
+ logger.configure_level("DEBUG")
428
+
429
+ # Enable file logging
430
+ logger.configure_file("./logs/dsm.log")
431
+
432
+ # Use rich formatting
433
+ logger.info("Status: <green>SUCCESS</green>")
434
+ ```
435
+
436
+ ### Migration from Old Logger
437
+
438
+ The old `data_source_manager.utils.logger_setup` module is deprecated and will emit a deprecation warning. It re-exports from `loguru_setup` for backward compatibility, but you should update your imports:
439
+
440
+ ```python
441
+ # Old (deprecated):
442
+ from data_source_manager.utils.logger_setup import logger
443
+
444
+ # New (recommended):
445
+ from data_source_manager.utils.loguru_setup import logger
446
+ ```
447
+
448
+ All existing code continues to work without changes, but updating imports is recommended.
449
+
450
+ ### Demo
451
+
452
+ Try the logging demos to see the benefits:
453
+
454
+ ```bash
455
+ # DSM logging control demo
456
+ python examples/dsm_logging_demo.py
457
+
458
+ # Test different log levels with actual DSM
459
+ python examples/dsm_logging_demo.py --log-level CRITICAL --test-dsm
460
+ python examples/dsm_logging_demo.py --log-level DEBUG --test-dsm
461
+
462
+ # Clean feature engineering example
463
+ python examples/clean_feature_engineering_example.py
464
+
465
+ # General loguru demo
466
+ python examples/loguru_demo.py
467
+
468
+ # Environment variable control
469
+ DSM_LOG_LEVEL=CRITICAL python examples/clean_feature_engineering_example.py
470
+ DSM_LOG_LEVEL=DEBUG python examples/dsm_logging_demo.py --test-dsm
471
+ ```
472
+
473
+ ## Benefits of Loguru
474
+
475
+ - **🎯 Easy Control**: `DSM_LOG_LEVEL=DEBUG` vs complex logging configuration
476
+ - **🚀 Better Performance**: Loguru is faster than Python's standard logging
477
+ - **🔄 Auto Rotation**: Built-in log file rotation and compression
478
+ - **🎨 Rich Formatting**: Beautiful colored output with module/function info
479
+ - **🔧 Same API**: All existing logging calls work unchanged
480
+
481
+ ## Documentation
482
+
483
+ - [API Documentation](docs/api/) - Complete API reference
484
+ - [Examples](examples/) - Usage examples and demos
485
+ - [Troubleshooting](docs/TROUBLESHOOTING.md) - Common issues and solutions
486
+
487
+ ## License
488
+
489
+ MIT License - See [LICENSE](LICENSE) file for details.