pfeed 0.0.1.dev12__tar.gz → 0.0.1.dev14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pfeed-0.0.1.dev14/PKG-INFO +267 -0
- pfeed-0.0.1.dev14/README.md +231 -0
- pfeed-0.0.1.dev14/pfeed/__init__.py +62 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/commands/config.py +3 -1
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/commands/docker_compose.py +1 -8
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/commands/download.py +19 -25
- pfeed-0.0.1.dev14/pfeed/cli/commands/open.py +47 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/main.py +4 -2
- pfeed-0.0.1.dev14/pfeed/config_handler.py +148 -0
- pfeed-0.0.1.dev14/pfeed/const/common.py +15 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/const/paths.py +0 -2
- pfeed-0.0.1.dev14/pfeed/data_tools/data_tool_pandas.py +62 -0
- pfeed-0.0.1.dev14/pfeed/data_tools/data_tool_polars.py +65 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/datastore.py +45 -34
- pfeed-0.0.1.dev14/pfeed/etl.py +405 -0
- pfeed-0.0.1.dev14/pfeed/feeds/__init__.py +3 -0
- pfeed-0.0.1.dev14/pfeed/feeds/base_feed.py +296 -0
- pfeed-0.0.1.dev14/pfeed/feeds/binance_feed.py +21 -0
- pfeed-0.0.1.dev14/pfeed/feeds/bybit_feed.py +53 -0
- pfeed-0.0.1.dev14/pfeed/feeds/yahoo_finance_feed.py +178 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/filepath.py +32 -10
- pfeed-0.0.1.dev14/pfeed/resolution.py +62 -0
- pfeed-0.0.1.dev14/pfeed/sources/binance/__init__.py +11 -0
- pfeed-0.0.1.dev14/pfeed/sources/binance/api.py +105 -0
- pfeed-0.0.1.dev14/pfeed/sources/binance/const.py +47 -0
- {pfeed-0.0.1.dev12/pfeed/sources/bybit → pfeed-0.0.1.dev14/pfeed/sources/binance}/download.py +24 -14
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/sources/bybit/__init__.py +1 -8
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/sources/bybit/api.py +26 -20
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/sources/bybit/const.py +5 -18
- pfeed-0.0.1.dev14/pfeed/sources/bybit/download.py +196 -0
- pfeed-0.0.1.dev14/pfeed/sources/bybit/stream.py +3 -0
- pfeed-0.0.1.dev14/pfeed/sources/bybit/types.py +4 -0
- pfeed-0.0.1.dev14/pfeed/sources/bybit/utils.py +44 -0
- pfeed-0.0.1.dev14/pfeed/types/common_literals.py +13 -0
- pfeed-0.0.1.dev14/pfeed/utils/file_format.py +76 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/utils/utils.py +38 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/utils/validate.py +13 -12
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pyproject.toml +18 -16
- pfeed-0.0.1.dev12/PKG-INFO +0 -293
- pfeed-0.0.1.dev12/README.md +0 -260
- pfeed-0.0.1.dev12/pfeed/__init__.py +0 -55
- pfeed-0.0.1.dev12/pfeed/config/logging.yml +0 -51
- pfeed-0.0.1.dev12/pfeed/config_handler.py +0 -71
- pfeed-0.0.1.dev12/pfeed/const/commons.py +0 -11
- pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_pandas.py +0 -132
- pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_polars.py +0 -104
- pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_pyspark.py +0 -2
- pfeed-0.0.1.dev12/pfeed/etl.py +0 -228
- pfeed-0.0.1.dev12/pfeed/feeds/__init__.py +0 -2
- pfeed-0.0.1.dev12/pfeed/feeds/base_feed.py +0 -48
- pfeed-0.0.1.dev12/pfeed/feeds/bybit_feed.py +0 -174
- pfeed-0.0.1.dev12/pfeed/feeds/yahoo_finance_feed.py +0 -102
- pfeed-0.0.1.dev12/pfeed/types/common_literals.py +0 -10
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/LICENSE +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/__init__.py +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/commands/__init__.py +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/cli/commands/stream.py +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/feeds/custom_csv_feed.py +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/main.py +0 -0
- {pfeed-0.0.1.dev12/pfeed/sources/bybit → pfeed-0.0.1.dev14/pfeed/sources/binance}/stream.py +0 -0
- {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev14}/pfeed/utils/monitor.py +0 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pfeed
|
|
3
|
+
Version: 0.0.1.dev14
|
|
4
|
+
Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
|
|
5
|
+
Home-page: https://pfund.ai
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
|
|
8
|
+
Author: Stephen Yau
|
|
9
|
+
Author-email: softwareentrepreneer+pfeed@gmail.com
|
|
10
|
+
Requires-Python: >=3.10,<3.13
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Provides-Extra: all
|
|
17
|
+
Provides-Extra: boost
|
|
18
|
+
Provides-Extra: data
|
|
19
|
+
Provides-Extra: df
|
|
20
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
|
21
|
+
Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "boost" or extra == "all"
|
|
22
|
+
Requires-Dist: fastparquet (>=2024.5.0,<2025.0.0)
|
|
23
|
+
Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "data" or extra == "all"
|
|
24
|
+
Requires-Dist: pandas (>=2.2.2,<3.0.0) ; extra == "df" or extra == "all"
|
|
25
|
+
Requires-Dist: pfund (>=0.0.1.dev13,<0.0.2)
|
|
26
|
+
Requires-Dist: polars (>=1.6.0,<2.0.0) ; extra == "df" or extra == "all"
|
|
27
|
+
Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "data" or extra == "all"
|
|
28
|
+
Requires-Dist: pyarrow (>=15.0.0,<16.0.0) ; extra == "df" or extra == "all"
|
|
29
|
+
Requires-Dist: ray (>=2.35.0,<3.0.0) ; extra == "boost" or extra == "all"
|
|
30
|
+
Requires-Dist: s3fs (>=2024.9.0,<2025.0.0) ; extra == "data" or extra == "all"
|
|
31
|
+
Requires-Dist: yfinance (>=0.2.43,<0.3.0)
|
|
32
|
+
Project-URL: Documentation, https://pfeed-docs.pfund.ai
|
|
33
|
+
Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
|
|
37
|
+
|
|
38
|
+

|
|
39
|
+

|
|
40
|
+
[](https://pypi.org/project/pfeed)
|
|
41
|
+

|
|
42
|
+
[](https://jupyterbook.org)
|
|
43
|
+
[](https://python-poetry.org/)
|
|
44
|
+
|
|
45
|
+
[MinIO]: https://min.io/
|
|
46
|
+
[PFund]: https://github.com/PFund-Software-Ltd/pfund
|
|
47
|
+
[Ray]: https://github.com/ray-project/ray
|
|
48
|
+
[Polars]: https://github.com/pola-rs/polars
|
|
49
|
+
[Prefect]: https://www.prefect.io
|
|
50
|
+
[Timescaledb]: https://www.timescale.com/
|
|
51
|
+
[Dask]: https://www.dask.org/
|
|
52
|
+
[Spark]: https://spark.apache.org/docs/latest/api/python/index.html
|
|
53
|
+
[DuckDB]: https://github.com/duckdb/duckdb
|
|
54
|
+
[Daft]: https://github.com/Eventual-Inc/Daft
|
|
55
|
+
[PyTrade.org]: https://pytrade.org
|
|
56
|
+
[Databento]: https://databento.com/
|
|
57
|
+
[Polygon]: https://polygon.io/
|
|
58
|
+
[Bybit]: https://bybit.com/
|
|
59
|
+
[FirstRate Data]: https://firstratedata.com
|
|
60
|
+
|
|
61
|
+
## Problem
|
|
62
|
+
Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
|
|
63
|
+
|
|
64
|
+
## Solution
|
|
65
|
+
By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, helping traders reach the strategy development phase faster.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
|
|
69
|
+
|
|
70
|
+
## Core Features
|
|
71
|
+
- [x] Unified approach for interacting with various [data sources](#supported-data-sources) and obtaining historical and live data
|
|
72
|
+
- [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
|
|
73
|
+
- [x] Fast data downloading, utilizing [Ray] for parallelization
|
|
74
|
+
- [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
|
|
75
|
+
- [ ] Integrates with [Prefect] to control data flows
|
|
76
|
+
- [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
|
|
77
|
+
|
|
78
|
+
> It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
<details>
|
|
83
|
+
<summary>Table of Contents</summary>
|
|
84
|
+
|
|
85
|
+
- [Installation](#installation)
|
|
86
|
+
- [Quick Start](#quick-start)
|
|
87
|
+
- [Main Usage: Data Feed](#main-usage-data-feed)
|
|
88
|
+
- [Download Historical Data on the Command Line Interface (CLI)](#2-download-historical-data-on-the-command-line-interface-cli)
|
|
89
|
+
- [Download Historical Data in Python](#3-download-historical-data-in-python)
|
|
90
|
+
- [List Current Config](#list-current-config)
|
|
91
|
+
- [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
|
|
92
|
+
- [Supported Data Sources](#supported-data-sources)
|
|
93
|
+
- [Supported Data Tools](#supported-data-tools)
|
|
94
|
+
- [Related Projects](#related-projects)
|
|
95
|
+
- [Disclaimer](#disclaimer)
|
|
96
|
+
|
|
97
|
+
</details>
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
## Installation
|
|
101
|
+
### Using [Poetry](https://python-poetry.org) (Recommended)
|
|
102
|
+
```bash
|
|
103
|
+
# [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
|
|
104
|
+
poetry add "pfeed[all]"
|
|
105
|
+
|
|
106
|
+
# [Download data + Data tools + Data storage]
|
|
107
|
+
poetry add "pfeed[df,data]"
|
|
108
|
+
|
|
109
|
+
# [Download data + Data tools]
|
|
110
|
+
poetry add "pfeed[df]"
|
|
111
|
+
|
|
112
|
+
# [Download data only]:
|
|
113
|
+
poetry add pfeed
|
|
114
|
+
|
|
115
|
+
# update to the latest version:
|
|
116
|
+
poetry update pfeed
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Using Pip
|
|
120
|
+
```bash
|
|
121
|
+
# same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
|
|
122
|
+
pip install "pfeed[all]"
|
|
123
|
+
|
|
124
|
+
# install the latest version:
|
|
125
|
+
pip install -U pfeed
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Checking your installation
|
|
129
|
+
```bash
|
|
130
|
+
$ pfeed --version
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Quick Start
|
|
134
|
+
### 1. Get Historical Data in Dataframe (No storage)
|
|
135
|
+
Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
import pfeed as pe
|
|
139
|
+
|
|
140
|
+
feed = pe.BybitFeed(data_tool='polars')
|
|
141
|
+
|
|
142
|
+
df = feed.get_historical_data(
|
|
143
|
+
'BTC_USDT_PERP',
|
|
144
|
+
resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
|
|
145
|
+
start_date='2024-03-01',
|
|
146
|
+
end_date='2024-03-01',
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Printing the first few rows of `df`:
|
|
151
|
+
| | ts | product | resolution | open | high | low | close | volume |
|
|
152
|
+
|---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
|
|
153
|
+
| 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
|
|
154
|
+
| 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
|
|
155
|
+
| 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
|
|
156
|
+
|
|
157
|
+
> By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
### 2. Download Historical Data on the Command Line Interface (CLI)
|
|
162
|
+
```bash
|
|
163
|
+
# download data, default data type (dtype) is 'raw' data
|
|
164
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
|
|
165
|
+
|
|
166
|
+
# download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
|
|
167
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
|
|
168
|
+
|
|
169
|
+
# download all perpetuals data from bybit
|
|
170
|
+
pfeed download -d BYBIT --ptypes PERP
|
|
171
|
+
|
|
172
|
+
# download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
|
|
173
|
+
pfeed download -d BYBIT
|
|
174
|
+
|
|
175
|
+
# store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
|
|
176
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
|
|
177
|
+
|
|
178
|
+
# enable debug mode and turn off using Ray
|
|
179
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### 3. Download Historical Data in Python
|
|
183
|
+
```python
|
|
184
|
+
import pfeed as pe
|
|
185
|
+
|
|
186
|
+
# compared to the CLI approach, this approach is more convenient for downloading multiple products
|
|
187
|
+
pe.download(
|
|
188
|
+
data_source='bybit',
|
|
189
|
+
pdts=[
|
|
190
|
+
'BTC_USDT_PERP',
|
|
191
|
+
'ETH_USDT_PERP',
|
|
192
|
+
'BCH_USDT_PERP',
|
|
193
|
+
],
|
|
194
|
+
dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
|
|
195
|
+
start_date='2024-03-01',
|
|
196
|
+
end_date='2024-03-08',
|
|
197
|
+
use_minio=False,
|
|
198
|
+
)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### List Current Config
|
|
202
|
+
```bash
|
|
203
|
+
# list the current config:
|
|
204
|
+
pfeed config --list
|
|
205
|
+
|
|
206
|
+
# change the data storage location to your local project's 'data' folder:
|
|
207
|
+
pfeed config --data-path ./data
|
|
208
|
+
|
|
209
|
+
# for more commands:
|
|
210
|
+
pfeed --help
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Run PFeed's docker-compose.yml
|
|
214
|
+
```bash
|
|
215
|
+
# same as 'docker-compose'; the only difference is that it points to pfeed's docker-compose.yml file
|
|
216
|
+
pfeed docker-compose [COMMAND]
|
|
217
|
+
|
|
218
|
+
# e.g. start services
|
|
219
|
+
pfeed docker-compose up -d
|
|
220
|
+
|
|
221
|
+
# e.g. stop services
|
|
222
|
+
pfeed docker-compose down
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
## Supported Data Sources
|
|
227
|
+
| Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
|
|
228
|
+
| ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
|
|
229
|
+
| Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
|
|
230
|
+
| Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
|
|
231
|
+
| *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
|
|
232
|
+
| *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
|
|
233
|
+
| [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
234
|
+
| [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
235
|
+
| Binance | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
236
|
+
| OKX | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
237
|
+
|
|
238
|
+
🟢 = finished \
|
|
239
|
+
🟡 = in progress \
|
|
240
|
+
🔴 = todo \
|
|
241
|
+
⚪ = not applicable \
|
|
242
|
+
\* = paid data
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
## Supported Data Tools
|
|
246
|
+
| Data Tools | Supported |
|
|
247
|
+
| ------------------------ | --------- |
|
|
248
|
+
| Pandas | 🟢 |
|
|
249
|
+
| [Polars] | 🟢 |
|
|
250
|
+
| [Dask] | 🔴 |
|
|
251
|
+
| [Spark] | 🔴 |
|
|
252
|
+
| [DuckDB] | 🔴 |
|
|
253
|
+
| [Daft] | 🔴 |
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
## Related Projects
|
|
257
|
+
- [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
|
|
258
|
+
- [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
## Disclaimer
|
|
262
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
263
|
+
|
|
264
|
+
This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
|
|
265
|
+
|
|
266
|
+
The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
|
|
267
|
+
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+
[](https://pypi.org/project/pfeed)
|
|
6
|
+

|
|
7
|
+
[](https://jupyterbook.org)
|
|
8
|
+
[](https://python-poetry.org/)
|
|
9
|
+
|
|
10
|
+
[MinIO]: https://min.io/
|
|
11
|
+
[PFund]: https://github.com/PFund-Software-Ltd/pfund
|
|
12
|
+
[Ray]: https://github.com/ray-project/ray
|
|
13
|
+
[Polars]: https://github.com/pola-rs/polars
|
|
14
|
+
[Prefect]: https://www.prefect.io
|
|
15
|
+
[Timescaledb]: https://www.timescale.com/
|
|
16
|
+
[Dask]: https://www.dask.org/
|
|
17
|
+
[Spark]: https://spark.apache.org/docs/latest/api/python/index.html
|
|
18
|
+
[DuckDB]: https://github.com/duckdb/duckdb
|
|
19
|
+
[Daft]: https://github.com/Eventual-Inc/Daft
|
|
20
|
+
[PyTrade.org]: https://pytrade.org
|
|
21
|
+
[Databento]: https://databento.com/
|
|
22
|
+
[Polygon]: https://polygon.io/
|
|
23
|
+
[Bybit]: https://bybit.com/
|
|
24
|
+
[FirstRate Data]: https://firstratedata.com
|
|
25
|
+
|
|
26
|
+
## Problem
|
|
27
|
+
Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
|
|
28
|
+
|
|
29
|
+
## Solution
|
|
30
|
+
By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, helping traders reach the strategy development phase faster.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
|
|
34
|
+
|
|
35
|
+
## Core Features
|
|
36
|
+
- [x] Unified approach for interacting with various [data sources](#supported-data-sources) and obtaining historical and live data
|
|
37
|
+
- [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
|
|
38
|
+
- [x] Fast data downloading, utilizing [Ray] for parallelization
|
|
39
|
+
- [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
|
|
40
|
+
- [ ] Integrates with [Prefect] to control data flows
|
|
41
|
+
- [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
|
|
42
|
+
|
|
43
|
+
> It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
<details>
|
|
48
|
+
<summary>Table of Contents</summary>
|
|
49
|
+
|
|
50
|
+
- [Installation](#installation)
|
|
51
|
+
- [Quick Start](#quick-start)
|
|
52
|
+
- [Main Usage: Data Feed](#main-usage-data-feed)
|
|
53
|
+
- [Download Historical Data on the Command Line Interface (CLI)](#2-download-historical-data-on-the-command-line-interface-cli)
|
|
54
|
+
- [Download Historical Data in Python](#3-download-historical-data-in-python)
|
|
55
|
+
- [List Current Config](#list-current-config)
|
|
56
|
+
- [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
|
|
57
|
+
- [Supported Data Sources](#supported-data-sources)
|
|
58
|
+
- [Supported Data Tools](#supported-data-tools)
|
|
59
|
+
- [Related Projects](#related-projects)
|
|
60
|
+
- [Disclaimer](#disclaimer)
|
|
61
|
+
|
|
62
|
+
</details>
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
### Using [Poetry](https://python-poetry.org) (Recommended)
|
|
67
|
+
```bash
|
|
68
|
+
# [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
|
|
69
|
+
poetry add "pfeed[all]"
|
|
70
|
+
|
|
71
|
+
# [Download data + Data tools + Data storage]
|
|
72
|
+
poetry add "pfeed[df,data]"
|
|
73
|
+
|
|
74
|
+
# [Download data + Data tools]
|
|
75
|
+
poetry add "pfeed[df]"
|
|
76
|
+
|
|
77
|
+
# [Download data only]:
|
|
78
|
+
poetry add pfeed
|
|
79
|
+
|
|
80
|
+
# update to the latest version:
|
|
81
|
+
poetry update pfeed
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Using Pip
|
|
85
|
+
```bash
|
|
86
|
+
# same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
|
|
87
|
+
pip install "pfeed[all]"
|
|
88
|
+
|
|
89
|
+
# install the latest version:
|
|
90
|
+
pip install -U pfeed
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Checking your installation
|
|
94
|
+
```bash
|
|
95
|
+
$ pfeed --version
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Quick Start
|
|
99
|
+
### 1. Get Historical Data in Dataframe (No storage)
|
|
100
|
+
Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import pfeed as pe
|
|
104
|
+
|
|
105
|
+
feed = pe.BybitFeed(data_tool='polars')
|
|
106
|
+
|
|
107
|
+
df = feed.get_historical_data(
|
|
108
|
+
'BTC_USDT_PERP',
|
|
109
|
+
resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
|
|
110
|
+
start_date='2024-03-01',
|
|
111
|
+
end_date='2024-03-01',
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Printing the first few rows of `df`:
|
|
116
|
+
| | ts | product | resolution | open | high | low | close | volume |
|
|
117
|
+
|---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
|
|
118
|
+
| 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
|
|
119
|
+
| 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
|
|
120
|
+
| 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
|
|
121
|
+
|
|
122
|
+
> By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
### 2. Download Historical Data on the Command Line Interface (CLI)
|
|
127
|
+
```bash
|
|
128
|
+
# download data, default data type (dtype) is 'raw' data
|
|
129
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
|
|
130
|
+
|
|
131
|
+
# download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
|
|
132
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
|
|
133
|
+
|
|
134
|
+
# download all perpetuals data from bybit
|
|
135
|
+
pfeed download -d BYBIT --ptypes PERP
|
|
136
|
+
|
|
137
|
+
# download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
|
|
138
|
+
pfeed download -d BYBIT
|
|
139
|
+
|
|
140
|
+
# store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
|
|
141
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
|
|
142
|
+
|
|
143
|
+
# enable debug mode and turn off using Ray
|
|
144
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 3. Download Historical Data in Python
|
|
148
|
+
```python
|
|
149
|
+
import pfeed as pe
|
|
150
|
+
|
|
151
|
+
# compared to the CLI approach, this approach is more convenient for downloading multiple products
|
|
152
|
+
pe.download(
|
|
153
|
+
data_source='bybit',
|
|
154
|
+
pdts=[
|
|
155
|
+
'BTC_USDT_PERP',
|
|
156
|
+
'ETH_USDT_PERP',
|
|
157
|
+
'BCH_USDT_PERP',
|
|
158
|
+
],
|
|
159
|
+
dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
|
|
160
|
+
start_date='2024-03-01',
|
|
161
|
+
end_date='2024-03-08',
|
|
162
|
+
use_minio=False,
|
|
163
|
+
)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### List Current Config
|
|
167
|
+
```bash
|
|
168
|
+
# list the current config:
|
|
169
|
+
pfeed config --list
|
|
170
|
+
|
|
171
|
+
# change the data storage location to your local project's 'data' folder:
|
|
172
|
+
pfeed config --data-path ./data
|
|
173
|
+
|
|
174
|
+
# for more commands:
|
|
175
|
+
pfeed --help
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Run PFeed's docker-compose.yml
|
|
179
|
+
```bash
|
|
180
|
+
# same as 'docker-compose'; the only difference is that it points to pfeed's docker-compose.yml file
|
|
181
|
+
pfeed docker-compose [COMMAND]
|
|
182
|
+
|
|
183
|
+
# e.g. start services
|
|
184
|
+
pfeed docker-compose up -d
|
|
185
|
+
|
|
186
|
+
# e.g. stop services
|
|
187
|
+
pfeed docker-compose down
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
## Supported Data Sources
|
|
192
|
+
| Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
|
|
193
|
+
| ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
|
|
194
|
+
| Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
|
|
195
|
+
| Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
|
|
196
|
+
| *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
|
|
197
|
+
| *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
|
|
198
|
+
| [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
199
|
+
| [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
200
|
+
| Binance | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
201
|
+
| OKX | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
202
|
+
|
|
203
|
+
🟢 = finished \
|
|
204
|
+
🟡 = in progress \
|
|
205
|
+
🔴 = todo \
|
|
206
|
+
⚪ = not applicable \
|
|
207
|
+
\* = paid data
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
## Supported Data Tools
|
|
211
|
+
| Data Tools | Supported |
|
|
212
|
+
| ------------------------ | --------- |
|
|
213
|
+
| Pandas | 🟢 |
|
|
214
|
+
| [Polars] | 🟢 |
|
|
215
|
+
| [Dask] | 🔴 |
|
|
216
|
+
| [Spark] | 🔴 |
|
|
217
|
+
| [DuckDB] | 🔴 |
|
|
218
|
+
| [Daft] | 🔴 |
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
## Related Projects
|
|
222
|
+
- [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
|
|
223
|
+
- [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
## Disclaimer
|
|
227
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
228
|
+
|
|
229
|
+
This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
|
|
230
|
+
|
|
231
|
+
The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from pfeed.types.common_literals import tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_TYPES
|
|
6
|
+
|
|
7
|
+
import importlib
|
|
8
|
+
from importlib.metadata import version
|
|
9
|
+
|
|
10
|
+
from pfeed.config_handler import configure, get_config
|
|
11
|
+
from pfeed.const.common import ALIASES
|
|
12
|
+
from pfeed.sources import bybit
|
|
13
|
+
from pfeed.feeds import BybitFeed, YahooFinanceFeed
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def download_historical_data(
|
|
17
|
+
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
18
|
+
pdts: str | list[str] | None = None,
|
|
19
|
+
dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
|
|
20
|
+
ptypes: str | list[str] | None = None,
|
|
21
|
+
start_date: str | None = None,
|
|
22
|
+
end_date: str | None = None,
|
|
23
|
+
num_cpus: int = 8,
|
|
24
|
+
use_ray: bool = True,
|
|
25
|
+
use_minio: bool = False,
|
|
26
|
+
):
|
|
27
|
+
data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
|
|
28
|
+
return data_source.download_historical_data(
|
|
29
|
+
pdts=pdts,
|
|
30
|
+
dtypes=dtypes,
|
|
31
|
+
ptypes=ptypes,
|
|
32
|
+
start_date=start_date,
|
|
33
|
+
end_date=end_date,
|
|
34
|
+
num_cpus=num_cpus,
|
|
35
|
+
use_ray=use_ray,
|
|
36
|
+
use_minio=use_minio,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# TODO
|
|
41
|
+
def stream_realtime_data(data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES):
|
|
42
|
+
data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
|
|
43
|
+
return data_source.stream_realtime_data()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
download = download_historical_data
|
|
48
|
+
stream = stream_realtime_data
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
__version__ = version("pfeed")
|
|
52
|
+
__all__ = (
|
|
53
|
+
"__version__",
|
|
54
|
+
"configure",
|
|
55
|
+
"get_config",
|
|
56
|
+
"ALIASES",
|
|
57
|
+
"bybit",
|
|
58
|
+
"binance",
|
|
59
|
+
"YahooFinanceFeed",
|
|
60
|
+
"BybitFeed",
|
|
61
|
+
"BinanceFeed",
|
|
62
|
+
)
|
|
@@ -10,7 +10,7 @@ from pfeed.config_handler import ConfigHandler
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def save_config(config: ConfigHandler, config_file_path: str | Path):
|
|
13
|
-
if
|
|
13
|
+
if isinstance(config_file_path, str):
|
|
14
14
|
config_file_path = Path(config_file_path)
|
|
15
15
|
config_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
16
16
|
with open(config_file_path, 'w') as f:
|
|
@@ -31,6 +31,8 @@ def remove_config(config_file_path: str | Path):
|
|
|
31
31
|
@click.option('--logging-config', type=dict, help='Set the logging config')
|
|
32
32
|
@click.option('--use-fork-process', type=bool, help='If True, multiprocessing.set_start_method("fork")')
|
|
33
33
|
@click.option('--use-custom-excepthook', type=bool, help='If True, log uncaught exceptions to file')
|
|
34
|
+
@click.option('--env-file', 'env_file_path', type=click.Path(resolve_path=True, exists=True), help='Path to the .env file')
|
|
35
|
+
@click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
|
|
34
36
|
@click.option('--list', '-l', is_flag=True, is_eager=True, help='List all available options')
|
|
35
37
|
@click.option('--reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
|
|
36
38
|
def config(ctx, **kwargs):
|
|
@@ -3,7 +3,6 @@ from pathlib import Path
|
|
|
3
3
|
import importlib.resources
|
|
4
4
|
import subprocess
|
|
5
5
|
|
|
6
|
-
from dotenv import find_dotenv, load_dotenv
|
|
7
6
|
import click
|
|
8
7
|
|
|
9
8
|
from pfeed.const.paths import PROJ_NAME
|
|
@@ -18,14 +17,8 @@ from pfeed.const.paths import PROJ_NAME
|
|
|
18
17
|
@click.option('--docker-file', 'docker_file_path', type=click.Path(exists=True), help='Path to the docker-compose.yml file')
|
|
19
18
|
def docker_compose(ctx, env_file_path, docker_file_path):
|
|
20
19
|
"""Forwards commands to docker-compose with the package's docker-compose.yml file if not specified."""
|
|
21
|
-
if not env_file_path:
|
|
22
|
-
if env_file_path := find_dotenv(usecwd=True, raise_error_if_not_found=False):
|
|
23
|
-
click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
|
|
24
|
-
else:
|
|
25
|
-
click.echo('.env file is not found')
|
|
26
|
-
load_dotenv(env_file_path, override=True)
|
|
27
|
-
|
|
28
20
|
config = ctx.obj['config']
|
|
21
|
+
config.load_env_file(env_file_path)
|
|
29
22
|
os.environ['PFEED_DATA_PATH'] = config.data_path
|
|
30
23
|
|
|
31
24
|
if not docker_file_path:
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
|
|
3
3
|
import click
|
|
4
|
-
from dotenv import find_dotenv, load_dotenv
|
|
5
4
|
|
|
6
|
-
|
|
5
|
+
import pfeed as pe
|
|
6
|
+
from pfeed.const.common import (
|
|
7
|
+
ALIASES,
|
|
8
|
+
SUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
9
|
+
SUPPORTED_DATA_TYPES,
|
|
10
|
+
SUPPORTED_PRODUCT_TYPES,
|
|
11
|
+
)
|
|
7
12
|
|
|
8
13
|
|
|
9
14
|
# add aliases to supported download data sources
|
|
@@ -15,39 +20,28 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
|
|
|
15
20
|
|
|
16
21
|
|
|
17
22
|
@click.command()
|
|
18
|
-
@click.pass_context
|
|
19
|
-
@click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
|
|
20
23
|
@click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
|
|
21
|
-
@click.option('--
|
|
22
|
-
@click.option('--
|
|
23
|
-
@click.option('--
|
|
24
|
+
@click.option('--pdts', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
|
|
25
|
+
@click.option('--dtypes', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
|
|
26
|
+
@click.option('--ptypes', '--pt', 'ptypes', multiple=True, default=[], type=click.Choice(SUPPORTED_PRODUCT_TYPES, case_sensitive=False), help='List of product types, e.g. PERP = get all perpetuals')
|
|
24
27
|
@click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
|
|
25
28
|
@click.option('--end-date', '-e', type=click.DateTime(formats=["%Y-%m-%d"]), help='End date in YYYY-MM-DD format')
|
|
26
|
-
@click.option('--
|
|
29
|
+
@click.option('--num-cpus', '-n', default=8, type=int, help="number of logical CPUs used for Ray's tasks")
|
|
30
|
+
@click.option('--use-minio', '-m', is_flag=True, help='if enabled, data will be loaded into Minio')
|
|
27
31
|
@click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
|
|
28
|
-
@click.option('--
|
|
32
|
+
@click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
|
|
29
33
|
@click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
|
|
30
|
-
def download(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
|
|
34
|
-
else:
|
|
35
|
-
click.echo('.env file is not found')
|
|
36
|
-
load_dotenv(env_file_path, override=True)
|
|
37
|
-
|
|
38
|
-
if data_source in ALIASES:
|
|
39
|
-
data_source = ALIASES[data_source]
|
|
40
|
-
|
|
34
|
+
def download(data_source, pdts, dtypes, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
|
|
35
|
+
pe.configure(env_file_path=env_file_path, debug=debug)
|
|
36
|
+
data_source = ALIASES.get(data_source, data_source)
|
|
41
37
|
pipeline = importlib.import_module(f'pfeed.sources.{data_source.lower()}.download')
|
|
42
38
|
pipeline.download_historical_data(
|
|
43
39
|
pdts=pdts,
|
|
44
|
-
dtypes=
|
|
45
|
-
ptypes=
|
|
40
|
+
dtypes=dtypes,
|
|
41
|
+
ptypes=ptypes,
|
|
46
42
|
start_date=start_date.date().strftime('%Y-%m-%d') if start_date else start_date,
|
|
47
43
|
end_date=end_date.date().strftime('%Y-%m-%d') if end_date else end_date,
|
|
48
|
-
|
|
44
|
+
num_cpus=num_cpus,
|
|
49
45
|
use_ray=not no_ray,
|
|
50
46
|
use_minio=use_minio,
|
|
51
|
-
debug=debug,
|
|
52
|
-
config=ctx.obj['config'],
|
|
53
47
|
)
|