pfeed 0.0.1.dev4__tar.gz → 0.0.1.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. pfeed-0.0.1.dev5/PKG-INFO +281 -0
  2. pfeed-0.0.1.dev5/README.md +246 -0
  3. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/__init__.py +1 -0
  4. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/commands/config.py +3 -0
  5. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/commands/docker_compose.py +7 -8
  6. pfeed-0.0.1.dev5/pfeed/cli/commands/download.py +53 -0
  7. pfeed-0.0.1.dev5/pfeed/const/commons.py +10 -0
  8. pfeed-0.0.1.dev5/pfeed/data_tools/data_tool_pandas.py +132 -0
  9. pfeed-0.0.1.dev5/pfeed/data_tools/data_tool_polars.py +104 -0
  10. pfeed-0.0.1.dev5/pfeed/data_tools/data_tool_pyspark.py +2 -0
  11. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/datastore.py +12 -22
  12. pfeed-0.0.1.dev5/pfeed/etl.py +221 -0
  13. pfeed-0.0.1.dev5/pfeed/feeds/base_feed.py +48 -0
  14. pfeed-0.0.1.dev5/pfeed/feeds/bybit_feed.py +170 -0
  15. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/feeds/yahoo_finance_feed.py +12 -7
  16. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/filepath.py +3 -3
  17. pfeed-0.0.1.dev5/pfeed/sources/bybit/__init__.py +6 -0
  18. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/sources/bybit/api.py +15 -8
  19. pfeed-0.0.1.dev5/pfeed/sources/bybit/const.py +38 -0
  20. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/sources/bybit/download.py +67 -53
  21. pfeed-0.0.1.dev5/pfeed/sources/bybit/stream.py +3 -0
  22. pfeed-0.0.1.dev5/pfeed/utils/monitor.py +21 -0
  23. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/utils/utils.py +5 -0
  24. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/utils/validate.py +13 -5
  25. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pyproject.toml +11 -5
  26. pfeed-0.0.1.dev4/PKG-INFO +0 -166
  27. pfeed-0.0.1.dev4/README.md +0 -134
  28. pfeed-0.0.1.dev4/pfeed/cli/commands/download.py +0 -54
  29. pfeed-0.0.1.dev4/pfeed/const/commons.py +0 -6
  30. pfeed-0.0.1.dev4/pfeed/feeds/base_feed.py +0 -17
  31. pfeed-0.0.1.dev4/pfeed/feeds/bybit_feed.py +0 -92
  32. pfeed-0.0.1.dev4/pfeed/sources/bybit/__init__.py +0 -6
  33. pfeed-0.0.1.dev4/pfeed/sources/bybit/const.py +0 -41
  34. pfeed-0.0.1.dev4/pfeed/sources/bybit/etl.py +0 -161
  35. pfeed-0.0.1.dev4/pfeed/sources/bybit/stream.py +0 -6
  36. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/LICENSE +0 -0
  37. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/__init__.py +0 -0
  38. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/commands/__init__.py +0 -0
  39. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/commands/stream.py +0 -0
  40. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/cli/main.py +0 -0
  41. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/config/logging.yml +0 -0
  42. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/config_handler.py +0 -0
  43. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/const/paths.py +0 -0
  44. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/feeds/__init__.py +0 -0
  45. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/feeds/custom_csv_feed.py +0 -0
  46. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/main.py +0 -0
  47. {pfeed-0.0.1.dev4 → pfeed-0.0.1.dev5}/pfeed/sources/__init__.py +0 -0
@@ -0,0 +1,281 @@
1
+ Metadata-Version: 2.1
2
+ Name: pfeed
3
+ Version: 0.0.1.dev5
4
+ Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
5
+ Home-page: https://pfund.ai
6
+ License: Apache-2.0
7
+ Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
8
+ Author: Stephen Yau
9
+ Author-email: softwareentrepreneer+pfeed@gmail.com
10
+ Requires-Python: >=3.10,<3.12
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
16
+ Requires-Dist: click (>=8.1.7,<9.0.0)
17
+ Requires-Dist: connectorx (>=0.3.2,<0.4.0)
18
+ Requires-Dist: minio (>=7.2.5,<8.0.0)
19
+ Requires-Dist: pandas (>=2.2.0,<3.0.0)
20
+ Requires-Dist: pfund (>=0.0.1.dev4,<0.0.2)
21
+ Requires-Dist: platformdirs (>=4.2.0,<5.0.0)
22
+ Requires-Dist: polars (>=0.20.16,<0.21.0)
23
+ Requires-Dist: pyarrow (>=15.0.0,<16.0.0)
24
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
25
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
26
+ Requires-Dist: ray (>=2.10.0,<3.0.0)
27
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
28
+ Requires-Dist: rich (>=13.7.0,<14.0.0)
29
+ Requires-Dist: s3fs (>=2024.3.1,<2025.0.0)
30
+ Requires-Dist: tqdm (>=4.66.2,<5.0.0)
31
+ Requires-Dist: yfinance (>=0.2.37,<0.3.0)
32
+ Project-URL: Documentation, https://pfeed-docs.pfund.ai
33
+ Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
34
+ Description-Content-Type: text/markdown
35
+
36
+ # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
37
+
38
+ ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
39
+ ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
40
+ [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
41
+ ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
42
+ [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org)
43
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
44
+
45
+ PFeed (/piː fiːd/) is a data integration library tailored for algorithmic trading,
46
+ serving as an ETL (Extract, Transform, Load) data pipeline between raw data sources and traders,
47
+ helping them in creating a **local data lake for quantitative research**.
48
+
49
+ PFeed allows traders to download historical, paper, and live data from various data sources, both free and paid,
50
+ and stores them into a local data lake using [MinIO](https://min.io/).
51
+
52
+ It is designed to be used alongside [PFund](https://github.com/PFund-Software-Ltd/pfund) — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading, or as a standalone package.
53
+
54
+ <details>
55
+ <summary>Table of Contents</summary>
56
+
57
+ - [Project Status](#project-status)
58
+ - [Mission](#mission)
59
+ - [Core Features](#core-features)
60
+ - [Installation](#installation)
61
+ - [Quick Start](#quick-start)
62
+ - [Main Usage: Data Feed](#main-usage-data-feed)
63
+ - [Download Historical Data on the Command Line Interface (CLI)](#download-historical-data-on-the-command-line-interface-cli)
64
+ - [Download Historical Data in Python](#download-historical-data-in-python)
65
+ - [List Current Config](#list-current-config)
66
+ - [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
67
+ - [Supported Data Sources](#supported-data-sources)
68
+ - [Related Projects](#related-projects)
69
+
70
+ </details>
71
+
72
+
73
+ ## Project Status
74
+ **_Caution: PFeed is at a VERY EARLY stage, use it at your own risk._**
75
+
76
+ PFeed is currently under active development; the framework design will be prioritized over
77
+ stability and scalability.
78
+
79
+ Please note that the available version is a *dev* version, not a *stable* one. \
80
+ You are encouraged to play with the *dev* version, but use it in production only once a *stable* version is released.
81
+
82
+ > For the time being, PFeed only supports [Bybit](https://bybit.com/) and Yahoo Finance, for testing purposes.
83
+
84
+ ## Mission
85
+ Algo-trading has always been a complicated task due to the multitude of components and procedures involved. \
86
+ Data collection and processing is probably the most mundane and yet critical part of it, as all results and findings
87
+ are derived from the data.
88
+
89
+ However, preparing this data for use is not quick and easy. For example, sometimes even when the data is available (e.g. [Bybit data](https://public.bybit.com/trading/)), it is often in raw form and requires some cleaning.
90
+
91
+ > PFeed's mission is to **_free traders from the tedious data work_** by providing cleaned data in a standard format that is ready for use, so that they can get to the analysis and strategy development phase significantly faster.
92
+
93
+
94
+ ## Core Features
95
+ - [x] Unified approach for interacting with various data sources, obtaining historical and real-time data
96
+ - [x] ETL data pipeline for transforming raw data and storing it in [MinIO](https://min.io/) (optional)
97
+ - [x] Utilizes [Ray](https://github.com/ray-project/ray) for parallel data downloading
98
+ - [x] Supports Pandas, [Polars](https://github.com/pola-rs/polars) as data tools
99
+ - [ ] Integrates with [Prefect](https://www.prefect.io) to control data flows
100
+ - [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb](https://www.timescale.com/) (optional)
101
+
102
+
103
+ ## Installation
104
+ ### Using [Poetry](https://python-poetry.org) (Recommended)
105
+ ```bash
106
+ poetry add pfeed
107
+ ```
108
+
109
+ ### Using Pip
110
+ ```bash
111
+ pip install pfeed
112
+ ```
113
+
114
+
115
+ ## Quick Start
116
+ ### Main Usage: Data Feed
117
+ 1. Download bybit raw data on the fly if not stored locally
118
+
119
+ ```python
120
+ import pfeed as pe
121
+
122
+ feed = pe.BybitFeed()
123
+
124
+ # df is a dataframe or a lazyframe (lazily loaded dataframe)
125
+ df = feed.get_historical_data(
126
+ 'BTC_USDT_PERP',
127
+ resolution='raw',
128
+ start_date='2024-03-01',
129
+ end_date='2024-03-01',
130
+ data_tool='polars', # or 'pandas'
131
+ )
132
+ ```
133
+
134
+ > By using pfeed, you are just one line of code away from playing with e.g. bybit data, how convenient!
135
+
136
+ Printing the first few rows of `df`:
137
+ | | ts | symbol | side | volume | price | tickDirection | trdMatchID | grossValue | homeNotional | foreignNotional |
138
+ |---:|:------------------------------|:---------|-------:|---------:|--------:|:----------------|:-------------------------------------|-------------:|---------------:|------------------:|
139
+ | 0 | 2024-03-01 00:00:00.097599983 | BTCUSDT | 1 | 0.003 | 61184.1 | ZeroMinusTick | 79ac9a21-0249-5985-b042-906ec7604794 | 1.83552e+10 | 0.003 | 183.552 |
140
+ | 1 | 2024-03-01 00:00:00.098299980 | BTCUSDT | 1 | 0.078 | 61184.9 | PlusTick | 2af4e516-8ff4-5955-bb9c-38aa385b7b44 | 4.77242e+11 | 0.078 | 4772.42 |
141
+
142
+ 2. Get dataframe with different resolution, e.g. 1-minute data
143
+ ```python
144
+ import pfeed as pe
145
+
146
+ feed = pe.BybitFeed()
147
+
148
+ # df is a dataframe or a lazyframe (lazily loaded dataframe)
149
+ df = feed.get_historical_data(
150
+ 'BTC_USDT_PERP',
151
+ resolution='1minute', # or '1tick'/'1t', '2second'/'2s', '3minute'/'3m' etc.
152
+ start_date='2024-03-01',
153
+ end_date='2024-03-01',
154
+ data_tool='polars',
155
+ )
156
+ ```
157
+ > If you will be interacting with the data frequently, you should consider downloading it to your local machine.
158
+
159
+ Printing the first few rows of `df`:
160
+ | | ts | product | resolution | open | high | low | close | volume |
161
+ |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
162
+ | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
163
+ | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
164
+ | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
165
+
166
+
167
+ 3. pfeed also supports simple wrapping of [yfinance](https://github.com/ranaroussi/yfinance)
168
+ ```python
169
+ import pfeed as pe
170
+
171
+ feed = pe.YahooFinanceFeed()
172
+
173
+ # you can still use any kwargs supported by yfinance's ticker.history(...)
174
+ # e.g. 'prepost', 'auto_adjust' etc.
175
+ yfinance_kwargs = {}
176
+
177
+ df = feed.get_historical_data(
178
+ 'AAPL',
179
+ resolution='1d',
180
+ start_date='2024-03-01',
181
+ end_date='2024-03-20',
182
+ **yfinance_kwargs
183
+ )
184
+ ```
185
+ > Note that YahooFinanceFeed doesn't support the kwarg `data_tool`, e.g. polars
186
+
187
+ Printing the first few rows of `df`:
188
+ | ts | symbol | resolution | open | high | low | close | volume | dividends | stock_splits |
189
+ |:--------------------|:---------|:-------------|-------:|-------:|-------:|--------:|---------:|------------:|---------------:|
190
+ | 2024-03-01 05:00:00 | AAPL | 1d | 179.55 | 180.53 | 177.38 | 179.66 | 73488000 | 0 | 0 |
191
+ | 2024-03-04 05:00:00 | AAPL | 1d | 176.15 | 176.9 | 173.79 | 175.1 | 81510100 | 0 | 0 |
192
+ | 2024-03-05 05:00:00 | AAPL | 1d | 170.76 | 172.04 | 169.62 | 170.12 | 95132400 | 0 | 0 |
193
+
194
+
195
+
196
+ ### Download Historical Data on the Command Line Interface (CLI)
197
+ ```bash
198
+ # download data, default data type (dtype) is 'raw' data
199
+ pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
200
+
201
+ # download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
202
+ pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtype minute
203
+
204
+ # download all perpetuals data from bybit
205
+ pfeed download -d BYBIT --ptype PERP
206
+
207
+ # download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
208
+ pfeed download -d BYBIT
209
+
210
+ # store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
211
+ pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
212
+
213
+ # enable debug mode and turn off using Ray
214
+ pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
215
+ ```
216
+
217
+ ### Download Historical Data in Python
218
+ ```python
219
+ import pfeed as pe
220
+
221
+ # compared to the CLI approach, this is more convenient for downloading multiple products
222
+ pe.bybit.download(
223
+ pdts=[
224
+ 'BTC_USDT_PERP',
225
+ 'ETH_USDT_PERP',
226
+ 'BCH_USDT_PERP',
227
+ ],
228
+ dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
229
+ start_date='2024-03-01',
230
+ end_date='2024-03-08',
231
+ use_minio=False,
232
+ )
233
+ ```
234
+
235
+ ### List Current Config
236
+ ```bash
237
+ # list the current config:
238
+ pfeed config --list
239
+
240
+ # change the data storage location to your local project's 'data' folder:
241
+ pfeed config --data-path ./data
242
+
243
+ # for more commands:
244
+ pfeed --help
245
+ ```
246
+
247
+ ### Run PFeed's docker-compose.yml
248
+ ```bash
249
+ # same as 'docker-compose', only difference is it has pointed to pfeed's docker-compose.yml file
250
+ pfeed docker-compose [COMMAND]
251
+
252
+ # e.g. start services
253
+ pfeed docker-compose up -d
254
+
255
+ # e.g. stop services
256
+ pfeed docker-compose down
257
+ ```
258
+
259
+
260
+ ## Supported Data Sources
261
+ | Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
262
+ | ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
263
+ | Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
264
+ | Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
265
+ | *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
266
+ | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
267
+ | Binance | 🔴 | 🔴 | 🔴 | 🔴 |
268
+ | OKX | 🔴 | 🔴 | 🔴 | 🔴 |
269
+
270
+ [FirstRate Data]: https://firstratedata.com
271
+
272
+ 🟢 = finished \
273
+ 🟡 = in progress \
274
+ 🔴 = todo \
275
+ ⚪ = not applicable \
276
+ \* = paid data
277
+
278
+
279
+ ## Related Projects
280
+ - [PFund](https://github.com/PFund-Software-Ltd/pfund) — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
281
+ - [PyTrade.org](https://pytrade.org) - A curated list of Python libraries and resources for algorithmic trading.
@@ -0,0 +1,246 @@
1
+ # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
2
+
3
+ ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
4
+ ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
5
+ [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
6
+ ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
7
+ [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org)
8
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
9
+
10
+ PFeed (/piː fiːd/) is a data integration library tailored for algorithmic trading,
11
+ serving as an ETL (Extract, Transform, Load) data pipeline between raw data sources and traders,
12
+ helping them in creating a **local data lake for quantitative research**.
13
+
14
+ PFeed allows traders to download historical, paper, and live data from various data sources, both free and paid,
15
+ and stores them into a local data lake using [MinIO](https://min.io/).
16
+
17
+ It is designed to be used alongside [PFund](https://github.com/PFund-Software-Ltd/pfund) — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading, or as a standalone package.
18
+
19
+ <details>
20
+ <summary>Table of Contents</summary>
21
+
22
+ - [Project Status](#project-status)
23
+ - [Mission](#mission)
24
+ - [Core Features](#core-features)
25
+ - [Installation](#installation)
26
+ - [Quick Start](#quick-start)
27
+ - [Main Usage: Data Feed](#main-usage-data-feed)
28
+ - [Download Historical Data on the Command Line Interface (CLI)](#download-historical-data-on-the-command-line-interface-cli)
29
+ - [Download Historical Data in Python](#download-historical-data-in-python)
30
+ - [List Current Config](#list-current-config)
31
+ - [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
32
+ - [Supported Data Sources](#supported-data-sources)
33
+ - [Related Projects](#related-projects)
34
+
35
+ </details>
36
+
37
+
38
+ ## Project Status
39
+ **_Caution: PFeed is at a VERY EARLY stage, use it at your own risk._**
40
+
41
+ PFeed is currently under active development; the framework design will be prioritized over
42
+ stability and scalability.
43
+
44
+ Please note that the available version is a *dev* version, not a *stable* one. \
45
+ You are encouraged to play with the *dev* version, but use it in production only once a *stable* version is released.
46
+
47
+ > For the time being, PFeed only supports [Bybit](https://bybit.com/) and Yahoo Finance, for testing purposes.
48
+
49
+ ## Mission
50
+ Algo-trading has always been a complicated task due to the multitude of components and procedures involved. \
51
+ Data collection and processing is probably the most mundane and yet critical part of it, as all results and findings
52
+ are derived from the data.
53
+
54
+ However, preparing this data for use is not quick and easy. For example, sometimes even when the data is available (e.g. [Bybit data](https://public.bybit.com/trading/)), it is often in raw form and requires some cleaning.
55
+
56
+ > PFeed's mission is to **_free traders from the tedious data work_** by providing cleaned data in a standard format that is ready for use, so that they can get to the analysis and strategy development phase significantly faster.
57
+
58
+
59
+ ## Core Features
60
+ - [x] Unified approach for interacting with various data sources, obtaining historical and real-time data
61
+ - [x] ETL data pipeline for transforming raw data and storing it in [MinIO](https://min.io/) (optional)
62
+ - [x] Utilizes [Ray](https://github.com/ray-project/ray) for parallel data downloading
63
+ - [x] Supports Pandas, [Polars](https://github.com/pola-rs/polars) as data tools
64
+ - [ ] Integrates with [Prefect](https://www.prefect.io) to control data flows
65
+ - [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb](https://www.timescale.com/) (optional)
66
+
67
+
68
+ ## Installation
69
+ ### Using [Poetry](https://python-poetry.org) (Recommended)
70
+ ```bash
71
+ poetry add pfeed
72
+ ```
73
+
74
+ ### Using Pip
75
+ ```bash
76
+ pip install pfeed
77
+ ```
78
+
79
+
80
+ ## Quick Start
81
+ ### Main Usage: Data Feed
82
+ 1. Download bybit raw data on the fly if not stored locally
83
+
84
+ ```python
85
+ import pfeed as pe
86
+
87
+ feed = pe.BybitFeed()
88
+
89
+ # df is a dataframe or a lazyframe (lazily loaded dataframe)
90
+ df = feed.get_historical_data(
91
+ 'BTC_USDT_PERP',
92
+ resolution='raw',
93
+ start_date='2024-03-01',
94
+ end_date='2024-03-01',
95
+ data_tool='polars', # or 'pandas'
96
+ )
97
+ ```
98
+
99
+ > By using pfeed, you are just one line of code away from playing with e.g. bybit data, how convenient!
100
+
101
+ Printing the first few rows of `df`:
102
+ | | ts | symbol | side | volume | price | tickDirection | trdMatchID | grossValue | homeNotional | foreignNotional |
103
+ |---:|:------------------------------|:---------|-------:|---------:|--------:|:----------------|:-------------------------------------|-------------:|---------------:|------------------:|
104
+ | 0 | 2024-03-01 00:00:00.097599983 | BTCUSDT | 1 | 0.003 | 61184.1 | ZeroMinusTick | 79ac9a21-0249-5985-b042-906ec7604794 | 1.83552e+10 | 0.003 | 183.552 |
105
+ | 1 | 2024-03-01 00:00:00.098299980 | BTCUSDT | 1 | 0.078 | 61184.9 | PlusTick | 2af4e516-8ff4-5955-bb9c-38aa385b7b44 | 4.77242e+11 | 0.078 | 4772.42 |
106
+
107
+ 2. Get dataframe with different resolution, e.g. 1-minute data
108
+ ```python
109
+ import pfeed as pe
110
+
111
+ feed = pe.BybitFeed()
112
+
113
+ # df is a dataframe or a lazyframe (lazily loaded dataframe)
114
+ df = feed.get_historical_data(
115
+ 'BTC_USDT_PERP',
116
+ resolution='1minute', # or '1tick'/'1t', '2second'/'2s', '3minute'/'3m' etc.
117
+ start_date='2024-03-01',
118
+ end_date='2024-03-01',
119
+ data_tool='polars',
120
+ )
121
+ ```
122
+ > If you will be interacting with the data frequently, you should consider downloading it to your local machine.
123
+
124
+ Printing the first few rows of `df`:
125
+ | | ts | product | resolution | open | high | low | close | volume |
126
+ |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
127
+ | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
128
+ | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
129
+ | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
130
+
131
+
132
+ 3. pfeed also supports simple wrapping of [yfinance](https://github.com/ranaroussi/yfinance)
133
+ ```python
134
+ import pfeed as pe
135
+
136
+ feed = pe.YahooFinanceFeed()
137
+
138
+ # you can still use any kwargs supported by yfinance's ticker.history(...)
139
+ # e.g. 'prepost', 'auto_adjust' etc.
140
+ yfinance_kwargs = {}
141
+
142
+ df = feed.get_historical_data(
143
+ 'AAPL',
144
+ resolution='1d',
145
+ start_date='2024-03-01',
146
+ end_date='2024-03-20',
147
+ **yfinance_kwargs
148
+ )
149
+ ```
150
+ > Note that YahooFinanceFeed doesn't support the kwarg `data_tool`, e.g. polars
151
+
152
+ Printing the first few rows of `df`:
153
+ | ts | symbol | resolution | open | high | low | close | volume | dividends | stock_splits |
154
+ |:--------------------|:---------|:-------------|-------:|-------:|-------:|--------:|---------:|------------:|---------------:|
155
+ | 2024-03-01 05:00:00 | AAPL | 1d | 179.55 | 180.53 | 177.38 | 179.66 | 73488000 | 0 | 0 |
156
+ | 2024-03-04 05:00:00 | AAPL | 1d | 176.15 | 176.9 | 173.79 | 175.1 | 81510100 | 0 | 0 |
157
+ | 2024-03-05 05:00:00 | AAPL | 1d | 170.76 | 172.04 | 169.62 | 170.12 | 95132400 | 0 | 0 |
158
+
159
+
160
+
161
+ ### Download Historical Data on the Command Line Interface (CLI)
162
+ ```bash
163
+ # download data, default data type (dtype) is 'raw' data
164
+ pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
165
+
166
+ # download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
167
+ pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtype minute
168
+
169
+ # download all perpetuals data from bybit
170
+ pfeed download -d BYBIT --ptype PERP
171
+
172
+ # download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
173
+ pfeed download -d BYBIT
174
+
175
+ # store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
176
+ pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
177
+
178
+ # enable debug mode and turn off using Ray
179
+ pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
180
+ ```
181
+
182
+ ### Download Historical Data in Python
183
+ ```python
184
+ import pfeed as pe
185
+
186
+ # compared to the CLI approach, this is more convenient for downloading multiple products
187
+ pe.bybit.download(
188
+ pdts=[
189
+ 'BTC_USDT_PERP',
190
+ 'ETH_USDT_PERP',
191
+ 'BCH_USDT_PERP',
192
+ ],
193
+ dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
194
+ start_date='2024-03-01',
195
+ end_date='2024-03-08',
196
+ use_minio=False,
197
+ )
198
+ ```
199
+
200
+ ### List Current Config
201
+ ```bash
202
+ # list the current config:
203
+ pfeed config --list
204
+
205
+ # change the data storage location to your local project's 'data' folder:
206
+ pfeed config --data-path ./data
207
+
208
+ # for more commands:
209
+ pfeed --help
210
+ ```
211
+
212
+ ### Run PFeed's docker-compose.yml
213
+ ```bash
214
+ # same as 'docker-compose', only difference is it has pointed to pfeed's docker-compose.yml file
215
+ pfeed docker-compose [COMMAND]
216
+
217
+ # e.g. start services
218
+ pfeed docker-compose up -d
219
+
220
+ # e.g. stop services
221
+ pfeed docker-compose down
222
+ ```
223
+
224
+
225
+ ## Supported Data Sources
226
+ | Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
227
+ | ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
228
+ | Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
229
+ | Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
230
+ | *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
231
+ | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
232
+ | Binance | 🔴 | 🔴 | 🔴 | 🔴 |
233
+ | OKX | 🔴 | 🔴 | 🔴 | 🔴 |
234
+
235
+ [FirstRate Data]: https://firstratedata.com
236
+
237
+ 🟢 = finished \
238
+ 🟡 = in progress \
239
+ 🔴 = todo \
240
+ ⚪ = not applicable \
241
+ \* = paid data
242
+
243
+
244
+ ## Related Projects
245
+ - [PFund](https://github.com/PFund-Software-Ltd/pfund) — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
246
+ - [PyTrade.org](https://pytrade.org) - A curated list of Python libraries and resources for algorithmic trading.
@@ -2,6 +2,7 @@ from pfeed.config_handler import configure
2
2
  from pfeed.sources import bybit
3
3
  from pfeed.feeds import YahooFinanceFeed, BybitFeed
4
4
  from importlib.metadata import version
5
+ from pfeed import etl
5
6
 
6
7
 
7
8
  __version__ = version('pfeed')
@@ -10,6 +10,9 @@ from pfeed.config_handler import ConfigHandler
10
10
 
11
11
 
12
12
  def save_config(config: ConfigHandler, config_file_path: str | Path):
13
+ if type(config_file_path) is str:
14
+ config_file_path = Path(config_file_path)
15
+ config_file_path.parent.mkdir(parents=True, exist_ok=True)
13
16
  with open(config_file_path, 'w') as f:
14
17
  yaml.dump(config.__dict__, f, default_flow_style=False)
15
18
 
@@ -19,16 +19,15 @@ from pfeed.const.paths import PROJ_NAME
19
19
  def docker_compose(ctx, env_file_path, docker_file_path):
20
20
  """Forwards commands to docker-compose with the package's docker-compose.yml file if not specified."""
21
21
  if not env_file_path:
22
- env_file_path = find_dotenv(usecwd=True, raise_error_if_not_found=True)
23
- click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
22
+ if env_file_path := find_dotenv(usecwd=True, raise_error_if_not_found=False):
23
+ click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
24
+ else:
25
+ click.echo('.env file is not found')
24
26
  load_dotenv(env_file_path, override=True)
25
27
 
26
- # write config's data path to environment variable if not set
27
- if not os.getenv('PFEED_DATA_PATH'):
28
- config = ctx.obj['config']
29
- click.echo(f'PFEED_DATA_PATH is not set, using data path "{config.data_path}" in config')
30
- os.environ['PFEED_DATA_PATH'] = config.data_path
31
-
28
+ config = ctx.obj['config']
29
+ os.environ['PFEED_DATA_PATH'] = config.data_path
30
+
32
31
  if not docker_file_path:
33
32
  package_dir = Path(importlib.resources.files(PROJ_NAME)).resolve().parents[0]
34
33
  docker_file_path = package_dir / 'docker-compose.yml'
@@ -0,0 +1,53 @@
1
+ import importlib
2
+
3
+ import click
4
+ from dotenv import find_dotenv, load_dotenv
5
+
6
+ from pfeed.const.commons import ALIASES, SUPPORTED_DOWNLOAD_DATA_SOURCES, SUPPORTED_DATA_TYPES
7
+
8
+
9
+ # add aliases to supported download data sources
10
+ SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED = SUPPORTED_DOWNLOAD_DATA_SOURCES + [k for k, v in ALIASES.items() if v in SUPPORTED_DOWNLOAD_DATA_SOURCES]
11
+
12
+ # 'raw' data type is implicit since it doesn't have the timeframe specified, but still allow it for convenience
13
+ # since for data source like bybit, there's only one raw data type, 'raw_tick', i.e. 'raw' will be converted to 'raw_tick'
14
+ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
15
+
16
+
17
+ @click.command()
18
+ @click.pass_context
19
+ @click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
20
+ @click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
21
+ @click.option('--dtype', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
22
+ @click.option('--pdt', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
23
+ @click.option('--ptype', '--pt', 'ptypes', multiple=True, default=[], help='List of product types, e.g. PERP = get all perpetuals')
24
+ @click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
25
+ @click.option('--end-date', '-e', type=click.DateTime(formats=["%Y-%m-%d"]), help='End date in YYYY-MM-DD format')
26
+ @click.option('--batch-size', default=8, type=int, help='batch size for Ray tasks') # REVIEW
27
+ @click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
28
+ @click.option('--use-minio', is_flag=True, help='if enabled, data will be loaded into Minio')
29
+ @click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
30
+ def download(ctx, env_file_path, data_source, pdts, dtypes, ptypes, start_date, end_date, batch_size, no_ray, use_minio, debug):
31
+ if not env_file_path:
32
+ if env_file_path := find_dotenv(usecwd=True, raise_error_if_not_found=False):
33
+ click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
34
+ else:
35
+ click.echo('.env file is not found')
36
+ load_dotenv(env_file_path, override=True)
37
+
38
+ if data_source in ALIASES:
39
+ data_source = ALIASES[data_source]
40
+
41
+ pipeline = importlib.import_module(f'pfeed.sources.{data_source.lower()}.download')
42
+ pipeline.download_historical_data(
43
+ pdts=pdts,
44
+ dtypes=list(dtypes),
45
+ ptypes=list(ptypes),
46
+ start_date=start_date.date().strftime('%Y-%m-%d') if start_date else start_date,
47
+ end_date=end_date.date().strftime('%Y-%m-%d') if end_date else end_date,
48
+ batch_size=batch_size,
49
+ use_ray=not no_ray,
50
+ use_minio=use_minio,
51
+ debug=debug,
52
+ config=ctx.obj['config'],
53
+ )
@@ -0,0 +1,10 @@
1
+ SUPPORTED_DATA_FEEDS = ['YAHOO_FINANCE', 'BYBIT']
2
+ SUPPORTED_DATA_TYPES = ['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
3
+ 'tick', 'second', 'minute', 'hour', 'daily']
4
+ SUPPORTED_DATA_SINKS = ['local', 'minio']
5
+ SUPPORTED_DOWNLOAD_DATA_SOURCES = ['BYBIT']
6
+ ALIASES = {
7
+ 'YF': 'YAHOO_FINANCE',
8
+ }
9
+ SUPPORTED_DATA_MODES = ['historical', 'streaming']
10
+ SUPPORTED_DATA_TOOLS = ['pandas', 'polars', 'pyspark']