pfeed 0.0.1.dev12__tar.gz → 0.0.1.dev13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. pfeed-0.0.1.dev13/PKG-INFO +267 -0
  2. pfeed-0.0.1.dev13/README.md +231 -0
  3. pfeed-0.0.1.dev13/pfeed/__init__.py +64 -0
  4. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/commands/config.py +3 -1
  5. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/commands/docker_compose.py +1 -8
  6. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/commands/download.py +14 -24
  7. pfeed-0.0.1.dev13/pfeed/cli/commands/open.py +47 -0
  8. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/main.py +4 -2
  9. pfeed-0.0.1.dev13/pfeed/config_handler.py +148 -0
  10. pfeed-0.0.1.dev12/pfeed/const/commons.py → pfeed-0.0.1.dev13/pfeed/const/common.py +3 -3
  11. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/const/paths.py +0 -2
  12. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/datastore.py +44 -33
  13. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/etl.py +120 -29
  14. pfeed-0.0.1.dev13/pfeed/feeds/__init__.py +3 -0
  15. pfeed-0.0.1.dev13/pfeed/feeds/base_feed.py +300 -0
  16. pfeed-0.0.1.dev13/pfeed/feeds/binance_feed.py +21 -0
  17. pfeed-0.0.1.dev13/pfeed/feeds/bybit_feed.py +55 -0
  18. pfeed-0.0.1.dev13/pfeed/feeds/yahoo_finance_feed.py +178 -0
  19. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/filepath.py +4 -2
  20. pfeed-0.0.1.dev13/pfeed/sources/binance/__init__.py +11 -0
  21. pfeed-0.0.1.dev13/pfeed/sources/binance/api.py +105 -0
  22. pfeed-0.0.1.dev13/pfeed/sources/binance/const.py +47 -0
  23. {pfeed-0.0.1.dev12/pfeed/sources/bybit → pfeed-0.0.1.dev13/pfeed/sources/binance}/download.py +22 -12
  24. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/sources/bybit/__init__.py +1 -8
  25. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/sources/bybit/api.py +26 -20
  26. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/sources/bybit/const.py +1 -17
  27. pfeed-0.0.1.dev13/pfeed/sources/bybit/download.py +197 -0
  28. pfeed-0.0.1.dev13/pfeed/sources/bybit/stream.py +3 -0
  29. pfeed-0.0.1.dev13/pfeed/sources/bybit/types.py +5 -0
  30. pfeed-0.0.1.dev13/pfeed/sources/bybit/utils.py +19 -0
  31. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/types/common_literals.py +2 -2
  32. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/utils/utils.py +21 -0
  33. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/utils/validate.py +13 -12
  34. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pyproject.toml +16 -14
  35. pfeed-0.0.1.dev12/PKG-INFO +0 -293
  36. pfeed-0.0.1.dev12/README.md +0 -260
  37. pfeed-0.0.1.dev12/pfeed/__init__.py +0 -55
  38. pfeed-0.0.1.dev12/pfeed/config/logging.yml +0 -51
  39. pfeed-0.0.1.dev12/pfeed/config_handler.py +0 -71
  40. pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_pandas.py +0 -132
  41. pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_polars.py +0 -104
  42. pfeed-0.0.1.dev12/pfeed/data_tools/data_tool_pyspark.py +0 -2
  43. pfeed-0.0.1.dev12/pfeed/feeds/__init__.py +0 -2
  44. pfeed-0.0.1.dev12/pfeed/feeds/base_feed.py +0 -48
  45. pfeed-0.0.1.dev12/pfeed/feeds/bybit_feed.py +0 -174
  46. pfeed-0.0.1.dev12/pfeed/feeds/yahoo_finance_feed.py +0 -102
  47. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/LICENSE +0 -0
  48. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/__init__.py +0 -0
  49. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/commands/__init__.py +0 -0
  50. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/cli/commands/stream.py +0 -0
  51. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/feeds/custom_csv_feed.py +0 -0
  52. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/main.py +0 -0
  53. {pfeed-0.0.1.dev12/pfeed/sources/bybit → pfeed-0.0.1.dev13/pfeed/sources/binance}/stream.py +0 -0
  54. {pfeed-0.0.1.dev12 → pfeed-0.0.1.dev13}/pfeed/utils/monitor.py +0 -0
@@ -0,0 +1,267 @@
1
+ Metadata-Version: 2.1
2
+ Name: pfeed
3
+ Version: 0.0.1.dev13
4
+ Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
5
+ Home-page: https://pfund.ai
6
+ License: Apache-2.0
7
+ Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
8
+ Author: Stephen Yau
9
+ Author-email: softwareentrepreneer+pfeed@gmail.com
10
+ Requires-Python: >=3.10,<3.13
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Provides-Extra: all
17
+ Provides-Extra: boost
18
+ Provides-Extra: data
19
+ Provides-Extra: df
20
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
21
+ Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "boost" or extra == "all"
22
+ Requires-Dist: fastparquet (>=2024.5.0,<2025.0.0)
23
+ Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "data" or extra == "all"
24
+ Requires-Dist: pandas (>=2.2.2,<3.0.0) ; extra == "df" or extra == "all"
25
+ Requires-Dist: pfund (>=0.0.1.dev13,<0.0.2)
26
+ Requires-Dist: polars (>=1.5.0,<2.0.0) ; extra == "df" or extra == "all"
27
+ Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "data" or extra == "all"
28
+ Requires-Dist: pyarrow (>=15.0.0,<16.0.0) ; extra == "boost" or extra == "all"
29
+ Requires-Dist: ray (>=2.34.0,<3.0.0) ; extra == "boost" or extra == "all"
30
+ Requires-Dist: s3fs (>=2024.6.1,<2025.0.0) ; extra == "data" or extra == "all"
31
+ Requires-Dist: yfinance (>=0.2.43,<0.3.0)
32
+ Project-URL: Documentation, https://pfeed-docs.pfund.ai
33
+ Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
34
+ Description-Content-Type: text/markdown
35
+
36
+ # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
37
+
38
+ ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
39
+ ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
40
+ [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
41
+ ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
42
+ [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org)
43
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
44
+
45
+ [MinIO]: https://min.io/
46
+ [PFund]: https://github.com/PFund-Software-Ltd/pfund
47
+ [Ray]: https://github.com/ray-project/ray
48
+ [Polars]: https://github.com/pola-rs/polars
49
+ [Prefect]: https://www.prefect.io
50
+ [Timescaledb]: https://www.timescale.com/
51
+ [Dask]: https://www.dask.org/
52
+ [Spark]: https://spark.apache.org/docs/latest/api/python/index.html
53
+ [DuckDB]: https://github.com/duckdb/duckdb
54
+ [Daft]: https://github.com/Eventual-Inc/Daft
55
+ [PyTrade.org]: https://pytrade.org
56
+ [Databento]: https://databento.com/
57
+ [Polygon]: https://polygon.io/
58
+ [Bybit]: https://bybit.com/
59
+ [FirstRate Data]: https://firstratedata.com
60
+
61
+ ## Problem
62
+ Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
63
+
64
+ ## Solution
65
+ By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, helping traders reach the strategy development phase faster.
66
+
67
+ ---
68
+ PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
69
+
70
+ ## Core Features
71
+ - [x] Unified approach for interacting with various data sources and obtaining historical and live data
72
+ - [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
73
+ - [x] Fast data downloading, utilizing [Ray] for parallelization
74
+ - [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
75
+ - [ ] Integrates with [Prefect] to control data flows
76
+ - [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
77
+
78
+ > It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
79
+
80
+ ---
81
+
82
+ <details>
83
+ <summary>Table of Contents</summary>
84
+
85
+ - [Installation](#installation)
86
+ - [Quick Start](#quick-start)
87
+ - [Main Usage: Data Feed](#main-usage-data-feed)
88
+ - [Download Historical Data on Command Line](#download-historical-data-on-command-line)
89
+ - [Download Historical Data in Python](#download-historical-data-in-python)
90
+ - [List Current Config](#list-current-config)
91
+ - [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
92
+ - [Supported Data Sources](#supported-data-sources)
93
+ - [Supported Data Tools](#supported-data-tools)
94
+ - [Related Projects](#related-projects)
95
+ - [Disclaimer](#disclaimer)
96
+
97
+ </details>
98
+
99
+
100
+ ## Installation
101
+ ### Using [Poetry](https://python-poetry.org) (Recommended)
102
+ ```bash
103
+ # [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
104
+ poetry add "pfeed[all]"
105
+
106
+ # [Download data + Data tools + Data storage]
107
+ poetry add "pfeed[df,data]"
108
+
109
+ # [Download data + Data tools]
110
+ poetry add "pfeed[df]"
111
+
112
+ # [Download data only]:
113
+ poetry add pfeed
114
+
115
+ # update to the latest version:
116
+ poetry update pfeed
117
+ ```
118
+
119
+ ### Using Pip
120
+ ```bash
121
+ # same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
122
+ pip install "pfeed[all]"
123
+
124
+ # install the latest version:
125
+ pip install -U pfeed
126
+ ```
127
+
128
+ ### Checking your installation
129
+ ```bash
130
+ $ pfeed --version
131
+ ```
132
+
133
+ ## Quick Start
134
+ ### 1. Get Historical Data in Dataframe (No storage)
135
+ Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
136
+
137
+ ```python
138
+ import pfeed as pe
139
+
140
+ feed = pe.BybitFeed(data_tool='polars')
141
+
142
+ df = feed.get_historical_data(
143
+ 'BTC_USDT_PERP',
144
+ resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
145
+ start_date='2024-03-01',
146
+ end_date='2024-03-01',
147
+ )
148
+ ```
149
+
150
+ Printing the first few rows of `df`:
151
+ | | ts | product | resolution | open | high | low | close | volume |
152
+ |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
153
+ | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
154
+ | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
155
+ | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
156
+
157
+ > By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
158
+
159
+
160
+
161
+ ### 2. Download Historical Data on the Command Line Interface (CLI)
162
+ ```bash
163
+ # download data, default data type (dtype) is 'raw' data
164
+ pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
165
+
166
+ # download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
167
+ pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
168
+
169
+ # download all perpetuals data from bybit
170
+ pfeed download -d BYBIT --ptypes PERP
171
+
172
+ # download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
173
+ pfeed download -d BYBIT
174
+
175
+ # store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
176
+ pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
177
+
178
+ # enable debug mode and turn off using Ray
179
+ pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
180
+ ```
181
+
182
+ ### 3. Download Historical Data in Python
183
+ ```python
184
+ import pfeed as pe
185
+
186
+ # compared to the CLI approach, this approach is more convenient for downloading multiple products
187
+ pe.download(
188
+ data_source='bybit',
189
+ pdts=[
190
+ 'BTC_USDT_PERP',
191
+ 'ETH_USDT_PERP',
192
+ 'BCH_USDT_PERP',
193
+ ],
194
+ dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
195
+ start_date='2024-03-01',
196
+ end_date='2024-03-08',
197
+ use_minio=False,
198
+ )
199
+ ```
200
+
201
+ ### List Current Config
202
+ ```bash
203
+ # list the current config:
204
+ pfeed config --list
205
+
206
+ # change the data storage location to your local project's 'data' folder:
207
+ pfeed config --data-path ./data
208
+
209
+ # for more commands:
210
+ pfeed --help
211
+ ```
212
+
213
+ ### Run PFeed's docker-compose.yml
214
+ ```bash
215
+ # same as 'docker-compose', only difference is it has pointed to pfeed's docker-compose.yml file
216
+ pfeed docker-compose [COMMAND]
217
+
218
+ # e.g. start services
219
+ pfeed docker-compose up -d
220
+
221
+ # e.g. stop services
222
+ pfeed docker-compose down
223
+ ```
224
+
225
+
226
+ ## Supported Data Sources
227
+ | Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
228
+ | ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
229
+ | Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
230
+ | Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
231
+ | *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
232
+ | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
233
+ | [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
234
+ | [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
235
+ | Binance | 🔴 | 🔴 | 🔴 | 🔴 |
236
+ | OKX | 🔴 | 🔴 | 🔴 | 🔴 |
237
+
238
+ 🟢 = finished \
239
+ 🟡 = in progress \
240
+ 🔴 = todo \
241
+ ⚪ = not applicable \
242
+ \* = paid data
243
+
244
+
245
+ ## Supported Data Tools
246
+ | Data Tools | Supported |
247
+ | ------------------------ | --------- |
248
+ | Pandas | 🟢 |
249
+ | [Polars] | 🟢 |
250
+ | [Dask] | 🔴 |
251
+ | [Spark] | 🔴 |
252
+ | [DuckDB] | 🔴 |
253
+ | [Daft] | 🔴 |
254
+
255
+
256
+ ## Related Projects
257
+ - [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
258
+ - [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
259
+
260
+
261
+ ## Disclaimer
262
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
263
+
264
+ This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
265
+
266
+ The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
267
+
@@ -0,0 +1,231 @@
1
+ # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
2
+
3
+ ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
4
+ ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
5
+ [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
6
+ ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
7
+ [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org)
8
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
9
+
10
+ [MinIO]: https://min.io/
11
+ [PFund]: https://github.com/PFund-Software-Ltd/pfund
12
+ [Ray]: https://github.com/ray-project/ray
13
+ [Polars]: https://github.com/pola-rs/polars
14
+ [Prefect]: https://www.prefect.io
15
+ [Timescaledb]: https://www.timescale.com/
16
+ [Dask]: https://www.dask.org/
17
+ [Spark]: https://spark.apache.org/docs/latest/api/python/index.html
18
+ [DuckDB]: https://github.com/duckdb/duckdb
19
+ [Daft]: https://github.com/Eventual-Inc/Daft
20
+ [PyTrade.org]: https://pytrade.org
21
+ [Databento]: https://databento.com/
22
+ [Polygon]: https://polygon.io/
23
+ [Bybit]: https://bybit.com/
24
+ [FirstRate Data]: https://firstratedata.com
25
+
26
+ ## Problem
27
+ Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
28
+
29
+ ## Solution
30
+ By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, helping traders reach the strategy development phase faster.
31
+
32
+ ---
33
+ PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
34
+
35
+ ## Core Features
36
+ - [x] Unified approach for interacting with various data sources and obtaining historical and live data
37
+ - [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
38
+ - [x] Fast data downloading, utilizing [Ray] for parallelization
39
+ - [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
40
+ - [ ] Integrates with [Prefect] to control data flows
41
+ - [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
42
+
43
+ > It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
44
+
45
+ ---
46
+
47
+ <details>
48
+ <summary>Table of Contents</summary>
49
+
50
+ - [Installation](#installation)
51
+ - [Quick Start](#quick-start)
52
+ - [Main Usage: Data Feed](#main-usage-data-feed)
53
+ - [Download Historical Data on Command Line](#download-historical-data-on-command-line)
54
+ - [Download Historical Data in Python](#download-historical-data-in-python)
55
+ - [List Current Config](#list-current-config)
56
+ - [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
57
+ - [Supported Data Sources](#supported-data-sources)
58
+ - [Supported Data Tools](#supported-data-tools)
59
+ - [Related Projects](#related-projects)
60
+ - [Disclaimer](#disclaimer)
61
+
62
+ </details>
63
+
64
+
65
+ ## Installation
66
+ ### Using [Poetry](https://python-poetry.org) (Recommended)
67
+ ```bash
68
+ # [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
69
+ poetry add "pfeed[all]"
70
+
71
+ # [Download data + Data tools + Data storage]
72
+ poetry add "pfeed[df,data]"
73
+
74
+ # [Download data + Data tools]
75
+ poetry add "pfeed[df]"
76
+
77
+ # [Download data only]:
78
+ poetry add pfeed
79
+
80
+ # update to the latest version:
81
+ poetry update pfeed
82
+ ```
83
+
84
+ ### Using Pip
85
+ ```bash
86
+ # same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
87
+ pip install "pfeed[all]"
88
+
89
+ # install the latest version:
90
+ pip install -U pfeed
91
+ ```
92
+
93
+ ### Checking your installation
94
+ ```bash
95
+ $ pfeed --version
96
+ ```
97
+
98
+ ## Quick Start
99
+ ### 1. Get Historical Data in Dataframe (No storage)
100
+ Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
101
+
102
+ ```python
103
+ import pfeed as pe
104
+
105
+ feed = pe.BybitFeed(data_tool='polars')
106
+
107
+ df = feed.get_historical_data(
108
+ 'BTC_USDT_PERP',
109
+ resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
110
+ start_date='2024-03-01',
111
+ end_date='2024-03-01',
112
+ )
113
+ ```
114
+
115
+ Printing the first few rows of `df`:
116
+ | | ts | product | resolution | open | high | low | close | volume |
117
+ |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
118
+ | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
119
+ | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
120
+ | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
121
+
122
+ > By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
123
+
124
+
125
+
126
+ ### 2. Download Historical Data on the Command Line Interface (CLI)
127
+ ```bash
128
+ # download data, default data type (dtype) is 'raw' data
129
+ pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
130
+
131
+ # download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
132
+ pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
133
+
134
+ # download all perpetuals data from bybit
135
+ pfeed download -d BYBIT --ptypes PERP
136
+
137
+ # download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
138
+ pfeed download -d BYBIT
139
+
140
+ # store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
141
+ pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
142
+
143
+ # enable debug mode and turn off using Ray
144
+ pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
145
+ ```
146
+
147
+ ### 3. Download Historical Data in Python
148
+ ```python
149
+ import pfeed as pe
150
+
151
+ # compared to the CLI approach, this approach is more convenient for downloading multiple products
152
+ pe.download(
153
+ data_source='bybit',
154
+ pdts=[
155
+ 'BTC_USDT_PERP',
156
+ 'ETH_USDT_PERP',
157
+ 'BCH_USDT_PERP',
158
+ ],
159
+ dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
160
+ start_date='2024-03-01',
161
+ end_date='2024-03-08',
162
+ use_minio=False,
163
+ )
164
+ ```
165
+
166
+ ### List Current Config
167
+ ```bash
168
+ # list the current config:
169
+ pfeed config --list
170
+
171
+ # change the data storage location to your local project's 'data' folder:
172
+ pfeed config --data-path ./data
173
+
174
+ # for more commands:
175
+ pfeed --help
176
+ ```
177
+
178
+ ### Run PFeed's docker-compose.yml
179
+ ```bash
180
+ # same as 'docker-compose', only difference is it has pointed to pfeed's docker-compose.yml file
181
+ pfeed docker-compose [COMMAND]
182
+
183
+ # e.g. start services
184
+ pfeed docker-compose up -d
185
+
186
+ # e.g. stop services
187
+ pfeed docker-compose down
188
+ ```
189
+
190
+
191
+ ## Supported Data Sources
192
+ | Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
193
+ | ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
194
+ | Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
195
+ | Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
196
+ | *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
197
+ | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
198
+ | [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
199
+ | [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
200
+ | Binance | 🔴 | 🔴 | 🔴 | 🔴 |
201
+ | OKX | 🔴 | 🔴 | 🔴 | 🔴 |
202
+
203
+ 🟢 = finished \
204
+ 🟡 = in progress \
205
+ 🔴 = todo \
206
+ ⚪ = not applicable \
207
+ \* = paid data
208
+
209
+
210
+ ## Supported Data Tools
211
+ | Data Tools | Supported |
212
+ | ------------------------ | --------- |
213
+ | Pandas | 🟢 |
214
+ | [Polars] | 🟢 |
215
+ | [Dask] | 🔴 |
216
+ | [Spark] | 🔴 |
217
+ | [DuckDB] | 🔴 |
218
+ | [Daft] | 🔴 |
219
+
220
+
221
+ ## Related Projects
222
+ - [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
223
+ - [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
224
+
225
+
226
+ ## Disclaimer
227
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
228
+
229
+ This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
230
+
231
+ The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+
4
+ if TYPE_CHECKING:
5
+ from pfeed.types.common_literals import tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_TYPES
6
+
7
+ import importlib
8
+ from importlib.metadata import version
9
+
10
+ from pfeed import etl
11
+ from pfeed.config_handler import configure, get_config
12
+ from pfeed.const.common import ALIASES
13
+ from pfeed.sources import bybit
14
+ from pfeed.feeds import BybitFeed, YahooFinanceFeed
15
+
16
+
17
+ def download_historical_data(
18
+ data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
19
+ pdts: str | list[str] | None = None,
20
+ dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
21
+ ptypes: str | list[str] | None = None,
22
+ start_date: str | None = None,
23
+ end_date: str | None = None,
24
+ num_cpus: int = 8,
25
+ use_ray: bool = True,
26
+ use_minio: bool = False,
27
+ ):
28
+ data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
29
+ return data_source.download_historical_data(
30
+ pdts=pdts,
31
+ dtypes=dtypes,
32
+ ptypes=ptypes,
33
+ start_date=start_date,
34
+ end_date=end_date,
35
+ num_cpus=num_cpus,
36
+ use_ray=use_ray,
37
+ use_minio=use_minio,
38
+ )
39
+
40
+
41
+ # TODO
42
+ def stream_realtime_data(data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES):
43
+ data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
44
+ return data_source.stream_realtime_data()
45
+
46
+
47
+
48
+ download = download_historical_data
49
+ stream = stream_realtime_data
50
+
51
+
52
+ __version__ = version("pfeed")
53
+ __all__ = (
54
+ "__version__",
55
+ "configure",
56
+ "get_config",
57
+ "ALIASES",
58
+ "etl",
59
+ "bybit",
60
+ "binance",
61
+ "YahooFinanceFeed",
62
+ "BybitFeed",
63
+ "BinanceFeed",
64
+ )
@@ -10,7 +10,7 @@ from pfeed.config_handler import ConfigHandler
10
10
 
11
11
 
12
12
  def save_config(config: ConfigHandler, config_file_path: str | Path):
13
- if type(config_file_path) is str:
13
+ if isinstance(config_file_path, str):
14
14
  config_file_path = Path(config_file_path)
15
15
  config_file_path.parent.mkdir(parents=True, exist_ok=True)
16
16
  with open(config_file_path, 'w') as f:
@@ -31,6 +31,8 @@ def remove_config(config_file_path: str | Path):
31
31
  @click.option('--logging-config', type=dict, help='Set the logging config')
32
32
  @click.option('--use-fork-process', type=bool, help='If True, multiprocessing.set_start_method("fork")')
33
33
  @click.option('--use-custom-excepthook', type=bool, help='If True, log uncaught exceptions to file')
34
+ @click.option('--env-file', 'env_file_path', type=click.Path(resolve_path=True, exists=True), help='Path to the .env file')
35
+ @click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
34
36
  @click.option('--list', '-l', is_flag=True, is_eager=True, help='List all available options')
35
37
  @click.option('--reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
36
38
  def config(ctx, **kwargs):
@@ -3,7 +3,6 @@ from pathlib import Path
3
3
  import importlib.resources
4
4
  import subprocess
5
5
 
6
- from dotenv import find_dotenv, load_dotenv
7
6
  import click
8
7
 
9
8
  from pfeed.const.paths import PROJ_NAME
@@ -18,14 +17,8 @@ from pfeed.const.paths import PROJ_NAME
18
17
  @click.option('--docker-file', 'docker_file_path', type=click.Path(exists=True), help='Path to the docker-compose.yml file')
19
18
  def docker_compose(ctx, env_file_path, docker_file_path):
20
19
  """Forwards commands to docker-compose with the package's docker-compose.yml file if not specified."""
21
- if not env_file_path:
22
- if env_file_path := find_dotenv(usecwd=True, raise_error_if_not_found=False):
23
- click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
24
- else:
25
- click.echo('.env file is not found')
26
- load_dotenv(env_file_path, override=True)
27
-
28
20
  config = ctx.obj['config']
21
+ config.load_env_file(env_file_path)
29
22
  os.environ['PFEED_DATA_PATH'] = config.data_path
30
23
 
31
24
  if not docker_file_path:
@@ -1,9 +1,9 @@
1
1
  import importlib
2
2
 
3
3
  import click
4
- from dotenv import find_dotenv, load_dotenv
5
4
 
6
- from pfeed.const.commons import ALIASES, SUPPORTED_DOWNLOAD_DATA_SOURCES, SUPPORTED_DATA_TYPES
5
+ import pfeed as pe
6
+ from pfeed.const.common import ALIASES, SUPPORTED_DOWNLOAD_DATA_SOURCES, SUPPORTED_DATA_TYPES
7
7
 
8
8
 
9
9
  # add aliases to supported download data sources
@@ -16,38 +16,28 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
16
16
 
17
17
  @click.command()
18
18
  @click.pass_context
19
- @click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
20
19
  @click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
21
- @click.option('--dtype', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
22
- @click.option('--pdt', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
23
- @click.option('--ptype', '--pt', 'ptypes', multiple=True, default=[], help='List of product types, e.g. PERP = get all perpetuals')
20
+ @click.option('--dtypes', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
21
+ @click.option('--pdts', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
22
+ @click.option('--ptypes', '--pt', 'ptypes', multiple=True, default=[], help='List of product types, e.g. PERP = get all perpetuals')
24
23
  @click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
25
24
  @click.option('--end-date', '-e', type=click.DateTime(formats=["%Y-%m-%d"]), help='End date in YYYY-MM-DD format')
26
- @click.option('--batch-size', default=8, type=int, help='batch size for Ray tasks') # REVIEW
25
+ @click.option('--num-cpus', '-n', default=8, type=int, help="number of logical CPUs used for Ray's tasks")
26
+ @click.option('--use-minio', '-m', is_flag=True, help='if enabled, data will be loaded into Minio')
27
27
  @click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
28
- @click.option('--use-minio', is_flag=True, help='if enabled, data will be loaded into Minio')
28
+ @click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
29
29
  @click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
30
- def download(ctx, env_file_path, data_source, pdts, dtypes, ptypes, start_date, end_date, batch_size, no_ray, use_minio, debug):
31
- if not env_file_path:
32
- if env_file_path := find_dotenv(usecwd=True, raise_error_if_not_found=False):
33
- click.echo(f'.env file path is not specified, using env file in "{env_file_path}"')
34
- else:
35
- click.echo('.env file is not found')
36
- load_dotenv(env_file_path, override=True)
37
-
38
- if data_source in ALIASES:
39
- data_source = ALIASES[data_source]
40
-
30
+ def download(data_source, dtypes, pdts, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
31
+ pe.configure(env_file_path=env_file_path, debug=debug)
32
+ data_source = ALIASES.get(data_source, data_source)
41
33
  pipeline = importlib.import_module(f'pfeed.sources.{data_source.lower()}.download')
42
34
  pipeline.download_historical_data(
43
35
  pdts=pdts,
44
- dtypes=list(dtypes),
45
- ptypes=list(ptypes),
36
+ dtypes=dtypes,
37
+ ptypes=ptypes,
46
38
  start_date=start_date.date().strftime('%Y-%m-%d') if start_date else start_date,
47
39
  end_date=end_date.date().strftime('%Y-%m-%d') if end_date else end_date,
48
- batch_size=batch_size,
40
+ num_cpus=num_cpus,
49
41
  use_ray=not no_ray,
50
42
  use_minio=use_minio,
51
- debug=debug,
52
- config=ctx.obj['config'],
53
43
  )