pfeed 0.0.2.dev1__py3-none-any.whl → 0.0.2.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pfeed/__init__.py +6 -6
- pfeed/cli/commands/config.py +8 -11
- pfeed/cli/commands/doc.py +0 -0
- pfeed/cli/commands/download.py +4 -4
- pfeed/cli/main.py +3 -1
- pfeed/config_handler.py +13 -13
- pfeed/const/common.py +18 -1
- pfeed/feeds/base_feed.py +19 -25
- pfeed/feeds/yahoo_finance_feed.py +17 -9
- pfeed/sources/binance/download.py +1 -1
- pfeed/sources/bybit/const.py +1 -0
- pfeed/sources/bybit/download.py +6 -6
- pfeed/types/common_literals.py +3 -1
- pfeed/types/core.py +11 -0
- pfeed-0.0.2.dev2.dist-info/METADATA +213 -0
- {pfeed-0.0.2.dev1.dist-info → pfeed-0.0.2.dev2.dist-info}/RECORD +19 -17
- pfeed-0.0.2.dev1.dist-info/METADATA +0 -267
- {pfeed-0.0.2.dev1.dist-info → pfeed-0.0.2.dev2.dist-info}/LICENSE +0 -0
- {pfeed-0.0.2.dev1.dist-info → pfeed-0.0.2.dev2.dist-info}/WHEEL +0 -0
- {pfeed-0.0.2.dev1.dist-info → pfeed-0.0.2.dev2.dist-info}/entry_points.txt +0 -0
pfeed/__init__.py
CHANGED
|
@@ -8,32 +8,32 @@ import importlib
|
|
|
8
8
|
from importlib.metadata import version
|
|
9
9
|
|
|
10
10
|
from pfeed.config_handler import configure, get_config
|
|
11
|
-
from pfeed.const.common import ALIASES
|
|
11
|
+
from pfeed.const.common import ALIASES as aliases
|
|
12
12
|
from pfeed.sources import bybit
|
|
13
13
|
from pfeed.feeds import BybitFeed, YahooFinanceFeed
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def download_historical_data(
|
|
17
17
|
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
18
|
-
|
|
18
|
+
products: str | list[str] | None = None,
|
|
19
19
|
dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
|
|
20
20
|
ptypes: str | list[str] | None = None,
|
|
21
21
|
start_date: str | None = None,
|
|
22
22
|
end_date: str | None = None,
|
|
23
23
|
use_minio: bool = False,
|
|
24
24
|
use_ray: bool = True,
|
|
25
|
-
|
|
25
|
+
num_cpus: int = 8,
|
|
26
26
|
):
|
|
27
27
|
data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
|
|
28
28
|
return data_source.download_historical_data(
|
|
29
|
-
|
|
29
|
+
products=products,
|
|
30
30
|
dtypes=dtypes,
|
|
31
31
|
ptypes=ptypes,
|
|
32
32
|
start_date=start_date,
|
|
33
33
|
end_date=end_date,
|
|
34
34
|
use_minio=use_minio,
|
|
35
35
|
use_ray=use_ray,
|
|
36
|
-
|
|
36
|
+
num_cpus=num_cpus,
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
|
|
@@ -53,7 +53,7 @@ __all__ = (
|
|
|
53
53
|
"__version__",
|
|
54
54
|
"configure",
|
|
55
55
|
"get_config",
|
|
56
|
-
"
|
|
56
|
+
"aliases",
|
|
57
57
|
"bybit",
|
|
58
58
|
"binance",
|
|
59
59
|
"YahooFinanceFeed",
|
pfeed/cli/commands/config.py
CHANGED
|
@@ -32,33 +32,30 @@ def remove_config(config_file_path: str | Path):
|
|
|
32
32
|
@click.option('--use-fork-process', type=bool, help='If True, multiprocessing.set_start_method("fork")')
|
|
33
33
|
@click.option('--use-custom-excepthook', type=bool, help='If True, log uncaught exceptions to file')
|
|
34
34
|
@click.option('--env-file', 'env_file_path', type=click.Path(resolve_path=True, exists=True), help='Path to the .env file')
|
|
35
|
-
@click.option('--debug',
|
|
36
|
-
@click.option('--list', '-l', is_flag=True, is_eager=True, help='List all available options')
|
|
37
|
-
@click.option('--reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
|
|
38
|
-
def config(ctx, **kwargs):
|
|
35
|
+
@click.option('--debug', '-d', type=bool, help='If True, enable debug mode where logs at DEBUG level will be printed')
|
|
36
|
+
@click.option('--list', '-l', 'is_list', is_flag=True, is_eager=True, help='List all available options')
|
|
37
|
+
@click.option('--reset', 'is_reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
|
|
38
|
+
def config(ctx, is_list, is_reset, **kwargs):
|
|
39
39
|
"""Configures pfeed settings."""
|
|
40
40
|
config: ConfigHandler = ctx.obj['config']
|
|
41
41
|
|
|
42
42
|
# Filter out options that were not provided by the user
|
|
43
|
-
provided_options = {k: v for k, v in kwargs.items() if v is not None
|
|
43
|
+
provided_options = {k: v for k, v in kwargs.items() if v is not None}
|
|
44
44
|
|
|
45
|
-
if
|
|
46
|
-
del provided_options['list']
|
|
45
|
+
if is_list: # Check if --list was used
|
|
47
46
|
assert not provided_options, "No options should be provided with --list"
|
|
48
47
|
config_dict = config.__dict__
|
|
49
48
|
config_dict.update({'config_file_path': USER_CONFIG_FILE_PATH})
|
|
50
49
|
click.echo(f"PFeed's config:\n{pformat(config_dict)}")
|
|
51
50
|
return
|
|
52
51
|
|
|
53
|
-
if
|
|
54
|
-
del provided_options['reset']
|
|
52
|
+
if is_reset: # Check if --reset was used
|
|
55
53
|
assert not provided_options, "No options should be provided with --reset"
|
|
56
54
|
remove_config(USER_CONFIG_FILE_PATH)
|
|
57
55
|
click.echo("PFeed's config successfully reset.")
|
|
58
|
-
return
|
|
59
56
|
|
|
60
57
|
# prints out current config if no options are provided
|
|
61
|
-
if not provided_options:
|
|
58
|
+
if not provided_options and not is_list and not is_reset:
|
|
62
59
|
raise click.UsageError("No options provided. Use --list to see all available options.")
|
|
63
60
|
else:
|
|
64
61
|
for option, value in provided_options.items():
|
|
File without changes
|
pfeed/cli/commands/download.py
CHANGED
|
@@ -21,7 +21,7 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
|
|
|
21
21
|
|
|
22
22
|
@click.command()
|
|
23
23
|
@click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
|
|
24
|
-
@click.option('--
|
|
24
|
+
@click.option('--products', '-p', 'products', multiple=True, default=[], help='List of trading products')
|
|
25
25
|
@click.option('--dtypes', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
|
|
26
26
|
@click.option('--ptypes', '--pt', 'ptypes', multiple=True, default=[], type=click.Choice(SUPPORTED_PRODUCT_TYPES, case_sensitive=False), help='List of product types, e.g. PERP = get all perpetuals')
|
|
27
27
|
@click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
|
|
@@ -31,17 +31,17 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
|
|
|
31
31
|
@click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
|
|
32
32
|
@click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
|
|
33
33
|
@click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
|
|
34
|
-
def download(data_source,
|
|
34
|
+
def download(data_source, products, dtypes, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
|
|
35
35
|
pe.configure(env_file_path=env_file_path, debug=debug)
|
|
36
36
|
data_source = ALIASES.get(data_source, data_source)
|
|
37
37
|
pipeline = importlib.import_module(f'pfeed.sources.{data_source.lower()}.download')
|
|
38
38
|
pipeline.download_historical_data(
|
|
39
|
-
|
|
39
|
+
products=products,
|
|
40
40
|
dtypes=dtypes,
|
|
41
41
|
ptypes=ptypes,
|
|
42
42
|
start_date=start_date.date().strftime('%Y-%m-%d') if start_date else start_date,
|
|
43
43
|
end_date=end_date.date().strftime('%Y-%m-%d') if end_date else end_date,
|
|
44
44
|
use_ray=not no_ray,
|
|
45
|
-
|
|
45
|
+
num_cpus=num_cpus,
|
|
46
46
|
use_minio=use_minio,
|
|
47
47
|
)
|
pfeed/cli/main.py
CHANGED
|
@@ -6,6 +6,7 @@ from pfeed.cli.commands.config import config
|
|
|
6
6
|
from pfeed.cli.commands.download import download
|
|
7
7
|
# from pfeed.cli.commands.stream import stream
|
|
8
8
|
from pfeed.cli.commands.open import open
|
|
9
|
+
from pfeed.cli.commands.doc import doc
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
|
|
@@ -21,4 +22,5 @@ pfeed_group.add_command(docker_compose)
|
|
|
21
22
|
pfeed_group.add_command(config)
|
|
22
23
|
pfeed_group.add_command(download)
|
|
23
24
|
# pfeed_group.add_command(stream)
|
|
24
|
-
pfeed_group.add_command(open)
|
|
25
|
+
pfeed_group.add_command(open)
|
|
26
|
+
pfeed_group.add_command(doc)
|
pfeed/config_handler.py
CHANGED
|
@@ -33,7 +33,7 @@ class ConfigHandler:
|
|
|
33
33
|
logging_config: dict | None = None
|
|
34
34
|
use_fork_process: bool = True
|
|
35
35
|
use_custom_excepthook: bool = False
|
|
36
|
-
env_file_path: str
|
|
36
|
+
env_file_path: str = ''
|
|
37
37
|
debug: bool = False
|
|
38
38
|
|
|
39
39
|
@classmethod
|
|
@@ -57,9 +57,9 @@ class ConfigHandler:
|
|
|
57
57
|
return cls(**config)
|
|
58
58
|
|
|
59
59
|
def __post_init__(self):
|
|
60
|
-
self.
|
|
60
|
+
self._initialize()
|
|
61
61
|
|
|
62
|
-
def
|
|
62
|
+
def _initialize(self):
|
|
63
63
|
self.logging_config = self.logging_config or {}
|
|
64
64
|
|
|
65
65
|
for path in [self.data_path]:
|
|
@@ -76,19 +76,15 @@ class ConfigHandler:
|
|
|
76
76
|
self.load_env_file(self.env_file_path)
|
|
77
77
|
|
|
78
78
|
if self.debug:
|
|
79
|
-
|
|
80
|
-
if is_loggers_set_up:
|
|
81
|
-
print('loggers are already set up, ignoring enabling debug mode')
|
|
82
|
-
else:
|
|
83
|
-
self.enable_debug_mode()
|
|
79
|
+
self.enable_debug_mode()
|
|
84
80
|
|
|
85
|
-
def load_env_file(self, env_file_path: str
|
|
81
|
+
def load_env_file(self, env_file_path: str=''):
|
|
86
82
|
from dotenv import find_dotenv, load_dotenv
|
|
87
83
|
|
|
88
84
|
if not env_file_path:
|
|
89
|
-
|
|
90
|
-
if
|
|
91
|
-
print(f'.env file path is not specified, using env file in "{
|
|
85
|
+
env_file_path = find_dotenv(usecwd=True, raise_error_if_not_found=False)
|
|
86
|
+
if env_file_path:
|
|
87
|
+
print(f'.env file path is not specified, using env file in "{env_file_path}"')
|
|
92
88
|
else:
|
|
93
89
|
# print('.env file is not found')
|
|
94
90
|
return
|
|
@@ -96,6 +92,10 @@ class ConfigHandler:
|
|
|
96
92
|
|
|
97
93
|
def enable_debug_mode(self):
|
|
98
94
|
'''Enables debug mode by setting the log level to DEBUG for all stream handlers'''
|
|
95
|
+
is_loggers_set_up = bool(logging.getLogger('pfeed').handlers)
|
|
96
|
+
if is_loggers_set_up:
|
|
97
|
+
print('loggers are already set up, ignoring debug mode')
|
|
98
|
+
return
|
|
99
99
|
if 'handlers' not in self.logging_config:
|
|
100
100
|
self.logging_config['handlers'] = {}
|
|
101
101
|
for handler in ['stream_handler', 'stream_path_handler']:
|
|
@@ -145,7 +145,7 @@ def configure(
|
|
|
145
145
|
else:
|
|
146
146
|
raise AttributeError(f'{k} is not an attribute of ConfigHandler')
|
|
147
147
|
|
|
148
|
-
_global_config.
|
|
148
|
+
_global_config._initialize()
|
|
149
149
|
return _global_config
|
|
150
150
|
|
|
151
151
|
|
pfeed/const/common.py
CHANGED
|
@@ -1,9 +1,25 @@
|
|
|
1
|
+
from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_DATA_ENGINES
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
SUPPORTED_ENVIRONMENTS = ['BACKTEST', 'SANDBOX', 'PAPER', 'LIVE']
|
|
2
5
|
SUPPORTED_DATA_FEEDS = ['YAHOO_FINANCE', 'BYBIT', 'BINANCE']
|
|
3
6
|
SUPPORTED_STORAGES = ['local', 'minio']
|
|
4
7
|
SUPPORTED_DOWNLOAD_DATA_SOURCES = ['BYBIT', 'BINANCE']
|
|
5
8
|
SUPPORTED_CRYPTO_EXCHANGES = ['BYBIT', 'BINANCE']
|
|
6
|
-
SUPPORTED_DATA_TOOLS
|
|
9
|
+
SUPPORTED_DATA_TOOLS: dict[tSUPPORTED_DATA_TOOLS, tSUPPORTED_DATA_ENGINES | bool] = {
|
|
10
|
+
'pandas': ['dask'],
|
|
11
|
+
'polars': ['ray'],
|
|
12
|
+
# True means the data tool is also an execution engine
|
|
13
|
+
'dask': True,
|
|
14
|
+
'spark': True,
|
|
15
|
+
}
|
|
16
|
+
SUPPORTED_DATA_ENGINES: dict[tSUPPORTED_DATA_ENGINES, list[str]] = {
|
|
17
|
+
# execution engine: supported cloud services
|
|
18
|
+
'dask': ['coiled'],
|
|
19
|
+
'spark': ['databricks'],
|
|
20
|
+
'ray': ['aws'],
|
|
21
|
+
}
|
|
22
|
+
SUPPORTED_CLOUDS = ['aws']
|
|
7
23
|
SUPPORTED_PRODUCT_TYPES = ['SPOT', 'PERP', 'IPERP', 'FUT', 'IFUT']
|
|
8
24
|
SUPPORTED_DATA_TYPES = [
|
|
9
25
|
'raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
|
|
@@ -12,4 +28,5 @@ SUPPORTED_DATA_TYPES = [
|
|
|
12
28
|
|
|
13
29
|
ALIASES = {
|
|
14
30
|
'YF': 'YAHOO_FINANCE',
|
|
31
|
+
'FRD': 'FIRSTRATE_DATA'
|
|
15
32
|
}
|
pfeed/feeds/base_feed.py
CHANGED
|
@@ -1,15 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from typing import TYPE_CHECKING
|
|
3
3
|
if TYPE_CHECKING:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
import polars as pl
|
|
7
|
-
except ImportError:
|
|
8
|
-
pass
|
|
9
|
-
from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_STORAGES
|
|
10
|
-
from pfeed.sources.bybit.types import tSUPPORTED_DATA_TYPES
|
|
4
|
+
from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_STORAGES, tSUPPORTED_DATA_TYPES
|
|
5
|
+
from pfeed.types.core import tDataFrame
|
|
11
6
|
from pfeed.resolution import ExtendedResolution
|
|
12
|
-
DataFrame = pd.DataFrame | pl.LazyFrame
|
|
13
7
|
|
|
14
8
|
import os
|
|
15
9
|
import io
|
|
@@ -19,11 +13,6 @@ import logging
|
|
|
19
13
|
import datetime
|
|
20
14
|
import importlib
|
|
21
15
|
|
|
22
|
-
try:
|
|
23
|
-
import polars as pl
|
|
24
|
-
except ImportError:
|
|
25
|
-
pass
|
|
26
|
-
|
|
27
16
|
from pfeed.config_handler import get_config
|
|
28
17
|
from pfeed.const.common import SUPPORTED_DATA_FEEDS, SUPPORTED_DATA_TOOLS, SUPPORTED_STORAGES
|
|
29
18
|
from pfeed.utils.utils import (
|
|
@@ -99,7 +88,7 @@ class BaseFeed:
|
|
|
99
88
|
dates: list[datetime.date] = get_dates_in_between(start_date, end_date)
|
|
100
89
|
return dates
|
|
101
90
|
|
|
102
|
-
def _get_historical_data_from_storages(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') ->
|
|
91
|
+
def _get_historical_data_from_storages(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> tDataFrame | None:
|
|
103
92
|
from pfeed import etl
|
|
104
93
|
default_raw_resolution = self.utils.get_default_raw_resolution()
|
|
105
94
|
storages = [storage] if storage else SUPPORTED_STORAGES
|
|
@@ -113,7 +102,7 @@ class BaseFeed:
|
|
|
113
102
|
self.logger.debug(f'transformed {self.name} raw data to {resolution=}')
|
|
114
103
|
return transformed_df
|
|
115
104
|
|
|
116
|
-
def _get_historical_data_from_temp(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date]) ->
|
|
105
|
+
def _get_historical_data_from_temp(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date]) -> tDataFrame | None:
|
|
117
106
|
from pfeed import etl
|
|
118
107
|
default_raw_resolution = self.utils.get_default_raw_resolution()
|
|
119
108
|
temp_file_paths = [self._create_temp_file_path(trading_venue, pdt, resolution, date) for date in dates]
|
|
@@ -134,7 +123,7 @@ class BaseFeed:
|
|
|
134
123
|
) -> list[bytes]:
|
|
135
124
|
raise NotImplementedError(f"{self.name} _get_historical_data_from_source() is not implemented")
|
|
136
125
|
|
|
137
|
-
def _get_historical_data(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') ->
|
|
126
|
+
def _get_historical_data(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> tDataFrame:
|
|
138
127
|
if (df := self._get_historical_data_from_storages(trading_venue, pdt, resolution, dates, storage=storage)) is not None:
|
|
139
128
|
pass
|
|
140
129
|
elif (df := self._get_historical_data_from_temp(trading_venue, pdt, resolution, dates)) is not None:
|
|
@@ -153,22 +142,23 @@ class BaseFeed:
|
|
|
153
142
|
|
|
154
143
|
def get_historical_data(
|
|
155
144
|
self,
|
|
156
|
-
|
|
157
|
-
rollback_period: str="1w",
|
|
145
|
+
product: str,
|
|
158
146
|
resolution: str="1d",
|
|
147
|
+
rollback_period: str="1w",
|
|
159
148
|
start_date: str="",
|
|
160
149
|
end_date: str="",
|
|
161
150
|
trading_venue: str='',
|
|
162
151
|
storage: tSUPPORTED_STORAGES='',
|
|
163
|
-
) ->
|
|
152
|
+
) -> tDataFrame:
|
|
164
153
|
"""Get historical data from the data source.
|
|
165
154
|
Args:
|
|
166
|
-
|
|
155
|
+
product: Product symbol, e.g. BTC_USDT_PERP, where PERP = product type "perpetual".
|
|
167
156
|
rollback_period:
|
|
168
157
|
Period to rollback from today, only used when `start_date` is not specified.
|
|
169
158
|
Default is '1w' = 1 week.
|
|
170
159
|
resolution: Data resolution. e.g. '1m' = 1 minute as the unit of each data bar/candle.
|
|
171
160
|
Also supports raw resolution such as 'r1m', where 'r' stands for raw.
|
|
161
|
+
If resolution is 'raw', the default raw resolution of the data type will be used.
|
|
172
162
|
Default is '1d' = 1 day.
|
|
173
163
|
start_date: Start date.
|
|
174
164
|
end_date: End date.
|
|
@@ -178,13 +168,17 @@ class BaseFeed:
|
|
|
178
168
|
from pfeed import etl
|
|
179
169
|
from pfeed.resolution import ExtendedResolution
|
|
180
170
|
|
|
181
|
-
pdt, trading_venue, storage =
|
|
171
|
+
pdt, trading_venue, storage = product.upper(), trading_venue.upper(), storage.lower()
|
|
182
172
|
assert validate_pdt(
|
|
183
173
|
self.name, pdt
|
|
184
174
|
), f'"{pdt}" does not match the required format "XXX_YYY_PTYPE" or has an unsupported product type. (PTYPE means product type, e.g. PERP, Supported types for {self.name} are: {self.const.SUPPORTED_PRODUCT_TYPES})'
|
|
185
175
|
if storage:
|
|
186
176
|
assert storage in SUPPORTED_STORAGES, f"Invalid {storage=}, {SUPPORTED_STORAGES=}"
|
|
187
177
|
self._prepare_temp_dir()
|
|
178
|
+
if resolution == 'raw':
|
|
179
|
+
assert self.const.SUPPORTED_DATA_TYPES[0].startswith('raw_')
|
|
180
|
+
default_raw_dtype = self.const.SUPPORTED_DATA_TYPES[0]
|
|
181
|
+
resolution = self.const.DTYPES_TO_RAW_RESOLUTIOS[default_raw_dtype]
|
|
188
182
|
resolution = ExtendedResolution(resolution)
|
|
189
183
|
trading_venue = trading_venue or derive_trading_venue(self.name)
|
|
190
184
|
dates: list[datetime.date] = self._prepare_dates(start_date, end_date, rollback_period)
|
|
@@ -202,26 +196,26 @@ class BaseFeed:
|
|
|
202
196
|
|
|
203
197
|
def download_historical_data(
|
|
204
198
|
self,
|
|
205
|
-
|
|
199
|
+
products: str | list[str] | None = None,
|
|
206
200
|
dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
|
|
207
201
|
ptypes: str | list[str] | None = None,
|
|
208
202
|
start_date: str | None = None,
|
|
209
203
|
end_date: str | None = None,
|
|
210
204
|
use_minio: bool = False,
|
|
211
205
|
use_ray: bool = True,
|
|
212
|
-
|
|
206
|
+
num_cpus: int = 8,
|
|
213
207
|
):
|
|
214
208
|
try:
|
|
215
209
|
data_source = getattr(self, self.name.lower())
|
|
216
210
|
data_source.download_historical_data(
|
|
217
|
-
|
|
211
|
+
products=products,
|
|
218
212
|
dtypes=dtypes,
|
|
219
213
|
ptypes=ptypes,
|
|
220
214
|
start_date=start_date,
|
|
221
215
|
end_date=end_date,
|
|
222
216
|
use_minio=use_minio,
|
|
223
217
|
use_ray=use_ray,
|
|
224
|
-
|
|
218
|
+
num_cpus=num_cpus,
|
|
225
219
|
)
|
|
226
220
|
except AttributeError:
|
|
227
221
|
raise Exception(f'{self.name} does not support download_historical_data()')
|
|
@@ -37,7 +37,7 @@ class YahooFinanceFeed(BaseFeed):
|
|
|
37
37
|
"y": [1, 2, 5, 10],
|
|
38
38
|
}
|
|
39
39
|
# yfinance's valid intervals
|
|
40
|
-
|
|
40
|
+
SUPPORTED_TIMEFRAMES_AND_PERIODS = {
|
|
41
41
|
"m": [1, 2, 5, 15, 30, 60, 90],
|
|
42
42
|
"h": [1],
|
|
43
43
|
"d": [1, 5],
|
|
@@ -56,17 +56,19 @@ class YahooFinanceFeed(BaseFeed):
|
|
|
56
56
|
def get_historical_data(
|
|
57
57
|
self,
|
|
58
58
|
symbol: str,
|
|
59
|
-
rollback_period: str | Literal["ytd", "max"] = "1M",
|
|
60
59
|
resolution: str = "1d",
|
|
60
|
+
rollback_period: str | Literal["ytd", "max"] = "1M",
|
|
61
61
|
start_date: str = "",
|
|
62
62
|
end_date: str = "",
|
|
63
63
|
use_pfeed_resample: bool = True,
|
|
64
|
+
product: str = "",
|
|
64
65
|
**kwargs,
|
|
65
66
|
) -> pd.DataFrame | pl.DataFrame:
|
|
66
67
|
"""Simple Wrapper of yfinance history().
|
|
67
68
|
For the details of args and kwargs, please refer to https://github.com/ranaroussi/yfinance
|
|
68
69
|
|
|
69
70
|
Args:
|
|
71
|
+
symbol: ticker symbol used in yfinance
|
|
70
72
|
rollback_period: Data resolution or 'ytd' or 'max'
|
|
71
73
|
Period to rollback from today, only used when `start_date` is not specified.
|
|
72
74
|
Default is '1M' = 1 month.
|
|
@@ -78,6 +80,7 @@ class YahooFinanceFeed(BaseFeed):
|
|
|
78
80
|
use_pfeed_resample: Whether to use pfeed's resampling logic.
|
|
79
81
|
Default is True.
|
|
80
82
|
This will automatically be triggered if yfinance does not support the resolution.
|
|
83
|
+
product: Product symbol, e.g. AAPL_USD_STK. If provided, it will be used to create a column 'product' in the output dataframe.
|
|
81
84
|
**kwargs: kwargs supported by `yfinance`
|
|
82
85
|
"""
|
|
83
86
|
from pfeed import etl
|
|
@@ -113,11 +116,11 @@ class YahooFinanceFeed(BaseFeed):
|
|
|
113
116
|
etimeframe = self._ADAPTER["timeframe"].get(timeframe, timeframe)
|
|
114
117
|
interval = str(resolution.period) + etimeframe
|
|
115
118
|
|
|
116
|
-
# manipulate the input resolution and support e.g. '2d' resolution even it is not in the
|
|
119
|
+
# manipulate the input resolution and support e.g. '2d' resolution even it is not in the SUPPORTED_TIMEFRAMES_AND_PERIODS
|
|
117
120
|
if (use_pfeed_resample and resolution.period != 1) or (
|
|
118
|
-
timeframe in self.
|
|
119
|
-
and resolution.period not in self.
|
|
120
|
-
and 1 in self.
|
|
121
|
+
timeframe in self.SUPPORTED_TIMEFRAMES_AND_PERIODS
|
|
122
|
+
and resolution.period not in self.SUPPORTED_TIMEFRAMES_AND_PERIODS[timeframe]
|
|
123
|
+
and 1 in self.SUPPORTED_TIMEFRAMES_AND_PERIODS[timeframe]
|
|
121
124
|
):
|
|
122
125
|
# if resolution (e.g. '2d') is not supported in yfinance, using "1d" instead'
|
|
123
126
|
interval = "1" + etimeframe
|
|
@@ -167,12 +170,17 @@ class YahooFinanceFeed(BaseFeed):
|
|
|
167
170
|
df = etl.resample_data(df, resolution)
|
|
168
171
|
|
|
169
172
|
df["symbol"] = symbol
|
|
173
|
+
if product:
|
|
174
|
+
df["product"] = product
|
|
170
175
|
df["resolution"] = repr(resolution)
|
|
171
176
|
# reorder columns
|
|
172
|
-
|
|
177
|
+
if "product" in df.columns:
|
|
178
|
+
left_cols = ["ts", "symbol", "product", "resolution"]
|
|
179
|
+
else:
|
|
180
|
+
left_cols = ["ts", "symbol", "resolution"]
|
|
173
181
|
df = df[left_cols + [col for col in df.columns if col not in left_cols]]
|
|
174
182
|
|
|
175
|
-
if self.data_tool == "pandas":
|
|
183
|
+
if self.data_tool.name == "pandas":
|
|
176
184
|
return df
|
|
177
|
-
elif self.data_tool == "polars":
|
|
185
|
+
elif self.data_tool.name == "polars":
|
|
178
186
|
return pl.from_pandas(df)
|
|
@@ -141,7 +141,7 @@ def download_historical_data(
|
|
|
141
141
|
pdt_splits = pdt.split('_')
|
|
142
142
|
ptype = pdt_splits[-1].upper()
|
|
143
143
|
exchange = Exchange(env='LIVE', ptype=ptype)
|
|
144
|
-
product = exchange.create_product(
|
|
144
|
+
product = exchange.create_product(pdt)
|
|
145
145
|
except KeyError:
|
|
146
146
|
raise ValueError(f'"{pdt}" is not a valid product in {source}')
|
|
147
147
|
efilenames = api.get_efilenames(pdt)
|
pfeed/sources/bybit/const.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
DATA_SOURCE = 'BYBIT'
|
|
3
3
|
SUPPORTED_PRODUCT_TYPES = ['SPOT', 'PERP', 'IPERP', 'FUT', 'IFUT']
|
|
4
4
|
SUPPORTED_DATA_TYPES = ['raw_tick', 'tick', 'second', 'minute', 'hour', 'daily']
|
|
5
|
+
# this specifies the raw resolution of the data type, e.g. 'raw_minute': 'r5m', meaning raw_minute is 5-minute data
|
|
5
6
|
DTYPES_TO_RAW_RESOLUTIOS = {
|
|
6
7
|
'raw_tick': 'r1tick',
|
|
7
8
|
}
|
pfeed/sources/bybit/download.py
CHANGED
|
@@ -98,14 +98,14 @@ def _run_etl(storage: tSUPPORTED_STORAGES, product: CryptoProduct, date: datetim
|
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
def download_historical_data(
|
|
101
|
-
|
|
101
|
+
products: str | list[str] | None=None,
|
|
102
102
|
dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None=None,
|
|
103
103
|
ptypes: tSUPPORTED_PRODUCT_TYPES | list[tSUPPORTED_PRODUCT_TYPES] | None=None,
|
|
104
104
|
start_date: str | None=None,
|
|
105
105
|
end_date: str | None=None,
|
|
106
106
|
use_minio: bool=False,
|
|
107
107
|
use_ray: bool=True,
|
|
108
|
-
|
|
108
|
+
num_cpus: int=8,
|
|
109
109
|
) -> None:
|
|
110
110
|
from pfund.plogging import set_up_loggers
|
|
111
111
|
|
|
@@ -116,12 +116,12 @@ def download_historical_data(
|
|
|
116
116
|
logger = logging.getLogger(DATA_SOURCE.lower() + '_data')
|
|
117
117
|
|
|
118
118
|
print(f'''Hint:
|
|
119
|
-
You can use the command "pfeed config --data-path
|
|
119
|
+
You can use the command "pfeed config --data-path {{your_path}}" to set your data path that stores downloaded data.
|
|
120
120
|
The current data path is: {config.data_path}.
|
|
121
121
|
''')
|
|
122
122
|
|
|
123
123
|
resolutions: list[ExtendedResolution] = _convert_dtypes_to_resolutions(dtypes)
|
|
124
|
-
pdts = _prepare_pdts(
|
|
124
|
+
pdts = _prepare_pdts(products, ptypes)
|
|
125
125
|
start_date, end_date = _prepare_dates(start_date, end_date)
|
|
126
126
|
dates: list[datetime.date] = get_dates_in_between(start_date, end_date)
|
|
127
127
|
|
|
@@ -136,7 +136,7 @@ def download_historical_data(
|
|
|
136
136
|
ray_tasks = defaultdict(list)
|
|
137
137
|
for pdt in pdts if use_ray else tqdm(pdts, desc=f'Downloading {DATA_SOURCE} historical data by product', colour='green'):
|
|
138
138
|
try:
|
|
139
|
-
product = exchange.create_product(
|
|
139
|
+
product = exchange.create_product(pdt)
|
|
140
140
|
except KeyError:
|
|
141
141
|
raise ValueError(f'"{pdt}" is not a valid product in {DATA_SOURCE}')
|
|
142
142
|
efilenames = api.get_efilenames(pdt)
|
|
@@ -176,7 +176,7 @@ def download_historical_data(
|
|
|
176
176
|
try:
|
|
177
177
|
log_listener = None
|
|
178
178
|
logical_cpus = os.cpu_count()
|
|
179
|
-
num_cpus = min(
|
|
179
|
+
num_cpus = min(num_cpus, logical_cpus)
|
|
180
180
|
ray.init(num_cpus=num_cpus)
|
|
181
181
|
batch_size = num_cpus
|
|
182
182
|
print(f"Ray's num_cpus is set to {num_cpus}")
|
pfeed/types/common_literals.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
from typing import Literal
|
|
2
2
|
|
|
3
|
+
|
|
3
4
|
# since Literal doesn't support variables as inputs, define variables in common.py here with prefix 't'
|
|
4
5
|
tSUPPORTED_ENVIRONMENTS = Literal['BACKTEST', 'SANDBOX', 'PAPER', 'LIVE']
|
|
5
6
|
tSUPPORTED_DATA_FEEDS = Literal['YAHOO_FINANCE', 'BYBIT']
|
|
6
7
|
tSUPPORTED_STORAGES = Literal['local', 'minio']
|
|
7
8
|
tSUPPORTED_DOWNLOAD_DATA_SOURCES = Literal['BYBIT', 'BINANCE']
|
|
8
9
|
tSUPPORTED_CRYPTO_EXCHANGES = Literal['BYBIT', 'BINANCE']
|
|
9
|
-
tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars']
|
|
10
|
+
tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars', 'dask', 'spark']
|
|
11
|
+
tSUPPORTED_DATA_ENGINES = Literal['ray', 'dask', 'spark']
|
|
10
12
|
tSUPPORTED_DATA_TYPES = Literal[
|
|
11
13
|
'raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
|
|
12
14
|
'tick', 'second', 'minute', 'hour', 'daily'
|
pfeed/types/core.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pfeed
|
|
3
|
+
Version: 0.0.2.dev2
|
|
4
|
+
Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
|
|
5
|
+
Home-page: https://pfund.ai
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
|
|
8
|
+
Author: Stephen Yau
|
|
9
|
+
Author-email: softwareentrepreneer+pfeed@gmail.com
|
|
10
|
+
Requires-Python: >=3.10,<4.0
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Provides-Extra: all
|
|
17
|
+
Provides-Extra: core
|
|
18
|
+
Provides-Extra: dask
|
|
19
|
+
Provides-Extra: dataops
|
|
20
|
+
Provides-Extra: dfs
|
|
21
|
+
Provides-Extra: polars
|
|
22
|
+
Provides-Extra: spark
|
|
23
|
+
Provides-Extra: storage
|
|
24
|
+
Requires-Dist: adlfs (>=2024.7.0,<2025.0.0) ; extra == "storage" or extra == "all"
|
|
25
|
+
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
|
26
|
+
Requires-Dist: bytewax (>=0.21.0,<0.22.0) ; extra == "dataops" or extra == "all"
|
|
27
|
+
Requires-Dist: coiled (>=1.54.0,<2.0.0) ; extra == "dask" or extra == "dfs" or extra == "all"
|
|
28
|
+
Requires-Dist: confluent-kafka (>=2.5.3,<3.0.0) ; extra == "dataops" or extra == "all"
|
|
29
|
+
Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "storage" or extra == "all"
|
|
30
|
+
Requires-Dist: dask[complete] (>=2024.9.1,<2025.0.0) ; extra == "dask" or extra == "dfs" or extra == "all"
|
|
31
|
+
Requires-Dist: databento (>=0.42.0,<0.43.0) ; extra == "core" or extra == "all"
|
|
32
|
+
Requires-Dist: databricks-connect (>=15.4.2,<16.0.0) ; extra == "spark" or extra == "dfs" or extra == "all"
|
|
33
|
+
Requires-Dist: fastparquet (>=2024.2.0,<2025.0.0)
|
|
34
|
+
Requires-Dist: gcsfs (>=2024.9.0,<2025.0.0) ; extra == "storage" or extra == "all"
|
|
35
|
+
Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "core" or extra == "all"
|
|
36
|
+
Requires-Dist: modin[all] (>=0.32.0,<0.33.0) ; extra == "core" or extra == "all"
|
|
37
|
+
Requires-Dist: pandas (>=2.2.0,<3.0.0)
|
|
38
|
+
Requires-Dist: pfund (>=0.0.2.dev1,<0.0.3)
|
|
39
|
+
Requires-Dist: polars (>=1.7.1,<2.0.0) ; extra == "polars" or extra == "dfs" or extra == "all"
|
|
40
|
+
Requires-Dist: polars-xdt (>=0.16.0,<0.17.0) ; extra == "polars" or extra == "dfs" or extra == "all"
|
|
41
|
+
Requires-Dist: polygon-api-client (>=1.14.2,<2.0.0) ; extra == "core" or extra == "all"
|
|
42
|
+
Requires-Dist: prefect (>=3.0.5,<4.0.0) ; extra == "dataops" or extra == "all"
|
|
43
|
+
Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "core" or extra == "all"
|
|
44
|
+
Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "storage" or extra == "all"
|
|
45
|
+
Requires-Dist: pyarrow (>=17.0.0,<18.0.0) ; extra == "core" or extra == "all"
|
|
46
|
+
Requires-Dist: pydantic (>=2.7.0,<3.0.0)
|
|
47
|
+
Requires-Dist: pyspark (>=3.5.3,<4.0.0) ; extra == "spark" or extra == "dfs" or extra == "all"
|
|
48
|
+
Requires-Dist: ray (>=2.35.0,<3.0.0) ; extra == "core" or extra == "all"
|
|
49
|
+
Requires-Dist: s3fs (>=2024.9.0,<2025.0.0) ; extra == "storage" or extra == "all"
|
|
50
|
+
Requires-Dist: yfinance (>=0.2.43,<0.3.0)
|
|
51
|
+
Project-URL: Documentation, https://pfeed-docs.pfund.ai
|
|
52
|
+
Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
|
|
53
|
+
Description-Content-Type: text/markdown
|
|
54
|
+
|
|
55
|
+
# PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
|
|
56
|
+
|
|
57
|
+
[](https://x.com/pfund_ai)
|
|
58
|
+

|
|
59
|
+

|
|
60
|
+
[](https://pypi.org/project/pfeed)
|
|
61
|
+

|
|
62
|
+
<!-- [](https://jupyterbook.org) -->
|
|
63
|
+
[](https://python-poetry.org/)
|
|
64
|
+
|
|
65
|
+
[MinIO]: https://min.io/
|
|
66
|
+
[PFund]: https://github.com/PFund-Software-Ltd/pfund
|
|
67
|
+
[Polars]: https://github.com/pola-rs/polars
|
|
68
|
+
[Dask]: https://www.dask.org/
|
|
69
|
+
[Spark]: https://spark.apache.org/docs/latest/api/python/index.html
|
|
70
|
+
[PyTrade.org]: https://pytrade.org
|
|
71
|
+
[Yahoo Finance]: https://github.com/ranaroussi/yfinance
|
|
72
|
+
[Bybit]: https://public.bybit.com
|
|
73
|
+
[Binance]: https://data.binance.vision
|
|
74
|
+
[OKX]: https://www.okx.com/data-download
|
|
75
|
+
[Databento]: https://databento.com/
|
|
76
|
+
[Polygon]: https://polygon.io/
|
|
77
|
+
[FirstRate Data]: https://firstratedata.com
|
|
78
|
+
|
|
79
|
+
## Problem
|
|
80
|
+
Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
|
|
81
|
+
|
|
82
|
+
## Solution
|
|
83
|
+
By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, allowing traders to focus on strategy development.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders. It enables you to **download historical data**, **stream real-time data**, and **store cleaned data** in a **local data lake for quantitative analysis**, by automating the processes of data collection, cleaning, transformation, and storage.
|
|
87
|
+
|
|
88
|
+
## Core Features
|
|
89
|
+
- [x] Download or stream reliable, validated and **clean data** for research, backtesting, or live trading
|
|
90
|
+
- [x] Get historical data (**dataframe**) or live data in standardized formats by just calling a **single** function
|
|
91
|
+
- [x] **Own your data** by storing them locally using [MinIO], with the option to connect to the cloud
|
|
92
|
+
- [x] Interact with different kinds of data (including TradFi, CeFi and DeFi) using a **unified interface**
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
<details>
|
|
97
|
+
<summary>Table of Contents</summary>
|
|
98
|
+
|
|
99
|
+
- [Installation](#installation)
|
|
100
|
+
- [Quick Start](#quick-start)
|
|
101
|
+
- [Get Historical Data in Dataframe](#1-get-historical-data-in-dataframe-no-storage)
|
|
102
|
+
- [Download Historical Data on Command Line](#2-download-historical-data-on-the-command-line-interface-cli)
|
|
103
|
+
- [Download Historical Data in Python](#3-download-historical-data-in-python)
|
|
104
|
+
- [Supported Data Sources](#supported-data-sources)
|
|
105
|
+
- [Related Projects](#related-projects)
|
|
106
|
+
- [Disclaimer](#disclaimer)
|
|
107
|
+
|
|
108
|
+
</details>
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
## Installation
|
|
113
|
+
> For more installation options, please refer to the [documentation](https://pfeed-docs.pfund.ai/installation).
|
|
114
|
+
```bash
|
|
115
|
+
# [RECOMMENDED]: Full Features, choose this if you do not care about the package size
|
|
116
|
+
pip install -U "pfeed[all]"
|
|
117
|
+
|
|
118
|
+
# Minimal Features, only supports getting, downloading and streaming data
|
|
119
|
+
pip install -U "pfeed[core]"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
## Quick Start
|
|
125
|
+
### 1. Get Historical Data in Dataframe (No storage)
|
|
126
|
+
Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import pfeed as pe
|
|
130
|
+
|
|
131
|
+
feed = pe.BybitFeed(data_tool='polars')
|
|
132
|
+
|
|
133
|
+
df = feed.get_historical_data(
|
|
134
|
+
'BTC_USDT_PERP',
|
|
135
|
+
resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
|
|
136
|
+
start_date='2024-03-01',
|
|
137
|
+
end_date='2024-03-01',
|
|
138
|
+
)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Printing the first few rows of `df`:
|
|
142
|
+
| | ts | product | resolution | open | high | low | close | volume |
|
|
143
|
+
|---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
|
|
144
|
+
| 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
|
|
145
|
+
| 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
|
|
146
|
+
| 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
|
|
147
|
+
|
|
148
|
+
> By using pfeed, you are just a few lines of code away from getting a standardized dataframe. How convenient!
|
|
149
|
+
|
|
150
|
+
### 2. Download Historical Data on the Command Line Interface (CLI)
|
|
151
|
+
> For more CLI commands, please refer to the [documentation](https://pfeed-docs.pfund.ai/cli-commands).
|
|
152
|
+
```bash
|
|
153
|
+
# download data, default data type (dtype) is 'raw' data
|
|
154
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
|
|
155
|
+
|
|
156
|
+
# download multiple products BTC_USDT_PERP and ETH_USDT_PERP as minute data and store them locally
|
|
157
|
+
pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute --use-minio
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### 3. Download Historical Data in Python
|
|
161
|
+
```python
|
|
162
|
+
import pfeed as pe
|
|
163
|
+
|
|
164
|
+
# compared to the CLI approach, this approach is more convenient for downloading multiple products
|
|
165
|
+
pe.download(
|
|
166
|
+
data_source='bybit',
|
|
167
|
+
products=[
|
|
168
|
+
'BTC_USDT_PERP',
|
|
169
|
+
'ETH_USDT_PERP',
|
|
170
|
+
'BCH_USDT_PERP',
|
|
171
|
+
],
|
|
172
|
+
dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
|
|
173
|
+
start_date='2024-03-01',
|
|
174
|
+
end_date='2024-03-08',
|
|
175
|
+
use_minio=False,
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
## Supported Data Sources
|
|
182
|
+
| Data Source | Get Historical Data | Download Historical Data | Get Live Data | Stream Live Data |
|
|
183
|
+
| -------------------- | ------------------- | ------------------------ | --------------| ---------------- |
|
|
184
|
+
| [Yahoo Finance] | 🟢 | ⚪ | ⚪ | ⚪ |
|
|
185
|
+
| [Bybit] | 🟢 | 🟢 | 🟡 | 🔴 |
|
|
186
|
+
| *Interactive Brokers | 🔴 | ⚪ | 🔴 | 🔴 |
|
|
187
|
+
| *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
|
|
188
|
+
| *[Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
189
|
+
| *[Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
190
|
+
| [Binance] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
191
|
+
| [OKX] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
192
|
+
|
|
193
|
+
🟢 = finished \
|
|
194
|
+
🟡 = in progress \
|
|
195
|
+
🔴 = todo \
|
|
196
|
+
⚪ = not applicable \
|
|
197
|
+
\* = paid data
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
## Related Projects
|
|
202
|
+
- [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
|
|
203
|
+
- [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
## Disclaimer
|
|
208
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
209
|
+
|
|
210
|
+
This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
|
|
211
|
+
|
|
212
|
+
The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
|
|
213
|
+
|
|
@@ -1,47 +1,49 @@
|
|
|
1
|
-
pfeed/__init__.py,sha256=
|
|
1
|
+
pfeed/__init__.py,sha256=zD1yEHw4-oIX-fNIR2dEVh-tQ5cyVhBpwi0IjOadYaU,1669
|
|
2
2
|
pfeed/cli/__init__.py,sha256=xRBbc1F6E4xiWFhNKHA12OjKXd29QI9T2rA11iZDhrk,66
|
|
3
3
|
pfeed/cli/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
pfeed/cli/commands/config.py,sha256=
|
|
4
|
+
pfeed/cli/commands/config.py,sha256=hXQ2lljvUz8mWcWrEXgYDU3IJxRJdhon_MB7LpJsiAE,3131
|
|
5
|
+
pfeed/cli/commands/doc.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
6
|
pfeed/cli/commands/docker_compose.py,sha256=xFN2nbZ10pSZSWxgUZRdUnxj8pbq12RplEPvms95kt4,1160
|
|
6
|
-
pfeed/cli/commands/download.py,sha256=
|
|
7
|
+
pfeed/cli/commands/download.py,sha256=ZxnAv4Jx9hbEo3M33xM-b0hSLwjRZiE7RE4ivUtJnek,2900
|
|
7
8
|
pfeed/cli/commands/open.py,sha256=VGrSGwowQJMTJ1j8R67J4rkOcIS7Xr6PsFnwYYPpOCY,1728
|
|
8
9
|
pfeed/cli/commands/stream.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
pfeed/cli/main.py,sha256=
|
|
10
|
-
pfeed/config_handler.py,sha256=
|
|
11
|
-
pfeed/const/common.py,sha256=
|
|
10
|
+
pfeed/cli/main.py,sha256=55spaJbEQY7QgAIByR3EPQYYUz9bmDWT4qhGBiNoOXA,767
|
|
11
|
+
pfeed/config_handler.py,sha256=vIQwicjRKMen_S3F6amrvbIMcpXtq_PPXRbxK7Xt3uo,5173
|
|
12
|
+
pfeed/const/common.py,sha256=c-DMmjaPdafgmIb6W56g5suneGZb67WlDz-l6Km_pnY,1093
|
|
12
13
|
pfeed/const/paths.py,sha256=IKBV15R5q1poI4AM2jd-CSqtB9DPewwZSZ8H3YJA72w,477
|
|
13
14
|
pfeed/data_tools/data_tool_pandas.py,sha256=Fn0ej5tS9twO5NzSsxUvc35ud5PMps1EPgQMr-qWLb0,2247
|
|
14
15
|
pfeed/data_tools/data_tool_polars.py,sha256=qsmrz92JweEOK9bwapukezN0C18WHKHrFX5725Mser4,2501
|
|
15
16
|
pfeed/datastore.py,sha256=mKEM1cBeAZnbpV392-pOoYTuxqxXjHGrZhv43P0a2k0,5196
|
|
16
17
|
pfeed/etl.py,sha256=fLHxUZMx_AE-0Pq5S2rTuafHXozDYGbt0fSQUDEXd7M,15446
|
|
17
18
|
pfeed/feeds/__init__.py,sha256=bY7wGBlW4BdMIJRKiZ_dHZf_-ag-CAqN-__iFV5XpK8,153
|
|
18
|
-
pfeed/feeds/base_feed.py,sha256=
|
|
19
|
+
pfeed/feeds/base_feed.py,sha256=HQOOGCII28ga0i9WGryecDsqWMnrb8SfnElgdLeRkSQ,12264
|
|
19
20
|
pfeed/feeds/binance_feed.py,sha256=nS_-WeXeaGtgjAaEV9ZOFi4xqkWHLoRJaiCSMWNHma4,632
|
|
20
21
|
pfeed/feeds/bybit_feed.py,sha256=Im1sHJtqYhKFRxTMHz3iQ46KVQCUAA5Ug2jZSop9RFk,2093
|
|
21
22
|
pfeed/feeds/custom_csv_feed.py,sha256=qTkanCebga7Vcrz9Gus0YXX6ZZkhxwiXw02ID9zmZ5M,281
|
|
22
|
-
pfeed/feeds/yahoo_finance_feed.py,sha256=
|
|
23
|
+
pfeed/feeds/yahoo_finance_feed.py,sha256=UTDsQ1aIKrAzdt685bPVGdnfqYM_VyOhwDavTind9qg,7323
|
|
23
24
|
pfeed/filepath.py,sha256=Jl__1G314foQ8Fvkh6EoaBxzyIq0wyyXltC6VO-Zr1Y,2819
|
|
24
25
|
pfeed/main.py,sha256=J9ATB3AT4VY7IlCTeK7xeDpOlRJV5cetQ1LZxQn2CDc,294
|
|
25
26
|
pfeed/resolution.py,sha256=YA4q-2Ohl3xaYzjwzEWSEhxepkS8h38Xz1YKcUx3ShE,2409
|
|
26
27
|
pfeed/sources/binance/__init__.py,sha256=GHdr0Tf_sRirKOfqJ__nN4D9L0hZTn657kt-ar_zgLY,498
|
|
27
28
|
pfeed/sources/binance/api.py,sha256=v_WIyke_JQJupdQliTVQ8lqZ3reD7xCQiMFTH-Saguo,3710
|
|
28
29
|
pfeed/sources/binance/const.py,sha256=7XSPvfhGLbi0U0IMWmv0OWb86PPEzObpbBv3WUAzKT8,2034
|
|
29
|
-
pfeed/sources/binance/download.py,sha256
|
|
30
|
+
pfeed/sources/binance/download.py,sha256=go-YRGDtCW4TwTHm4-LGkfmWtB9ehlHHB312TPArFGA,7642
|
|
30
31
|
pfeed/sources/binance/stream.py,sha256=WSyzvrInWCaFPOoeZ2KnBbKef4HKSrUY8zwHanfyAOw,72
|
|
31
32
|
pfeed/sources/bybit/__init__.py,sha256=PgmOrdy3FWqmii122brz5rC08ig15erH70gIra6-Kn0,317
|
|
32
33
|
pfeed/sources/bybit/api.py,sha256=EP6CJo_aaBrbezkBs1z2Y5PMXDqVBfRyjjV3X8tag5A,2790
|
|
33
|
-
pfeed/sources/bybit/const.py,sha256=
|
|
34
|
-
pfeed/sources/bybit/download.py,sha256=
|
|
34
|
+
pfeed/sources/bybit/const.py,sha256=NEr8uU9KJvt26w_UXmn1PJtZh5Y7HuNCpBV2xICoDWE,1198
|
|
35
|
+
pfeed/sources/bybit/download.py,sha256=AXqAoXQMWDd3Jyl2V4aXgv2lu4AWqdd_EXRGq0BS5sE,9316
|
|
35
36
|
pfeed/sources/bybit/stream.py,sha256=b0frKmuhC2jTmq4flX5_ff0i7U-ksjPY6OjGTjsD1Zs,144
|
|
36
37
|
pfeed/sources/bybit/types.py,sha256=gOZKjHe9keq50FHz4KAid2JH2gw4M3iMp__NeMh56AU,191
|
|
37
38
|
pfeed/sources/bybit/utils.py,sha256=HiMbM62WxVr9gwKhiMKvWSdY1RXw-rEq4OPwadFrZk0,1628
|
|
38
|
-
pfeed/types/common_literals.py,sha256=
|
|
39
|
+
pfeed/types/common_literals.py,sha256=lNbTgXJ5HY_UmahmnvoIlzNJAYHpDUxl7AO0r936feU,712
|
|
40
|
+
pfeed/types/core.py,sha256=BX-XSON6mvRKfuxiiRP3_e47eeO36DHHIY_lfxESfhw,240
|
|
39
41
|
pfeed/utils/file_formats.py,sha256=iEkPmXWN5neDCB-faoQ2pDWjyQ1FmuYutS-9Fr-bgko,2019
|
|
40
42
|
pfeed/utils/monitor.py,sha256=S5roDtzHVpiiuCqVl4oT7MGuky4LPTAQ8MJuyVVqcno,480
|
|
41
43
|
pfeed/utils/utils.py,sha256=LaMctJw12RwEUF9XZQ-T3PZKpnwHZi3MoYes8IZcSkI,4999
|
|
42
44
|
pfeed/utils/validate.py,sha256=UNBd6OIKao1b_5qSVHwfqLKX87Gv9Z4KW7nkFmKckco,1909
|
|
43
|
-
pfeed-0.0.2.
|
|
44
|
-
pfeed-0.0.2.
|
|
45
|
-
pfeed-0.0.2.
|
|
46
|
-
pfeed-0.0.2.
|
|
47
|
-
pfeed-0.0.2.
|
|
45
|
+
pfeed-0.0.2.dev2.dist-info/LICENSE,sha256=QBDNo3Na8pzY1OUD6k6KEIhRvTP_zHg_q78IkYDLzIo,11355
|
|
46
|
+
pfeed-0.0.2.dev2.dist-info/METADATA,sha256=203cAqP7LT32wAwqlYnnAq9MfCvz3zRWXrBiq2pg1ss,11567
|
|
47
|
+
pfeed-0.0.2.dev2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
48
|
+
pfeed-0.0.2.dev2.dist-info/entry_points.txt,sha256=hK2aA9xFMsx6MJX2lKTVtH3uMtM1aCce6vLv9a67RF0,44
|
|
49
|
+
pfeed-0.0.2.dev2.dist-info/RECORD,,
|
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pfeed
|
|
3
|
-
Version: 0.0.2.dev1
|
|
4
|
-
Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
|
|
5
|
-
Home-page: https://pfund.ai
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
|
|
8
|
-
Author: Stephen Yau
|
|
9
|
-
Author-email: softwareentrepreneer+pfeed@gmail.com
|
|
10
|
-
Requires-Python: >=3.10,<3.13
|
|
11
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
-
Provides-Extra: all
|
|
17
|
-
Provides-Extra: boost
|
|
18
|
-
Provides-Extra: data
|
|
19
|
-
Provides-Extra: df
|
|
20
|
-
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
|
21
|
-
Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "boost" or extra == "all"
|
|
22
|
-
Requires-Dist: fastparquet (>=2024.5.0,<2025.0.0)
|
|
23
|
-
Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "data" or extra == "all"
|
|
24
|
-
Requires-Dist: pandas (>=2.2.2,<3.0.0) ; extra == "df" or extra == "all"
|
|
25
|
-
Requires-Dist: pfund (>=0.0.1.dev13,<0.0.2)
|
|
26
|
-
Requires-Dist: polars (>=1.7.1,<2.0.0) ; extra == "df" or extra == "all"
|
|
27
|
-
Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "data" or extra == "all"
|
|
28
|
-
Requires-Dist: pyarrow (>=15.0.0,<16.0.0) ; extra == "df" or extra == "all"
|
|
29
|
-
Requires-Dist: ray (>=2.35.0,<3.0.0) ; extra == "boost" or extra == "all"
|
|
30
|
-
Requires-Dist: s3fs (>=2024.9.0,<2025.0.0) ; extra == "data" or extra == "all"
|
|
31
|
-
Requires-Dist: yfinance (>=0.2.43,<0.3.0)
|
|
32
|
-
Project-URL: Documentation, https://pfeed-docs.pfund.ai
|
|
33
|
-
Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
|
|
34
|
-
Description-Content-Type: text/markdown
|
|
35
|
-
|
|
36
|
-
# PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
|
|
37
|
-
|
|
38
|
-

|
|
39
|
-

|
|
40
|
-
[](https://pypi.org/project/pfeed)
|
|
41
|
-

|
|
42
|
-
[](https://jupyterbook.org)
|
|
43
|
-
[](https://python-poetry.org/)
|
|
44
|
-
|
|
45
|
-
[MinIO]: https://min.io/
|
|
46
|
-
[PFund]: https://github.com/PFund-Software-Ltd/pfund
|
|
47
|
-
[Ray]: https://github.com/ray-project/ray
|
|
48
|
-
[Polars]: https://github.com/pola-rs/polars
|
|
49
|
-
[Prefect]: https://www.prefect.io
|
|
50
|
-
[Timescaledb]: https://www.timescale.com/
|
|
51
|
-
[Dask]: https://www.dask.org/
|
|
52
|
-
[Spark]: https://spark.apache.org/docs/latest/api/python/index.html
|
|
53
|
-
[DuckDB]: https://github.com/duckdb/duckdb
|
|
54
|
-
[Daft]: https://github.com/Eventual-Inc/Daft
|
|
55
|
-
[PyTrade.org]: https://pytrade.org
|
|
56
|
-
[Databento]: https://databento.com/
|
|
57
|
-
[Polygon]: https://polygon.io/
|
|
58
|
-
[Bybit]: https://bybit.com/
|
|
59
|
-
[FirstRate Data]: https://firstratedata.com
|
|
60
|
-
|
|
61
|
-
## Problem
|
|
62
|
-
Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
|
|
63
|
-
|
|
64
|
-
## Solution
|
|
65
|
-
By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, helping traders get to the strategy development phase faster.
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
|
|
69
|
-
|
|
70
|
-
## Core Features
|
|
71
|
-
- [x] Unified approach for interacting with various [data sources](#supported-data-sources) and obtaining historical and live data
|
|
72
|
-
- [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
|
|
73
|
-
- [x] Fast data downloading, utilizing [Ray] for parallelization
|
|
74
|
-
- [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
|
|
75
|
-
- [ ] Integrates with [Prefect] to control data flows
|
|
76
|
-
- [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
|
|
77
|
-
|
|
78
|
-
> It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
|
|
79
|
-
|
|
80
|
-
---
|
|
81
|
-
|
|
82
|
-
<details>
|
|
83
|
-
<summary>Table of Contents</summary>
|
|
84
|
-
|
|
85
|
-
- [Installation](#installation)
|
|
86
|
-
- [Quick Start](#quick-start)
|
|
87
|
-
- [Main Usage: Data Feed](#main-usage-data-feed)
|
|
88
|
-
- [Download Historical Data on Command Line](#download-historical-data-on-command-line)
|
|
89
|
-
- [Download Historical Data in Python](#download-historical-data-in-python)
|
|
90
|
-
- [List Current Config](#list-current-config)
|
|
91
|
-
- [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
|
|
92
|
-
- [Supported Data Sources](#supported-data-sources)
|
|
93
|
-
- [Supported Data Tools](#supported-data-tools)
|
|
94
|
-
- [Related Projects](#related-projects)
|
|
95
|
-
- [Disclaimer](#disclaimer)
|
|
96
|
-
|
|
97
|
-
</details>
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
## Installation
|
|
101
|
-
### Using [Poetry](https://python-poetry.org) (Recommended)
|
|
102
|
-
```bash
|
|
103
|
-
# [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
|
|
104
|
-
poetry add "pfeed[all]"
|
|
105
|
-
|
|
106
|
-
# [Download data + Data tools + Data storage]
|
|
107
|
-
poetry add "pfeed[df,data]"
|
|
108
|
-
|
|
109
|
-
# [Download data + Data tools]
|
|
110
|
-
poetry add "pfeed[df]"
|
|
111
|
-
|
|
112
|
-
# [Download data only]:
|
|
113
|
-
poetry add pfeed
|
|
114
|
-
|
|
115
|
-
# update to the latest version:
|
|
116
|
-
poetry update pfeed
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
### Using Pip
|
|
120
|
-
```bash
|
|
121
|
-
# same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
|
|
122
|
-
pip install "pfeed[all]"
|
|
123
|
-
|
|
124
|
-
# install the latest version:
|
|
125
|
-
pip install -U pfeed
|
|
126
|
-
```
|
|
127
|
-
|
|
128
|
-
### Checking your installation
|
|
129
|
-
```bash
|
|
130
|
-
$ pfeed --version
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
## Quick Start
|
|
134
|
-
### 1. Get Historical Data in Dataframe (No storage)
|
|
135
|
-
Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
|
|
136
|
-
|
|
137
|
-
```python
|
|
138
|
-
import pfeed as pe
|
|
139
|
-
|
|
140
|
-
feed = pe.BybitFeed(data_tool='polars')
|
|
141
|
-
|
|
142
|
-
df = feed.get_historical_data(
|
|
143
|
-
'BTC_USDT_PERP',
|
|
144
|
-
resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
|
|
145
|
-
start_date='2024-03-01',
|
|
146
|
-
end_date='2024-03-01',
|
|
147
|
-
)
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
Printing the first few rows of `df`:
|
|
151
|
-
| | ts | product | resolution | open | high | low | close | volume |
|
|
152
|
-
|---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
|
|
153
|
-
| 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
|
|
154
|
-
| 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
|
|
155
|
-
| 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
|
|
156
|
-
|
|
157
|
-
> By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
### 2. Download Historical Data on the Command Line Interface (CLI)
|
|
162
|
-
```bash
|
|
163
|
-
# download data, default data type (dtype) is 'raw' data
|
|
164
|
-
pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
|
|
165
|
-
|
|
166
|
-
# download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
|
|
167
|
-
pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
|
|
168
|
-
|
|
169
|
-
# download all perpetuals data from bybit
|
|
170
|
-
pfeed download -d BYBIT --ptypes PERP
|
|
171
|
-
|
|
172
|
-
# download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
|
|
173
|
-
pfeed download -d BYBIT
|
|
174
|
-
|
|
175
|
-
# store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
|
|
176
|
-
pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
|
|
177
|
-
|
|
178
|
-
# enable debug mode and turn off using Ray
|
|
179
|
-
pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
### 3. Download Historical Data in Python
|
|
183
|
-
```python
|
|
184
|
-
import pfeed as pe
|
|
185
|
-
|
|
186
|
-
# compared to the CLI approach, this approach is more convenient for downloading multiple products
|
|
187
|
-
pe.download(
|
|
188
|
-
data_source='bybit',
|
|
189
|
-
pdts=[
|
|
190
|
-
'BTC_USDT_PERP',
|
|
191
|
-
'ETH_USDT_PERP',
|
|
192
|
-
'BCH_USDT_PERP',
|
|
193
|
-
],
|
|
194
|
-
dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
|
|
195
|
-
start_date='2024-03-01',
|
|
196
|
-
end_date='2024-03-08',
|
|
197
|
-
use_minio=False,
|
|
198
|
-
)
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
### List Current Config
|
|
202
|
-
```bash
|
|
203
|
-
# list the current config:
|
|
204
|
-
pfeed config --list
|
|
205
|
-
|
|
206
|
-
# change the data storage location to your local project's 'data' folder:
|
|
207
|
-
pfeed config --data-path ./data
|
|
208
|
-
|
|
209
|
-
# for more commands:
|
|
210
|
-
pfeed --help
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
### Run PFeed's docker-compose.yml
|
|
214
|
-
```bash
|
|
215
|
-
# same as 'docker-compose'; the only difference is that it points to pfeed's docker-compose.yml file
|
|
216
|
-
pfeed docker-compose [COMMAND]
|
|
217
|
-
|
|
218
|
-
# e.g. start services
|
|
219
|
-
pfeed docker-compose up -d
|
|
220
|
-
|
|
221
|
-
# e.g. stop services
|
|
222
|
-
pfeed docker-compose down
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
## Supported Data Sources
|
|
227
|
-
| Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
|
|
228
|
-
| ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
|
|
229
|
-
| Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
|
|
230
|
-
| Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
|
|
231
|
-
| *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
|
|
232
|
-
| *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
|
|
233
|
-
| [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
234
|
-
| [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
235
|
-
| Binance | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
236
|
-
| OKX | 🔴 | 🔴 | 🔴 | 🔴 |
|
|
237
|
-
|
|
238
|
-
🟢 = finished \
|
|
239
|
-
🟡 = in progress \
|
|
240
|
-
🔴 = todo \
|
|
241
|
-
⚪ = not applicable \
|
|
242
|
-
\* = paid data
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
## Supported Data Tools
|
|
246
|
-
| Data Tools | Supported |
|
|
247
|
-
| ------------------------ | --------- |
|
|
248
|
-
| Pandas | 🟢 |
|
|
249
|
-
| [Polars] | 🟢 |
|
|
250
|
-
| [Dask] | 🔴 |
|
|
251
|
-
| [Spark] | 🔴 |
|
|
252
|
-
| [DuckDB] | 🔴 |
|
|
253
|
-
| [Daft] | 🔴 |
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
## Related Projects
|
|
257
|
-
- [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
|
|
258
|
-
- [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
## Disclaimer
|
|
262
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
263
|
-
|
|
264
|
-
This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
|
|
265
|
-
|
|
266
|
-
The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
|
|
267
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|