pfeed 0.0.2.dev1__py3-none-any.whl → 0.0.2.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pfeed/__init__.py CHANGED
@@ -8,32 +8,32 @@ import importlib
8
8
  from importlib.metadata import version
9
9
 
10
10
  from pfeed.config_handler import configure, get_config
11
- from pfeed.const.common import ALIASES
11
+ from pfeed.const.common import ALIASES as aliases
12
12
  from pfeed.sources import bybit
13
13
  from pfeed.feeds import BybitFeed, YahooFinanceFeed
14
14
 
15
15
 
16
16
  def download_historical_data(
17
17
  data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
18
- pdts: str | list[str] | None = None,
18
+ products: str | list[str] | None = None,
19
19
  dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
20
20
  ptypes: str | list[str] | None = None,
21
21
  start_date: str | None = None,
22
22
  end_date: str | None = None,
23
23
  use_minio: bool = False,
24
24
  use_ray: bool = True,
25
- ray_num_cpus: int = 8,
25
+ num_cpus: int = 8,
26
26
  ):
27
27
  data_source = importlib.import_module(f"pfeed.sources.{data_source.lower()}")
28
28
  return data_source.download_historical_data(
29
- pdts=pdts,
29
+ products=products,
30
30
  dtypes=dtypes,
31
31
  ptypes=ptypes,
32
32
  start_date=start_date,
33
33
  end_date=end_date,
34
34
  use_minio=use_minio,
35
35
  use_ray=use_ray,
36
- ray_num_cpus=ray_num_cpus,
36
+ num_cpus=num_cpus,
37
37
  )
38
38
 
39
39
 
@@ -53,7 +53,7 @@ __all__ = (
53
53
  "__version__",
54
54
  "configure",
55
55
  "get_config",
56
- "ALIASES",
56
+ "aliases",
57
57
  "bybit",
58
58
  "binance",
59
59
  "YahooFinanceFeed",
@@ -32,33 +32,30 @@ def remove_config(config_file_path: str | Path):
32
32
  @click.option('--use-fork-process', type=bool, help='If True, multiprocessing.set_start_method("fork")')
33
33
  @click.option('--use-custom-excepthook', type=bool, help='If True, log uncaught exceptions to file')
34
34
  @click.option('--env-file', 'env_file_path', type=click.Path(resolve_path=True, exists=True), help='Path to the .env file')
35
- @click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
36
- @click.option('--list', '-l', is_flag=True, is_eager=True, help='List all available options')
37
- @click.option('--reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
38
- def config(ctx, **kwargs):
35
+ @click.option('--debug', '-d', type=bool, help='If True, enable debug mode where logs at DEBUG level will be printed')
36
+ @click.option('--list', '-l', 'is_list', is_flag=True, is_eager=True, help='List all available options')
37
+ @click.option('--reset', 'is_reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
38
+ def config(ctx, is_list, is_reset, **kwargs):
39
39
  """Configures pfeed settings."""
40
40
  config: ConfigHandler = ctx.obj['config']
41
41
 
42
42
  # Filter out options that were not provided by the user
43
- provided_options = {k: v for k, v in kwargs.items() if v is not None and v is not False}
43
+ provided_options = {k: v for k, v in kwargs.items() if v is not None}
44
44
 
45
- if kwargs.get('list'): # Check if --list was used
46
- del provided_options['list']
45
+ if is_list: # Check if --list was used
47
46
  assert not provided_options, "No options should be provided with --list"
48
47
  config_dict = config.__dict__
49
48
  config_dict.update({'config_file_path': USER_CONFIG_FILE_PATH})
50
49
  click.echo(f"PFeed's config:\n{pformat(config_dict)}")
51
50
  return
52
51
 
53
- if kwargs.get('reset'): # Check if --reset was used
54
- del provided_options['reset']
52
+ if is_reset: # Check if --reset was used
55
53
  assert not provided_options, "No options should be provided with --reset"
56
54
  remove_config(USER_CONFIG_FILE_PATH)
57
55
  click.echo("PFeed's config successfully reset.")
58
- return
59
56
 
60
57
  # raise a usage error if no options are provided
61
- if not provided_options:
58
+ if not provided_options and not is_list and not is_reset:
62
59
  raise click.UsageError("No options provided. Use --list to see all available options.")
63
60
  else:
64
61
  for option, value in provided_options.items():
File without changes
@@ -21,7 +21,7 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
21
21
 
22
22
  @click.command()
23
23
  @click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
24
- @click.option('--pdts', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
24
+ @click.option('--products', '-p', 'products', multiple=True, default=[], help='List of trading products')
25
25
  @click.option('--dtypes', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
26
26
  @click.option('--ptypes', '--pt', 'ptypes', multiple=True, default=[], type=click.Choice(SUPPORTED_PRODUCT_TYPES, case_sensitive=False), help='List of product types, e.g. PERP = get all perpetuals')
27
27
  @click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
@@ -31,17 +31,17 @@ SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = SUPPORTED_DATA_TYPES + ['raw']
31
31
  @click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
32
32
  @click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
33
33
  @click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
34
- def download(data_source, pdts, dtypes, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
34
+ def download(data_source, products, dtypes, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
35
35
  pe.configure(env_file_path=env_file_path, debug=debug)
36
36
  data_source = ALIASES.get(data_source, data_source)
37
37
  pipeline = importlib.import_module(f'pfeed.sources.{data_source.lower()}.download')
38
38
  pipeline.download_historical_data(
39
- pdts=pdts,
39
+ products=products,
40
40
  dtypes=dtypes,
41
41
  ptypes=ptypes,
42
42
  start_date=start_date.date().strftime('%Y-%m-%d') if start_date else start_date,
43
43
  end_date=end_date.date().strftime('%Y-%m-%d') if end_date else end_date,
44
44
  use_ray=not no_ray,
45
- ray_num_cpus=num_cpus,
45
+ num_cpus=num_cpus,
46
46
  use_minio=use_minio,
47
47
  )
pfeed/cli/main.py CHANGED
@@ -6,6 +6,7 @@ from pfeed.cli.commands.config import config
6
6
  from pfeed.cli.commands.download import download
7
7
  # from pfeed.cli.commands.stream import stream
8
8
  from pfeed.cli.commands.open import open
9
+ from pfeed.cli.commands.doc import doc
9
10
 
10
11
 
11
12
  @click.group(context_settings={"help_option_names": ["-h", "--help"]})
@@ -21,4 +22,5 @@ pfeed_group.add_command(docker_compose)
21
22
  pfeed_group.add_command(config)
22
23
  pfeed_group.add_command(download)
23
24
  # pfeed_group.add_command(stream)
24
- pfeed_group.add_command(open)
25
+ pfeed_group.add_command(open)
26
+ pfeed_group.add_command(doc)
pfeed/config_handler.py CHANGED
@@ -33,7 +33,7 @@ class ConfigHandler:
33
33
  logging_config: dict | None = None
34
34
  use_fork_process: bool = True
35
35
  use_custom_excepthook: bool = False
36
- env_file_path: str | None=None
36
+ env_file_path: str = ''
37
37
  debug: bool = False
38
38
 
39
39
  @classmethod
@@ -57,9 +57,9 @@ class ConfigHandler:
57
57
  return cls(**config)
58
58
 
59
59
  def __post_init__(self):
60
- self.initialize()
60
+ self._initialize()
61
61
 
62
- def initialize(self):
62
+ def _initialize(self):
63
63
  self.logging_config = self.logging_config or {}
64
64
 
65
65
  for path in [self.data_path]:
@@ -76,19 +76,15 @@ class ConfigHandler:
76
76
  self.load_env_file(self.env_file_path)
77
77
 
78
78
  if self.debug:
79
- is_loggers_set_up = bool(logging.getLogger('pfeed').handlers)
80
- if is_loggers_set_up:
81
- print('loggers are already set up, ignoring enabling debug mode')
82
- else:
83
- self.enable_debug_mode()
79
+ self.enable_debug_mode()
84
80
 
85
- def load_env_file(self, env_file_path: str | None):
81
+ def load_env_file(self, env_file_path: str=''):
86
82
  from dotenv import find_dotenv, load_dotenv
87
83
 
88
84
  if not env_file_path:
89
- found_env_file_path = find_dotenv(usecwd=True, raise_error_if_not_found=False)
90
- if found_env_file_path:
91
- print(f'.env file path is not specified, using env file in "{found_env_file_path}"')
85
+ env_file_path = find_dotenv(usecwd=True, raise_error_if_not_found=False)
86
+ if env_file_path:
87
+ print(f'.env file path is not specified, using env file in "{env_file_path}"')
92
88
  else:
93
89
  # print('.env file is not found')
94
90
  return
@@ -96,6 +92,10 @@ class ConfigHandler:
96
92
 
97
93
  def enable_debug_mode(self):
98
94
  '''Enables debug mode by setting the log level to DEBUG for all stream handlers'''
95
+ is_loggers_set_up = bool(logging.getLogger('pfeed').handlers)
96
+ if is_loggers_set_up:
97
+ print('loggers are already set up, ignoring debug mode')
98
+ return
99
99
  if 'handlers' not in self.logging_config:
100
100
  self.logging_config['handlers'] = {}
101
101
  for handler in ['stream_handler', 'stream_path_handler']:
@@ -145,7 +145,7 @@ def configure(
145
145
  else:
146
146
  raise AttributeError(f'{k} is not an attribute of ConfigHandler')
147
147
 
148
- _global_config.initialize()
148
+ _global_config._initialize()
149
149
  return _global_config
150
150
 
151
151
 
pfeed/const/common.py CHANGED
@@ -1,9 +1,25 @@
1
+ from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_DATA_ENGINES
2
+
3
+
1
4
  SUPPORTED_ENVIRONMENTS = ['BACKTEST', 'SANDBOX', 'PAPER', 'LIVE']
2
5
  SUPPORTED_DATA_FEEDS = ['YAHOO_FINANCE', 'BYBIT', 'BINANCE']
3
6
  SUPPORTED_STORAGES = ['local', 'minio']
4
7
  SUPPORTED_DOWNLOAD_DATA_SOURCES = ['BYBIT', 'BINANCE']
5
8
  SUPPORTED_CRYPTO_EXCHANGES = ['BYBIT', 'BINANCE']
6
- SUPPORTED_DATA_TOOLS = ['pandas', 'polars']
9
+ SUPPORTED_DATA_TOOLS: dict[tSUPPORTED_DATA_TOOLS, tSUPPORTED_DATA_ENGINES | bool] = {
10
+ 'pandas': ['dask'],
11
+ 'polars': ['ray'],
12
+ # True means the data tool is also an execution engine
13
+ 'dask': True,
14
+ 'spark': True,
15
+ }
16
+ SUPPORTED_DATA_ENGINES: dict[tSUPPORTED_DATA_ENGINES, list[str]] = {
17
+ # execution engine: supported cloud services
18
+ 'dask': ['coiled'],
19
+ 'spark': ['databricks'],
20
+ 'ray': ['aws'],
21
+ }
22
+ SUPPORTED_CLOUDS = ['aws']
7
23
  SUPPORTED_PRODUCT_TYPES = ['SPOT', 'PERP', 'IPERP', 'FUT', 'IFUT']
8
24
  SUPPORTED_DATA_TYPES = [
9
25
  'raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
@@ -12,4 +28,5 @@ SUPPORTED_DATA_TYPES = [
12
28
 
13
29
  ALIASES = {
14
30
  'YF': 'YAHOO_FINANCE',
31
+ 'FRD': 'FIRSTRATE_DATA'
15
32
  }
pfeed/feeds/base_feed.py CHANGED
@@ -1,15 +1,9 @@
1
1
  from __future__ import annotations
2
2
  from typing import TYPE_CHECKING
3
3
  if TYPE_CHECKING:
4
- try:
5
- import pandas as pd
6
- import polars as pl
7
- except ImportError:
8
- pass
9
- from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_STORAGES
10
- from pfeed.sources.bybit.types import tSUPPORTED_DATA_TYPES
4
+ from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_STORAGES, tSUPPORTED_DATA_TYPES
5
+ from pfeed.types.core import tDataFrame
11
6
  from pfeed.resolution import ExtendedResolution
12
- DataFrame = pd.DataFrame | pl.LazyFrame
13
7
 
14
8
  import os
15
9
  import io
@@ -19,11 +13,6 @@ import logging
19
13
  import datetime
20
14
  import importlib
21
15
 
22
- try:
23
- import polars as pl
24
- except ImportError:
25
- pass
26
-
27
16
  from pfeed.config_handler import get_config
28
17
  from pfeed.const.common import SUPPORTED_DATA_FEEDS, SUPPORTED_DATA_TOOLS, SUPPORTED_STORAGES
29
18
  from pfeed.utils.utils import (
@@ -99,7 +88,7 @@ class BaseFeed:
99
88
  dates: list[datetime.date] = get_dates_in_between(start_date, end_date)
100
89
  return dates
101
90
 
102
- def _get_historical_data_from_storages(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> DataFrame | None:
91
+ def _get_historical_data_from_storages(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> tDataFrame | None:
103
92
  from pfeed import etl
104
93
  default_raw_resolution = self.utils.get_default_raw_resolution()
105
94
  storages = [storage] if storage else SUPPORTED_STORAGES
@@ -113,7 +102,7 @@ class BaseFeed:
113
102
  self.logger.debug(f'transformed {self.name} raw data to {resolution=}')
114
103
  return transformed_df
115
104
 
116
- def _get_historical_data_from_temp(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date]) -> DataFrame | None:
105
+ def _get_historical_data_from_temp(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date]) -> tDataFrame | None:
117
106
  from pfeed import etl
118
107
  default_raw_resolution = self.utils.get_default_raw_resolution()
119
108
  temp_file_paths = [self._create_temp_file_path(trading_venue, pdt, resolution, date) for date in dates]
@@ -134,7 +123,7 @@ class BaseFeed:
134
123
  ) -> list[bytes]:
135
124
  raise NotImplementedError(f"{self.name} _get_historical_data_from_source() is not implemented")
136
125
 
137
- def _get_historical_data(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> DataFrame:
126
+ def _get_historical_data(self, trading_venue: str, pdt: str, resolution: ExtendedResolution, dates: list[datetime.date], storage: tSUPPORTED_STORAGES='') -> tDataFrame:
138
127
  if (df := self._get_historical_data_from_storages(trading_venue, pdt, resolution, dates, storage=storage)) is not None:
139
128
  pass
140
129
  elif (df := self._get_historical_data_from_temp(trading_venue, pdt, resolution, dates)) is not None:
@@ -153,22 +142,23 @@ class BaseFeed:
153
142
 
154
143
  def get_historical_data(
155
144
  self,
156
- pdt: str,
157
- rollback_period: str="1w",
145
+ product: str,
158
146
  resolution: str="1d",
147
+ rollback_period: str="1w",
159
148
  start_date: str="",
160
149
  end_date: str="",
161
150
  trading_venue: str='',
162
151
  storage: tSUPPORTED_STORAGES='',
163
- ) -> DataFrame:
152
+ ) -> tDataFrame:
164
153
  """Get historical data from the data source.
165
154
  Args:
166
- pdt: Product symbol, e.g. BTC_USDT_PERP, where PERP = product type "perpetual".
155
+ product: Product symbol, e.g. BTC_USDT_PERP, where PERP = product type "perpetual".
167
156
  rollback_period:
168
157
  Period to rollback from today, only used when `start_date` is not specified.
169
158
  Default is '1w' = 1 week.
170
159
  resolution: Data resolution. e.g. '1m' = 1 minute as the unit of each data bar/candle.
171
160
  Also supports raw resolution such as 'r1m', where 'r' stands for raw.
161
+ If resolution is 'raw', the default raw resolution of the data type will be used.
172
162
  Default is '1d' = 1 day.
173
163
  start_date: Start date.
174
164
  end_date: End date.
@@ -178,13 +168,17 @@ class BaseFeed:
178
168
  from pfeed import etl
179
169
  from pfeed.resolution import ExtendedResolution
180
170
 
181
- pdt, trading_venue, storage = pdt.upper(), trading_venue.upper(), storage.lower()
171
+ pdt, trading_venue, storage = product.upper(), trading_venue.upper(), storage.lower()
182
172
  assert validate_pdt(
183
173
  self.name, pdt
184
174
  ), f'"{pdt}" does not match the required format "XXX_YYY_PTYPE" or has an unsupported product type. (PTYPE means product type, e.g. PERP, Supported types for {self.name} are: {self.const.SUPPORTED_PRODUCT_TYPES})'
185
175
  if storage:
186
176
  assert storage in SUPPORTED_STORAGES, f"Invalid {storage=}, {SUPPORTED_STORAGES=}"
187
177
  self._prepare_temp_dir()
178
+ if resolution == 'raw':
179
+ assert self.const.SUPPORTED_DATA_TYPES[0].startswith('raw_')
180
+ default_raw_dtype = self.const.SUPPORTED_DATA_TYPES[0]
181
+ resolution = self.const.DTYPES_TO_RAW_RESOLUTIOS[default_raw_dtype]
188
182
  resolution = ExtendedResolution(resolution)
189
183
  trading_venue = trading_venue or derive_trading_venue(self.name)
190
184
  dates: list[datetime.date] = self._prepare_dates(start_date, end_date, rollback_period)
@@ -202,26 +196,26 @@ class BaseFeed:
202
196
 
203
197
  def download_historical_data(
204
198
  self,
205
- pdts: str | list[str] | None = None,
199
+ products: str | list[str] | None = None,
206
200
  dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
207
201
  ptypes: str | list[str] | None = None,
208
202
  start_date: str | None = None,
209
203
  end_date: str | None = None,
210
204
  use_minio: bool = False,
211
205
  use_ray: bool = True,
212
- ray_num_cpus: int = 8,
206
+ num_cpus: int = 8,
213
207
  ):
214
208
  try:
215
209
  data_source = getattr(self, self.name.lower())
216
210
  data_source.download_historical_data(
217
- pdts=pdts,
211
+ products=products,
218
212
  dtypes=dtypes,
219
213
  ptypes=ptypes,
220
214
  start_date=start_date,
221
215
  end_date=end_date,
222
216
  use_minio=use_minio,
223
217
  use_ray=use_ray,
224
- ray_num_cpus=ray_num_cpus,
218
+ num_cpus=num_cpus,
225
219
  )
226
220
  except AttributeError:
227
221
  raise Exception(f'{self.name} does not support download_historical_data()')
@@ -37,7 +37,7 @@ class YahooFinanceFeed(BaseFeed):
37
37
  "y": [1, 2, 5, 10],
38
38
  }
39
39
  # yfinance's valid intervals
40
- SUPPORTED_INTERVALS = {
40
+ SUPPORTED_TIMEFRAMES_AND_PERIODS = {
41
41
  "m": [1, 2, 5, 15, 30, 60, 90],
42
42
  "h": [1],
43
43
  "d": [1, 5],
@@ -56,17 +56,19 @@ class YahooFinanceFeed(BaseFeed):
56
56
  def get_historical_data(
57
57
  self,
58
58
  symbol: str,
59
- rollback_period: str | Literal["ytd", "max"] = "1M",
60
59
  resolution: str = "1d",
60
+ rollback_period: str | Literal["ytd", "max"] = "1M",
61
61
  start_date: str = "",
62
62
  end_date: str = "",
63
63
  use_pfeed_resample: bool = True,
64
+ product: str = "",
64
65
  **kwargs,
65
66
  ) -> pd.DataFrame | pl.DataFrame:
66
67
  """Simple Wrapper of yfinance history().
67
68
  For the details of args and kwargs, please refer to https://github.com/ranaroussi/yfinance
68
69
 
69
70
  Args:
71
+ symbol: ticker symbol used in yfinance
70
72
  rollback_period: Data resolution or 'ytd' or 'max'
71
73
  Period to rollback from today, only used when `start_date` is not specified.
72
74
  Default is '1M' = 1 month.
@@ -78,6 +80,7 @@ class YahooFinanceFeed(BaseFeed):
78
80
  use_pfeed_resample: Whether to use pfeed's resampling logic.
79
81
  Default is True.
80
82
  This will automatically be triggered if yfinance does not support the resolution.
83
+ product: Product symbol, e.g. AAPL_USD_STK. If provided, it will be used to create a column 'product' in the output dataframe.
81
84
  **kwargs: kwargs supported by `yfinance`
82
85
  """
83
86
  from pfeed import etl
@@ -113,11 +116,11 @@ class YahooFinanceFeed(BaseFeed):
113
116
  etimeframe = self._ADAPTER["timeframe"].get(timeframe, timeframe)
114
117
  interval = str(resolution.period) + etimeframe
115
118
 
116
- # manipulate the input resolution and support e.g. '2d' resolution even it is not in the SUPPORTED_INTERVALS
119
+ # manipulate the input resolution to support e.g. '2d' resolution even if it is not in the SUPPORTED_TIMEFRAMES_AND_PERIODS
117
120
  if (use_pfeed_resample and resolution.period != 1) or (
118
- timeframe in self.SUPPORTED_INTERVALS
119
- and resolution.period not in self.SUPPORTED_INTERVALS[timeframe]
120
- and 1 in self.SUPPORTED_INTERVALS[timeframe]
121
+ timeframe in self.SUPPORTED_TIMEFRAMES_AND_PERIODS
122
+ and resolution.period not in self.SUPPORTED_TIMEFRAMES_AND_PERIODS[timeframe]
123
+ and 1 in self.SUPPORTED_TIMEFRAMES_AND_PERIODS[timeframe]
121
124
  ):
122
125
  # if the resolution (e.g. '2d') is not supported in yfinance, use "1d" instead
123
126
  interval = "1" + etimeframe
@@ -167,12 +170,17 @@ class YahooFinanceFeed(BaseFeed):
167
170
  df = etl.resample_data(df, resolution)
168
171
 
169
172
  df["symbol"] = symbol
173
+ if product:
174
+ df["product"] = product
170
175
  df["resolution"] = repr(resolution)
171
176
  # reorder columns
172
- left_cols = ["ts", "symbol", "resolution"]
177
+ if "product" in df.columns:
178
+ left_cols = ["ts", "symbol", "product", "resolution"]
179
+ else:
180
+ left_cols = ["ts", "symbol", "resolution"]
173
181
  df = df[left_cols + [col for col in df.columns if col not in left_cols]]
174
182
 
175
- if self.data_tool == "pandas":
183
+ if self.data_tool.name == "pandas":
176
184
  return df
177
- elif self.data_tool == "polars":
185
+ elif self.data_tool.name == "polars":
178
186
  return pl.from_pandas(df)
@@ -141,7 +141,7 @@ def download_historical_data(
141
141
  pdt_splits = pdt.split('_')
142
142
  ptype = pdt_splits[-1].upper()
143
143
  exchange = Exchange(env='LIVE', ptype=ptype)
144
- product = exchange.create_product(*pdt_splits)
144
+ product = exchange.create_product(pdt)
145
145
  except KeyError:
146
146
  raise ValueError(f'"{pdt}" is not a valid product in {source}')
147
147
  efilenames = api.get_efilenames(pdt)
@@ -2,6 +2,7 @@
2
2
  DATA_SOURCE = 'BYBIT'
3
3
  SUPPORTED_PRODUCT_TYPES = ['SPOT', 'PERP', 'IPERP', 'FUT', 'IFUT']
4
4
  SUPPORTED_DATA_TYPES = ['raw_tick', 'tick', 'second', 'minute', 'hour', 'daily']
5
+ # this specifies the raw resolution of the data type, e.g. 'raw_minute': 'r5m', meaning raw_minute is 5-minute data
5
6
  DTYPES_TO_RAW_RESOLUTIOS = {
6
7
  'raw_tick': 'r1tick',
7
8
  }
@@ -98,14 +98,14 @@ def _run_etl(storage: tSUPPORTED_STORAGES, product: CryptoProduct, date: datetim
98
98
 
99
99
 
100
100
  def download_historical_data(
101
- pdts: str | list[str] | None=None,
101
+ products: str | list[str] | None=None,
102
102
  dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None=None,
103
103
  ptypes: tSUPPORTED_PRODUCT_TYPES | list[tSUPPORTED_PRODUCT_TYPES] | None=None,
104
104
  start_date: str | None=None,
105
105
  end_date: str | None=None,
106
106
  use_minio: bool=False,
107
107
  use_ray: bool=True,
108
- ray_num_cpus: int=8,
108
+ num_cpus: int=8,
109
109
  ) -> None:
110
110
  from pfund.plogging import set_up_loggers
111
111
 
@@ -116,12 +116,12 @@ def download_historical_data(
116
116
  logger = logging.getLogger(DATA_SOURCE.lower() + '_data')
117
117
 
118
118
  print(f'''Hint:
119
- You can use the command "pfeed config --data-path ..." to set your data path that stores downloaded data.
119
+ You can use the command "pfeed config --data-path {{your_path}}" to set your data path that stores downloaded data.
120
120
  The current data path is: {config.data_path}.
121
121
  ''')
122
122
 
123
123
  resolutions: list[ExtendedResolution] = _convert_dtypes_to_resolutions(dtypes)
124
- pdts = _prepare_pdts(pdts, ptypes)
124
+ pdts = _prepare_pdts(products, ptypes)
125
125
  start_date, end_date = _prepare_dates(start_date, end_date)
126
126
  dates: list[datetime.date] = get_dates_in_between(start_date, end_date)
127
127
 
@@ -136,7 +136,7 @@ def download_historical_data(
136
136
  ray_tasks = defaultdict(list)
137
137
  for pdt in pdts if use_ray else tqdm(pdts, desc=f'Downloading {DATA_SOURCE} historical data by product', colour='green'):
138
138
  try:
139
- product = exchange.create_product(*pdt.split('_'))
139
+ product = exchange.create_product(pdt)
140
140
  except KeyError:
141
141
  raise ValueError(f'"{pdt}" is not a valid product in {DATA_SOURCE}')
142
142
  efilenames = api.get_efilenames(pdt)
@@ -176,7 +176,7 @@ def download_historical_data(
176
176
  try:
177
177
  log_listener = None
178
178
  logical_cpus = os.cpu_count()
179
- num_cpus = min(ray_num_cpus, logical_cpus)
179
+ num_cpus = min(num_cpus, logical_cpus)
180
180
  ray.init(num_cpus=num_cpus)
181
181
  batch_size = num_cpus
182
182
  print(f"Ray's num_cpus is set to {num_cpus}")
@@ -1,12 +1,14 @@
1
1
  from typing import Literal
2
2
 
3
+
3
4
  # since Literal doesn't support variables as inputs, define variables in common.py here with prefix 't'
4
5
  tSUPPORTED_ENVIRONMENTS = Literal['BACKTEST', 'SANDBOX', 'PAPER', 'LIVE']
5
6
  tSUPPORTED_DATA_FEEDS = Literal['YAHOO_FINANCE', 'BYBIT']
6
7
  tSUPPORTED_STORAGES = Literal['local', 'minio']
7
8
  tSUPPORTED_DOWNLOAD_DATA_SOURCES = Literal['BYBIT', 'BINANCE']
8
9
  tSUPPORTED_CRYPTO_EXCHANGES = Literal['BYBIT', 'BINANCE']
9
- tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars']
10
+ tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars', 'dask', 'spark']
11
+ tSUPPORTED_DATA_ENGINES = Literal['ray', 'dask', 'spark']
10
12
  tSUPPORTED_DATA_TYPES = Literal[
11
13
  'raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
12
14
  'tick', 'second', 'minute', 'hour', 'daily'
pfeed/types/core.py ADDED
@@ -0,0 +1,11 @@
1
+ from typing import TypeVar
2
+
3
+ try:
4
+ import pandas as pd
5
+ import polars as pl
6
+ except ImportError:
7
+ pass
8
+
9
+
10
+ tDataFrame = TypeVar('tDataFrame', pd.DataFrame, pl.DataFrame, pl.LazyFrame)
11
+ tSeries = TypeVar('tSeries', pd.Series, pl.Series)
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.1
2
+ Name: pfeed
3
+ Version: 0.0.2.dev2
4
+ Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
5
+ Home-page: https://pfund.ai
6
+ License: Apache-2.0
7
+ Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
8
+ Author: Stephen Yau
9
+ Author-email: softwareentrepreneer+pfeed@gmail.com
10
+ Requires-Python: >=3.10,<4.0
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Provides-Extra: all
17
+ Provides-Extra: core
18
+ Provides-Extra: dask
19
+ Provides-Extra: dataops
20
+ Provides-Extra: dfs
21
+ Provides-Extra: polars
22
+ Provides-Extra: spark
23
+ Provides-Extra: storage
24
+ Requires-Dist: adlfs (>=2024.7.0,<2025.0.0) ; extra == "storage" or extra == "all"
25
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
26
+ Requires-Dist: bytewax (>=0.21.0,<0.22.0) ; extra == "dataops" or extra == "all"
27
+ Requires-Dist: coiled (>=1.54.0,<2.0.0) ; extra == "dask" or extra == "dfs" or extra == "all"
28
+ Requires-Dist: confluent-kafka (>=2.5.3,<3.0.0) ; extra == "dataops" or extra == "all"
29
+ Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "storage" or extra == "all"
30
+ Requires-Dist: dask[complete] (>=2024.9.1,<2025.0.0) ; extra == "dask" or extra == "dfs" or extra == "all"
31
+ Requires-Dist: databento (>=0.42.0,<0.43.0) ; extra == "core" or extra == "all"
32
+ Requires-Dist: databricks-connect (>=15.4.2,<16.0.0) ; extra == "spark" or extra == "dfs" or extra == "all"
33
+ Requires-Dist: fastparquet (>=2024.2.0,<2025.0.0)
34
+ Requires-Dist: gcsfs (>=2024.9.0,<2025.0.0) ; extra == "storage" or extra == "all"
35
+ Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "core" or extra == "all"
36
+ Requires-Dist: modin[all] (>=0.32.0,<0.33.0) ; extra == "core" or extra == "all"
37
+ Requires-Dist: pandas (>=2.2.0,<3.0.0)
38
+ Requires-Dist: pfund (>=0.0.2.dev1,<0.0.3)
39
+ Requires-Dist: polars (>=1.7.1,<2.0.0) ; extra == "polars" or extra == "dfs" or extra == "all"
40
+ Requires-Dist: polars-xdt (>=0.16.0,<0.17.0) ; extra == "polars" or extra == "dfs" or extra == "all"
41
+ Requires-Dist: polygon-api-client (>=1.14.2,<2.0.0) ; extra == "core" or extra == "all"
42
+ Requires-Dist: prefect (>=3.0.5,<4.0.0) ; extra == "dataops" or extra == "all"
43
+ Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "core" or extra == "all"
44
+ Requires-Dist: psycopg2 (>=2.9.9,<3.0.0) ; extra == "storage" or extra == "all"
45
+ Requires-Dist: pyarrow (>=17.0.0,<18.0.0) ; extra == "core" or extra == "all"
46
+ Requires-Dist: pydantic (>=2.7.0,<3.0.0)
47
+ Requires-Dist: pyspark (>=3.5.3,<4.0.0) ; extra == "spark" or extra == "dfs" or extra == "all"
48
+ Requires-Dist: ray (>=2.35.0,<3.0.0) ; extra == "core" or extra == "all"
49
+ Requires-Dist: s3fs (>=2024.9.0,<2025.0.0) ; extra == "storage" or extra == "all"
50
+ Requires-Dist: yfinance (>=0.2.43,<0.3.0)
51
+ Project-URL: Documentation, https://pfeed-docs.pfund.ai
52
+ Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
53
+ Description-Content-Type: text/markdown
54
+
55
+ # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
56
+
57
+ [![Twitter Follow](https://img.shields.io/twitter/follow/pfund_ai?style=social)](https://x.com/pfund_ai)
58
+ ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
59
+ ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
60
+ [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
61
+ ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
62
+ <!-- [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org) -->
63
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
64
+
65
+ [MinIO]: https://min.io/
66
+ [PFund]: https://github.com/PFund-Software-Ltd/pfund
67
+ [Polars]: https://github.com/pola-rs/polars
68
+ [Dask]: https://www.dask.org/
69
+ [Spark]: https://spark.apache.org/docs/latest/api/python/index.html
70
+ [PyTrade.org]: https://pytrade.org
71
+ [Yahoo Finance]: https://github.com/ranaroussi/yfinance
72
+ [Bybit]: https://public.bybit.com
73
+ [Binance]: https://data.binance.vision
74
+ [OKX]: https://www.okx.com/data-download
75
+ [Databento]: https://databento.com/
76
+ [Polygon]: https://polygon.io/
77
+ [FirstRate Data]: https://firstratedata.com
78
+
79
+ ## Problem
80
+ Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
81
+
82
+ ## Solution
83
+ By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, allowing traders to focus on strategy development.
84
+
85
+ ---
86
+ PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders. It enables you to **download historical data**, **stream real-time data**, and **store cleaned data** in a **local data lake for quantitative analysis**, by automating the processes of data collection, cleaning, transformation, and storage.
87
+
88
+ ## Core Features
89
+ - [x] Download or stream reliable, validated and **clean data** for research, backtesting, or live trading
90
+ - [x] Get historical data (**dataframe**) or live data in standardized formats by just calling a **single** function
91
+ - [x] **Own your data** by storing them locally using [MinIO], with the option to connect to the cloud
92
+ - [x] Interact with different kinds of data (including TradFi, CeFi and DeFi) using a **unified interface**
93
+
94
+ ---
95
+
96
+ <details>
97
+ <summary>Table of Contents</summary>
98
+
99
+ - [Installation](#installation)
100
+ - [Quick Start](#quick-start)
101
+ - [Get Historical Data in Dataframe](#1-get-historical-data-in-dataframe-no-storage)
102
+ - [Download Historical Data on Command Line](#2-download-historical-data-on-the-command-line-interface-cli)
103
+ - [Download Historical Data in Python](#3-download-historical-data-in-python)
104
+ - [Supported Data Sources](#supported-data-sources)
105
+ - [Related Projects](#related-projects)
106
+ - [Disclaimer](#disclaimer)
107
+
108
+ </details>
109
+
110
+
111
+
112
+ ## Installation
113
+ > For more installation options, please refer to the [documentation](https://pfeed-docs.pfund.ai/installation).
114
+ ```bash
115
+ # [RECOMMENDED]: Full Features, choose this if you do not care about the package size
116
+ pip install -U "pfeed[all]"
117
+
118
+ # Minimal Features, only supports getting, downloading and streaming data
119
+ pip install -U "pfeed[core]"
120
+ ```
121
+
122
+
123
+
124
+ ## Quick Start
125
+ ### 1. Get Historical Data in Dataframe (No storage)
126
+ Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
127
+
128
+ ```python
129
+ import pfeed as pe
130
+
131
+ feed = pe.BybitFeed(data_tool='polars')
132
+
133
+ df = feed.get_historical_data(
134
+ 'BTC_USDT_PERP',
135
+ resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
136
+ start_date='2024-03-01',
137
+ end_date='2024-03-01',
138
+ )
139
+ ```
140
+
141
+ Printing the first few rows of `df`:
142
+ | | ts | product | resolution | open | high | low | close | volume |
143
+ |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
144
+ | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
145
+ | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
146
+ | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
147
+
148
+ > By using pfeed, you are just a few lines of code away from getting a standardized dataframe — how convenient!
149
+
150
+ ### 2. Download Historical Data on the Command Line Interface (CLI)
151
+ > For more CLI commands, please refer to the [documentation](https://pfeed-docs.pfund.ai/cli-commands).
152
+ ```bash
153
+ # download data, default data type (dtype) is 'raw' data
154
+ pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
155
+
156
+ # download multiple products BTC_USDT_PERP and ETH_USDT_PERP as minute data and store them locally
157
+ pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute --use-minio
158
+ ```
159
+
160
+ ### 3. Download Historical Data in Python
161
+ ```python
162
+ import pfeed as pe
163
+
164
+ # compared to the CLI approach, this approach is more convenient for downloading multiple products
165
+ pe.download(
166
+ data_source='bybit',
167
+ products=[
168
+ 'BTC_USDT_PERP',
169
+ 'ETH_USDT_PERP',
170
+ 'BCH_USDT_PERP',
171
+ ],
172
+ dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
173
+ start_date='2024-03-01',
174
+ end_date='2024-03-08',
175
+ use_minio=False,
176
+ )
177
+ ```
178
+
179
+
180
+
181
+ ## Supported Data Sources
182
+ | Data Source | Get Historical Data | Download Historical Data | Get Live Data | Stream Live Data |
183
+ | -------------------- | ------------------- | ------------------------ | --------------| ---------------- |
184
+ | [Yahoo Finance] | 🟢 | ⚪ | ⚪ | ⚪ |
185
+ | [Bybit] | 🟢 | 🟢 | 🟡 | 🔴 |
186
+ | *Interactive Brokers | 🔴 | ⚪ | 🔴 | 🔴 |
187
+ | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
188
+ | *[Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
189
+ | *[Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
190
+ | [Binance] | 🔴 | 🔴 | 🔴 | 🔴 |
191
+ | [OKX] | 🔴 | 🔴 | 🔴 | 🔴 |
192
+
193
+ 🟢 = finished \
194
+ 🟡 = in progress \
195
+ 🔴 = todo \
196
+ ⚪ = not applicable \
197
+ \* = paid data
198
+
199
+
200
+
201
+ ## Related Projects
202
+ - [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
203
+ - [PyTrade.org] — A curated list of Python libraries and resources for algorithmic trading.
204
+
205
+
206
+
207
+ ## Disclaimer
208
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
209
+
210
+ This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
211
+
212
+ The developers of this framework are not responsible for any financial losses incurred from using this software. This includes, but is not limited to, losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
213
+
@@ -1,47 +1,49 @@
1
- pfeed/__init__.py,sha256=wzxcCqGSNuWIy3oOPiZFQIuqfQBUUltIq_f28uiz_vk,1658
1
+ pfeed/__init__.py,sha256=zD1yEHw4-oIX-fNIR2dEVh-tQ5cyVhBpwi0IjOadYaU,1669
2
2
  pfeed/cli/__init__.py,sha256=xRBbc1F6E4xiWFhNKHA12OjKXd29QI9T2rA11iZDhrk,66
3
3
  pfeed/cli/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- pfeed/cli/commands/config.py,sha256=hs-j7cYpAADBenA8h_SPZcLznNZGwYXQOEsm26p81nc,3195
4
+ pfeed/cli/commands/config.py,sha256=hXQ2lljvUz8mWcWrEXgYDU3IJxRJdhon_MB7LpJsiAE,3131
5
+ pfeed/cli/commands/doc.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
6
  pfeed/cli/commands/docker_compose.py,sha256=xFN2nbZ10pSZSWxgUZRdUnxj8pbq12RplEPvms95kt4,1160
6
- pfeed/cli/commands/download.py,sha256=yRWuPhnwkA_IZwHrsH7DsePrfBh6tdA_ys2LRVqsnPg,2884
7
+ pfeed/cli/commands/download.py,sha256=ZxnAv4Jx9hbEo3M33xM-b0hSLwjRZiE7RE4ivUtJnek,2900
7
8
  pfeed/cli/commands/open.py,sha256=VGrSGwowQJMTJ1j8R67J4rkOcIS7Xr6PsFnwYYPpOCY,1728
8
9
  pfeed/cli/commands/stream.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- pfeed/cli/main.py,sha256=1qcI8uQg3isLYJ4GGPVjm2DnGruFw9E4dd4IADo8AAA,698
10
- pfeed/config_handler.py,sha256=Aqz_SHuF01S7z85lCguVOB4UOJLmdhOIZve978kv3dw,5223
11
- pfeed/const/common.py,sha256=_LeGAFeaTL0vo9ujvrswohwtkeztL8r7fbq1si8c0tE,569
10
+ pfeed/cli/main.py,sha256=55spaJbEQY7QgAIByR3EPQYYUz9bmDWT4qhGBiNoOXA,767
11
+ pfeed/config_handler.py,sha256=vIQwicjRKMen_S3F6amrvbIMcpXtq_PPXRbxK7Xt3uo,5173
12
+ pfeed/const/common.py,sha256=c-DMmjaPdafgmIb6W56g5suneGZb67WlDz-l6Km_pnY,1093
12
13
  pfeed/const/paths.py,sha256=IKBV15R5q1poI4AM2jd-CSqtB9DPewwZSZ8H3YJA72w,477
13
14
  pfeed/data_tools/data_tool_pandas.py,sha256=Fn0ej5tS9twO5NzSsxUvc35ud5PMps1EPgQMr-qWLb0,2247
14
15
  pfeed/data_tools/data_tool_polars.py,sha256=qsmrz92JweEOK9bwapukezN0C18WHKHrFX5725Mser4,2501
15
16
  pfeed/datastore.py,sha256=mKEM1cBeAZnbpV392-pOoYTuxqxXjHGrZhv43P0a2k0,5196
16
17
  pfeed/etl.py,sha256=fLHxUZMx_AE-0Pq5S2rTuafHXozDYGbt0fSQUDEXd7M,15446
17
18
  pfeed/feeds/__init__.py,sha256=bY7wGBlW4BdMIJRKiZ_dHZf_-ag-CAqN-__iFV5XpK8,153
18
- pfeed/feeds/base_feed.py,sha256=uIqVGxMrX87v8LZ6p00Mg--D4d717TstLbXpqshjeqg,12100
19
+ pfeed/feeds/base_feed.py,sha256=HQOOGCII28ga0i9WGryecDsqWMnrb8SfnElgdLeRkSQ,12264
19
20
  pfeed/feeds/binance_feed.py,sha256=nS_-WeXeaGtgjAaEV9ZOFi4xqkWHLoRJaiCSMWNHma4,632
20
21
  pfeed/feeds/bybit_feed.py,sha256=Im1sHJtqYhKFRxTMHz3iQ46KVQCUAA5Ug2jZSop9RFk,2093
21
22
  pfeed/feeds/custom_csv_feed.py,sha256=qTkanCebga7Vcrz9Gus0YXX6ZZkhxwiXw02ID9zmZ5M,281
22
- pfeed/feeds/yahoo_finance_feed.py,sha256=iPQNSq8DTCLMBr9BPo-AK89uNQWCKnfll-DK0BEAjhU,6855
23
+ pfeed/feeds/yahoo_finance_feed.py,sha256=UTDsQ1aIKrAzdt685bPVGdnfqYM_VyOhwDavTind9qg,7323
23
24
  pfeed/filepath.py,sha256=Jl__1G314foQ8Fvkh6EoaBxzyIq0wyyXltC6VO-Zr1Y,2819
24
25
  pfeed/main.py,sha256=J9ATB3AT4VY7IlCTeK7xeDpOlRJV5cetQ1LZxQn2CDc,294
25
26
  pfeed/resolution.py,sha256=YA4q-2Ohl3xaYzjwzEWSEhxepkS8h38Xz1YKcUx3ShE,2409
26
27
  pfeed/sources/binance/__init__.py,sha256=GHdr0Tf_sRirKOfqJ__nN4D9L0hZTn657kt-ar_zgLY,498
27
28
  pfeed/sources/binance/api.py,sha256=v_WIyke_JQJupdQliTVQ8lqZ3reD7xCQiMFTH-Saguo,3710
28
29
  pfeed/sources/binance/const.py,sha256=7XSPvfhGLbi0U0IMWmv0OWb86PPEzObpbBv3WUAzKT8,2034
29
- pfeed/sources/binance/download.py,sha256=-AFtq9Oeu8UAqn4o2g1B2H72wVE1gSSk9gUA0GafBU8,7650
30
+ pfeed/sources/binance/download.py,sha256=go-YRGDtCW4TwTHm4-LGkfmWtB9ehlHHB312TPArFGA,7642
30
31
  pfeed/sources/binance/stream.py,sha256=WSyzvrInWCaFPOoeZ2KnBbKef4HKSrUY8zwHanfyAOw,72
31
32
  pfeed/sources/bybit/__init__.py,sha256=PgmOrdy3FWqmii122brz5rC08ig15erH70gIra6-Kn0,317
32
33
  pfeed/sources/bybit/api.py,sha256=EP6CJo_aaBrbezkBs1z2Y5PMXDqVBfRyjjV3X8tag5A,2790
33
- pfeed/sources/bybit/const.py,sha256=XRWEWs2kZg8yOs6J8Lr3hY4T6SbuQpaQ02CqjrxlBYo,1082
34
- pfeed/sources/bybit/download.py,sha256=Q-DCW-yk62me3tYNfySaw1-YXU-sAiRH3pyscr3cz9Y,9318
34
+ pfeed/sources/bybit/const.py,sha256=NEr8uU9KJvt26w_UXmn1PJtZh5Y7HuNCpBV2xICoDWE,1198
35
+ pfeed/sources/bybit/download.py,sha256=AXqAoXQMWDd3Jyl2V4aXgv2lu4AWqdd_EXRGq0BS5sE,9316
35
36
  pfeed/sources/bybit/stream.py,sha256=b0frKmuhC2jTmq4flX5_ff0i7U-ksjPY6OjGTjsD1Zs,144
36
37
  pfeed/sources/bybit/types.py,sha256=gOZKjHe9keq50FHz4KAid2JH2gw4M3iMp__NeMh56AU,191
37
38
  pfeed/sources/bybit/utils.py,sha256=HiMbM62WxVr9gwKhiMKvWSdY1RXw-rEq4OPwadFrZk0,1628
38
- pfeed/types/common_literals.py,sha256=wHF4VLJsFc3XlCY_-LqIFdU_Zm0XshIhLWRVjajVKYg,636
39
+ pfeed/types/common_literals.py,sha256=lNbTgXJ5HY_UmahmnvoIlzNJAYHpDUxl7AO0r936feU,712
40
+ pfeed/types/core.py,sha256=BX-XSON6mvRKfuxiiRP3_e47eeO36DHHIY_lfxESfhw,240
39
41
  pfeed/utils/file_formats.py,sha256=iEkPmXWN5neDCB-faoQ2pDWjyQ1FmuYutS-9Fr-bgko,2019
40
42
  pfeed/utils/monitor.py,sha256=S5roDtzHVpiiuCqVl4oT7MGuky4LPTAQ8MJuyVVqcno,480
41
43
  pfeed/utils/utils.py,sha256=LaMctJw12RwEUF9XZQ-T3PZKpnwHZi3MoYes8IZcSkI,4999
42
44
  pfeed/utils/validate.py,sha256=UNBd6OIKao1b_5qSVHwfqLKX87Gv9Z4KW7nkFmKckco,1909
43
- pfeed-0.0.2.dev1.dist-info/LICENSE,sha256=QBDNo3Na8pzY1OUD6k6KEIhRvTP_zHg_q78IkYDLzIo,11355
44
- pfeed-0.0.2.dev1.dist-info/METADATA,sha256=Mf-j2Sg22Inx78iwifuEgUdEy3VaSXEfPfWwc73jhzc,12182
45
- pfeed-0.0.2.dev1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
46
- pfeed-0.0.2.dev1.dist-info/entry_points.txt,sha256=hK2aA9xFMsx6MJX2lKTVtH3uMtM1aCce6vLv9a67RF0,44
47
- pfeed-0.0.2.dev1.dist-info/RECORD,,
45
+ pfeed-0.0.2.dev2.dist-info/LICENSE,sha256=QBDNo3Na8pzY1OUD6k6KEIhRvTP_zHg_q78IkYDLzIo,11355
46
+ pfeed-0.0.2.dev2.dist-info/METADATA,sha256=203cAqP7LT32wAwqlYnnAq9MfCvz3zRWXrBiq2pg1ss,11567
47
+ pfeed-0.0.2.dev2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
48
+ pfeed-0.0.2.dev2.dist-info/entry_points.txt,sha256=hK2aA9xFMsx6MJX2lKTVtH3uMtM1aCce6vLv9a67RF0,44
49
+ pfeed-0.0.2.dev2.dist-info/RECORD,,
@@ -1,267 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: pfeed
3
- Version: 0.0.2.dev1
4
- Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
5
- Home-page: https://pfund.ai
6
- License: Apache-2.0
7
- Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
8
- Author: Stephen Yau
9
- Author-email: softwareentrepreneer+pfeed@gmail.com
10
- Requires-Python: >=3.10,<3.13
11
- Classifier: License :: OSI Approved :: Apache Software License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
- Provides-Extra: all
17
- Provides-Extra: boost
18
- Provides-Extra: data
19
- Provides-Extra: df
20
- Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
21
- Requires-Dist: connectorx (>=0.3.3,<0.4.0) ; extra == "boost" or extra == "all"
22
- Requires-Dist: fastparquet (>=2024.5.0,<2025.0.0)
23
- Requires-Dist: minio (>=7.2.8,<8.0.0) ; extra == "data" or extra == "all"
24
- Requires-Dist: pandas (>=2.2.2,<3.0.0) ; extra == "df" or extra == "all"
25
- Requires-Dist: pfund (>=0.0.1.dev13,<0.0.2)
26
- Requires-Dist: polars (>=1.7.1,<2.0.0) ; extra == "df" or extra == "all"
27
- Requires-Dist: psutil (>=6.0.0,<7.0.0) ; extra == "data" or extra == "all"
28
- Requires-Dist: pyarrow (>=15.0.0,<16.0.0) ; extra == "df" or extra == "all"
29
- Requires-Dist: ray (>=2.35.0,<3.0.0) ; extra == "boost" or extra == "all"
30
- Requires-Dist: s3fs (>=2024.9.0,<2025.0.0) ; extra == "data" or extra == "all"
31
- Requires-Dist: yfinance (>=0.2.43,<0.3.0)
32
- Project-URL: Documentation, https://pfeed-docs.pfund.ai
33
- Project-URL: Repository, https://github.com/PFund-Software-Ltd/pfeed
34
- Description-Content-Type: text/markdown
35
-
36
- # PFeed: Data Pipeline for Algo-Trading, Getting and Storing Real-Time and Historical Data Made Easy.
37
-
38
- ![GitHub stars](https://img.shields.io/github/stars/PFund-Software-Ltd/pfeed?style=social)
39
- ![PyPI downloads](https://img.shields.io/pypi/dm/pfeed?label=downloads)
40
- [![PyPI](https://img.shields.io/pypi/v/pfeed.svg)](https://pypi.org/project/pfeed)
41
- ![PyPI - Support Python Versions](https://img.shields.io/pypi/pyversions/pfeed)
42
- [![Jupyter Book Badge](https://raw.githubusercontent.com/PFund-Software-Ltd/pfeed/main/docs/images/jupyterbook.svg)](https://jupyterbook.org)
43
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
44
-
45
- [MinIO]: https://min.io/
46
- [PFund]: https://github.com/PFund-Software-Ltd/pfund
47
- [Ray]: https://github.com/ray-project/ray
48
- [Polars]: https://github.com/pola-rs/polars
49
- [Prefect]: https://www.prefect.io
50
- [Timescaledb]: https://www.timescale.com/
51
- [Dask]: https://www.dask.org/
52
- [Spark]: https://spark.apache.org/docs/latest/api/python/index.html
53
- [DuckDB]: https://github.com/duckdb/duckdb
54
- [Daft]: https://github.com/Eventual-Inc/Daft
55
- [PyTrade.org]: https://pytrade.org
56
- [Databento]: https://databento.com/
57
- [Polygon]: https://polygon.io/
58
- [Bybit]: https://bybit.com/
59
- [FirstRate Data]: https://firstratedata.com
60
-
61
- ## Problem
62
- Starting algo-trading requires reliable, clean data. However, the time-consuming and mundane tasks of data cleaning and storage often discourage traders from embarking on their algo-trading journey.
63
-
64
- ## Solution
65
- By leveraging modern data engineering tools, `pfeed` handles the tedious data work and **outputs backtesting-ready data**, accelerating traders to get to the strategy development phase.
66
-
67
- ---
68
- PFeed (/piː fiːd/) is a data pipeline for algorithmic trading, serving as a bridge between raw data sources and traders by automating the process of data collection, cleaning, transformation, and storage, loading clean data into a **local data lake for quantitative analysis**.
69
-
70
- ## Core Features
71
- - [x] Unified approach for interacting with various [data sources](#supported-data-sources) and obtaining historical and live data
72
- - [x] ETL data pipeline for transforming raw data to clean data and storing it in [MinIO] (optional)
73
- - [x] Fast data downloading, utilizing [Ray] for parallelization
74
- - [x] Supports multiple data tools (e.g. Pandas, [Polars], [Dask], [Spark], [DuckDB], [Daft])
75
- - [ ] Integrates with [Prefect] to control data flows
76
- - [ ] Listens to PFund's trade engine and adds trade history to a local database [Timescaledb] (optional)
77
-
78
- > It is designed to be used alongside [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready.
79
-
80
- ---
81
-
82
- <details>
83
- <summary>Table of Contents</summary>
84
-
85
- - [Installation](#installation)
86
- - [Quick Start](#quick-start)
87
- - [Main Usage: Data Feed](#main-usage-data-feed)
88
- - [Download Historical Data on Command Line](#download-historical-data-on-command-line)
89
- - [Download Historical Data in Python](#download-historical-data-in-python)
90
- - [List Current Config](#list-current-config)
91
- - [Run PFeed's docker-compose.yml](#run-pfeeds-docker-composeyml)
92
- - [Supported Data Sources](#supported-data-sources)
93
- - [Supported Data Tools](#supported-data-tools)
94
- - [Related Projects](#related-projects)
95
- - [Disclaimer](#disclaimer)
96
-
97
- </details>
98
-
99
-
100
- ## Installation
101
- ### Using [Poetry](https://python-poetry.org) (Recommended)
102
- ```bash
103
- # [RECOMMENDED]: Download data (e.g. Bybit and Yahoo Finance) + Data tools (e.g. pandas, polars) + Data storage (e.g. MinIO) + Boosted performance (e.g. Ray)
104
- poetry add "pfeed[all]"
105
-
106
- # [Download data + Data tools + Data storage]
107
- poetry add "pfeed[df,data]"
108
-
109
- # [Download data + Data tools]
110
- poetry add "pfeed[df]"
111
-
112
- # [Download data only]:
113
- poetry add pfeed
114
-
115
- # update to the latest version:
116
- poetry update pfeed
117
- ```
118
-
119
- ### Using Pip
120
- ```bash
121
- # same as above, you can choose to install "pfeed[all]", "pfeed[df,data]", "pfeed[df]" or "pfeed"
122
- pip install "pfeed[all]"
123
-
124
- # install the latest version:
125
- pip install -U pfeed
126
- ```
127
-
128
- ### Checking your installation
129
- ```bash
130
- $ pfeed --version
131
- ```
132
-
133
- ## Quick Start
134
- ### 1. Get Historical Data in Dataframe (No storage)
135
- Get [Bybit]'s data in dataframe, e.g. 1-minute data (data is downloaded on the fly if not stored locally)
136
-
137
- ```python
138
- import pfeed as pe
139
-
140
- feed = pe.BybitFeed(data_tool='polars')
141
-
142
- df = feed.get_historical_data(
143
- 'BTC_USDT_PERP',
144
- resolution='1minute', # 'raw' or '1tick'/'1t' or '2second'/'2s' etc.
145
- start_date='2024-03-01',
146
- end_date='2024-03-01',
147
- )
148
- ```
149
-
150
- Printing the first few rows of `df`:
151
- | | ts | product | resolution | open | high | low | close | volume |
152
- |---:|:--------------------|:--------------|:-------------|--------:|--------:|--------:|--------:|---------:|
153
- | 0 | 2024-03-01 00:00:00 | BTC_USDT_PERP | 1m | 61184.1 | 61244.5 | 61175.8 | 61244.5 | 159.142 |
154
- | 1 | 2024-03-01 00:01:00 | BTC_USDT_PERP | 1m | 61245.3 | 61276.5 | 61200.7 | 61232.2 | 227.242 |
155
- | 2 | 2024-03-01 00:02:00 | BTC_USDT_PERP | 1m | 61232.2 | 61249 | 61180 | 61184.2 | 91.446 |
156
-
157
- > By using pfeed, you are just a few lines of code away from a standardized dataframe, how convenient!
158
-
159
-
160
-
161
- ### 2. Download Historical Data on the Command Line Interface (CLI)
162
- ```bash
163
- # download data, default data type (dtype) is 'raw' data
164
- pfeed download -d BYBIT -p BTC_USDT_PERP --start-date 2024-03-01 --end-date 2024-03-08
165
-
166
- # download multiple products BTC_USDT_PERP and ETH_USDT_PERP and minute data
167
- pfeed download -d BYBIT -p BTC_USDT_PERP -p ETH_USDT_PERP --dtypes minute
168
-
169
- # download all perpetuals data from bybit
170
- pfeed download -d BYBIT --ptypes PERP
171
-
172
- # download all the data from bybit (CAUTION: your local machine probably won't have enough space for this!)
173
- pfeed download -d BYBIT
174
-
175
- # store data into MinIO (need to start MinIO by running `pfeed docker-compose up -d` first)
176
- pfeed download -d BYBIT -p BTC_USDT_PERP --use-minio
177
-
178
- # enable debug mode and turn off using Ray
179
- pfeed download -d BYBIT -p BTC_USDT_PERP --debug --no-ray
180
- ```
181
-
182
- ### 3. Download Historical Data in Python
183
- ```python
184
- import pfeed as pe
185
-
186
- # compared to the CLI approach, this approach is more convenient for downloading multiple products
187
- pe.download(
188
- data_source='bybit',
189
- pdts=[
190
- 'BTC_USDT_PERP',
191
- 'ETH_USDT_PERP',
192
- 'BCH_USDT_PERP',
193
- ],
194
- dtypes=['raw'], # data types, e.g. 'raw', 'tick', 'second', 'minute' etc.
195
- start_date='2024-03-01',
196
- end_date='2024-03-08',
197
- use_minio=False,
198
- )
199
- ```
200
-
201
- ### List Current Config
202
- ```bash
203
- # list the current config:
204
- pfeed config --list
205
-
206
- # change the data storage location to your local project's 'data' folder:
207
- pfeed config --data-path ./data
208
-
209
- # for more commands:
210
- pfeed --help
211
- ```
212
-
213
- ### Run PFeed's docker-compose.yml
214
- ```bash
215
- # same as 'docker-compose', only difference is it has pointed to pfeed's docker-compose.yml file
216
- pfeed docker-compose [COMMAND]
217
-
218
- # e.g. start services
219
- pfeed docker-compose up -d
220
-
221
- # e.g. stop services
222
- pfeed docker-compose down
223
- ```
224
-
225
-
226
- ## Supported Data Sources
227
- | Data Source | Get Historical Data | Download Historical Data | Get Live/Paper Data | Stream Live/Paper Data |
228
- | ------------------------- | ------------------- | ------------------------ | ------------------- | ---------------------- |
229
- | Yahoo Finance | 🟢 | ⚪ | ⚪ | ⚪ |
230
- | Bybit | 🟢 | 🟢 | 🟡 | 🔴 |
231
- | *Interactive Brokers (IB) | 🔴 | ⚪ | 🔴 | 🔴 |
232
- | *[FirstRate Data] | 🔴 | 🔴 | ⚪ | ⚪ |
233
- | [Databento] | 🔴 | 🔴 | 🔴 | 🔴 |
234
- | [Polygon] | 🔴 | 🔴 | 🔴 | 🔴 |
235
- | Binance | 🔴 | 🔴 | 🔴 | 🔴 |
236
- | OKX | 🔴 | 🔴 | 🔴 | 🔴 |
237
-
238
- 🟢 = finished \
239
- 🟡 = in progress \
240
- 🔴 = todo \
241
- ⚪ = not applicable \
242
- \* = paid data
243
-
244
-
245
- ## Supported Data Tools
246
- | Data Tools | Supported |
247
- | ------------------------ | --------- |
248
- | Pandas | 🟢 |
249
- | [Polars] | 🟢 |
250
- | [Dask] | 🔴 |
251
- | [Spark] | 🔴 |
252
- | [DuckDB] | 🔴 |
253
- | [Daft] | 🔴 |
254
-
255
-
256
- ## Related Projects
257
- - [PFund] — A Complete Algo-Trading Framework for Machine Learning, TradFi, CeFi and DeFi ready. Supports Vectorized and Event-Driven Backtesting, Paper and Live Trading
258
- - [PyTrade.org] - A curated list of Python libraries and resources for algorithmic trading.
259
-
260
-
261
- ## Disclaimer
262
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
263
-
264
- This framework is intended for educational and research purposes only. It should not be used for real trading without understanding the risks involved. Trading in financial markets involves significant risk, and there is always the potential for loss. Your trading results may vary. No representation is being made that any account will or is likely to achieve profits or losses similar to those discussed on this platform.
265
-
266
- The developers of this framework are not responsible for any financial losses incurred from using this software. This includes but not limited to losses resulting from inaccuracies in any financial data output by PFeed. Users should conduct their due diligence, verify the accuracy of any data produced by PFeed, and consult with a professional financial advisor before engaging in real trading activities.
267
-