pfeed 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pfeed/__init__.py +62 -0
- pfeed/cli/__init__.py +4 -0
- pfeed/cli/commands/__init__.py +0 -0
- pfeed/cli/commands/config.py +69 -0
- pfeed/cli/commands/docker_compose.py +33 -0
- pfeed/cli/commands/download.py +47 -0
- pfeed/cli/commands/open.py +47 -0
- pfeed/cli/commands/stream.py +0 -0
- pfeed/cli/main.py +24 -0
- pfeed/config_handler.py +148 -0
- pfeed/const/common.py +15 -0
- pfeed/const/paths.py +15 -0
- pfeed/data_tools/data_tool_pandas.py +62 -0
- pfeed/data_tools/data_tool_polars.py +65 -0
- pfeed/datastore.py +145 -0
- pfeed/etl.py +405 -0
- pfeed/feeds/__init__.py +3 -0
- pfeed/feeds/base_feed.py +296 -0
- pfeed/feeds/binance_feed.py +21 -0
- pfeed/feeds/bybit_feed.py +53 -0
- pfeed/feeds/custom_csv_feed.py +13 -0
- pfeed/feeds/yahoo_finance_feed.py +178 -0
- pfeed/filepath.py +103 -0
- pfeed/main.py +18 -0
- pfeed/resolution.py +62 -0
- pfeed/sources/binance/__init__.py +11 -0
- pfeed/sources/binance/api.py +105 -0
- pfeed/sources/binance/const.py +47 -0
- pfeed/sources/binance/download.py +181 -0
- pfeed/sources/binance/stream.py +3 -0
- pfeed/sources/bybit/__init__.py +4 -0
- pfeed/sources/bybit/api.py +76 -0
- pfeed/sources/bybit/const.py +25 -0
- pfeed/sources/bybit/download.py +196 -0
- pfeed/sources/bybit/stream.py +3 -0
- pfeed/sources/bybit/types.py +4 -0
- pfeed/sources/bybit/utils.py +44 -0
- pfeed/types/common_literals.py +13 -0
- pfeed/utils/file_format.py +76 -0
- pfeed/utils/monitor.py +21 -0
- pfeed/utils/utils.py +122 -0
- pfeed/utils/validate.py +39 -0
- pfeed-0.0.1.dist-info/LICENSE +201 -0
- pfeed-0.0.1.dist-info/METADATA +267 -0
- pfeed-0.0.1.dist-info/RECORD +47 -0
- pfeed-0.0.1.dist-info/WHEEL +4 -0
- pfeed-0.0.1.dist-info/entry_points.txt +3 -0
pfeed/__init__.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from pfeed.types.common_literals import tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_TYPES
|
|
6
|
+
|
|
7
|
+
import importlib
|
|
8
|
+
from importlib.metadata import version
|
|
9
|
+
|
|
10
|
+
from pfeed.config_handler import configure, get_config
|
|
11
|
+
from pfeed.const.common import ALIASES
|
|
12
|
+
from pfeed.sources import bybit
|
|
13
|
+
from pfeed.feeds import BybitFeed, YahooFinanceFeed
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def download_historical_data(
    data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
    pdts: str | list[str] | None = None,
    dtypes: tSUPPORTED_DATA_TYPES | list[tSUPPORTED_DATA_TYPES] | None = None,
    ptypes: str | list[str] | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    num_cpus: int = 8,
    use_ray: bool = True,
    use_minio: bool = False,
):
    """Dispatch a historical-data download to the chosen data source's module.

    The module ``pfeed.sources.<data_source in lower case>`` is imported on
    demand and its ``download_historical_data`` function is called with every
    other argument forwarded unchanged as a keyword argument.

    Returns:
        Whatever the source module's ``download_historical_data`` returns.

    Raises:
        ModuleNotFoundError: if no module exists for ``data_source``.
    """
    module_name = f"pfeed.sources.{data_source.lower()}"
    source_module = importlib.import_module(module_name)
    forwarded = {
        'pdts': pdts,
        'dtypes': dtypes,
        'ptypes': ptypes,
        'start_date': start_date,
        'end_date': end_date,
        'num_cpus': num_cpus,
        'use_ray': use_ray,
        'use_minio': use_minio,
    }
    return source_module.download_historical_data(**forwarded)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# TODO
|
|
41
|
+
def stream_realtime_data(data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES):
    """Dispatch real-time streaming to the chosen data source's module.

    Imports ``pfeed.sources.<data_source in lower case>`` on demand and returns
    the result of calling its ``stream_realtime_data()`` with no arguments.
    """
    module_name = f"pfeed.sources.{data_source.lower()}"
    source_module = importlib.import_module(module_name)
    return source_module.stream_realtime_data()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Short aliases for the two module-level entry points.
download = download_historical_data
stream = stream_realtime_data


# Version string read from the installed distribution's metadata.
__version__ = version("pfeed")
# Only names that this module actually binds are listed here: a name in
# __all__ that does not exist makes `from pfeed import *` raise AttributeError.
# "binance" and "BinanceFeed" were listed previously but are never imported
# into this module, so they are left out until they are.
__all__ = (
    "__version__",
    "configure",
    "get_config",
    "ALIASES",
    "bybit",
    "YahooFinanceFeed",
    "BybitFeed",
)
|
pfeed/cli/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from pprint import pformat
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from pfeed.const.paths import USER_CONFIG_FILE_PATH
|
|
9
|
+
from pfeed.config_handler import ConfigHandler
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def save_config(config: ConfigHandler, config_file_path: str | Path):
    """Write the config object's attributes to a YAML file.

    The parent directory of ``config_file_path`` is created if it is missing,
    then ``config.__dict__`` is dumped in block (non-flow) YAML style.
    """
    target = Path(config_file_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w') as stream:
        yaml.dump(vars(config), stream, default_flow_style=False)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def remove_config(config_file_path: str | Path):
|
|
21
|
+
config_file_path = Path(config_file_path)
|
|
22
|
+
if config_file_path.is_file():
|
|
23
|
+
os.remove(config_file_path)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.command()
@click.pass_context
@click.option('--data-path', type=click.Path(resolve_path=True), help='Set the data path')
@click.option('--log-path', type=click.Path(resolve_path=True), help='Set the log path')
@click.option('--logging-file', 'logging_config_file_path', type=click.Path(resolve_path=True, exists=True), help='Set the logging config file path')
@click.option('--logging-config', type=dict, help='Set the logging config')
@click.option('--use-fork-process', type=bool, help='If True, multiprocessing.set_start_method("fork")')
@click.option('--use-custom-excepthook', type=bool, help='If True, log uncaught exceptions to file')
@click.option('--env-file', 'env_file_path', type=click.Path(resolve_path=True, exists=True), help='Path to the .env file')
@click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
@click.option('--list', '-l', is_flag=True, is_eager=True, help='List all available options')
@click.option('--reset', is_flag=True, is_eager=True, help='Reset the configuration to defaults')
def config(ctx, **kwargs):
    """Configures pfeed settings."""
    # The docstring above is also the command's --help text, so the details
    # are kept in comments.
    #
    # Behaviour:
    #   --list   prints the current config (no other option allowed)
    #   --reset  deletes the user's config file (no other option allowed)
    #   otherwise every provided option is set on the config object, which is
    #   then saved to USER_CONFIG_FILE_PATH.
    # Raises click.UsageError on invalid option combinations or when no option
    # is given at all.
    cfg: ConfigHandler = ctx.obj['config']

    # Options omitted by the user arrive as None (valued options) or as False
    # (is_flag options). Only flags may be dropped on False: an explicit
    # `--use-fork-process false` is a real request and must be kept.
    flag_names = {'debug', 'list', 'reset'}
    provided_options = {
        name: value
        for name, value in kwargs.items()
        if value is not None and not (name in flag_names and value is False)
    }

    if provided_options.pop('list', False):  # --list was used
        # Raise instead of assert: asserts are stripped when running with -O.
        if provided_options:
            raise click.UsageError("No options should be provided with --list")
        # Build a separate dict so the display-only key is not added to the
        # config object itself.
        config_dict = {**cfg.__dict__, 'config_file_path': USER_CONFIG_FILE_PATH}
        click.echo(f"PFeed's config:\n{pformat(config_dict)}")
        return

    if provided_options.pop('reset', False):  # --reset was used
        if provided_options:
            raise click.UsageError("No options should be provided with --reset")
        remove_config(USER_CONFIG_FILE_PATH)
        click.echo("PFeed's config successfully reset.")
        return

    # Nothing to change: tell the user how to discover the options.
    if not provided_options:
        raise click.UsageError("No options provided. Use --list to see all available options.")

    for option, value in provided_options.items():
        setattr(cfg, option, value)
        click.echo(f"{option} set to: {value}")

    save_config(cfg, USER_CONFIG_FILE_PATH)
    click.echo(f"config saved to {USER_CONFIG_FILE_PATH}.")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import importlib.resources
|
|
4
|
+
import subprocess
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from pfeed.const.paths import PROJ_NAME
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.command(context_settings={
    'ignore_unknown_options': True,
    'allow_extra_args': True,
})
@click.pass_context
@click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
@click.option('--docker-file', 'docker_file_path', type=click.Path(exists=True), help='Path to the docker-compose.yml file')
def docker_compose(ctx, env_file_path, docker_file_path):
    """Forwards commands to docker-compose with the package's docker-compose.yml file if not specified."""
    cfg = ctx.obj['config']
    cfg.load_env_file(env_file_path)
    # Export the configured data path through the environment before docker-compose runs.
    os.environ['PFEED_DATA_PATH'] = cfg.data_path

    if docker_file_path:
        click.echo(f'loaded custom docker-compose.yml file from "{docker_file_path}"')
    else:
        # Default compose file: looked up in the directory that contains the package directory.
        package_parent = Path(importlib.resources.files(PROJ_NAME)).resolve().parent
        docker_file_path = package_parent / 'docker-compose.yml'

    # Unknown options and extra arguments collected by click are passed straight through.
    subprocess.run(['docker-compose', '-f', str(docker_file_path), *ctx.args])
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
import pfeed as pe
|
|
6
|
+
from pfeed.const.common import (
|
|
7
|
+
ALIASES,
|
|
8
|
+
SUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
9
|
+
SUPPORTED_DATA_TYPES,
|
|
10
|
+
SUPPORTED_PRODUCT_TYPES,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Accepted --data-source values: every supported download source, followed by
# each alias whose target is one of those sources.
SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED = [
    *SUPPORTED_DOWNLOAD_DATA_SOURCES,
    *(alias for alias, target in ALIASES.items() if target in SUPPORTED_DOWNLOAD_DATA_SOURCES),
]

# The bare 'raw' data type carries no timeframe, but it is accepted for convenience:
# a data source such as bybit has a single raw data type ('raw_tick'), so 'raw' is
# converted to 'raw_tick' there.
SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED = [*SUPPORTED_DATA_TYPES, 'raw']
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@click.command()
@click.option('--data-source', '-d', required=True, type=click.Choice(SUPPORTED_DOWNLOAD_DATA_SOURCES_ALIASES_INCLUDED, case_sensitive=False), help='Data source')
@click.option('--pdts', '-p', 'pdts', multiple=True, default=[], help='List of trading products')
@click.option('--dtypes', '--dt', 'dtypes', multiple=True, default=['raw'], type=click.Choice(SUPPORTED_DATA_TYPES_IMPLICIT_RAW_ALLOWED, case_sensitive=False), help=f'{SUPPORTED_DATA_TYPES=}. How to pass in multiple values: --dt raw --dt tick')
@click.option('--ptypes', '--pt', 'ptypes', multiple=True, default=[], type=click.Choice(SUPPORTED_PRODUCT_TYPES, case_sensitive=False), help='List of product types, e.g. PERP = get all perpetuals')
@click.option('--start-date', '-s', type=click.DateTime(formats=["%Y-%m-%d"]), help='Start date in YYYY-MM-DD format')
@click.option('--end-date', '-e', type=click.DateTime(formats=["%Y-%m-%d"]), help='End date in YYYY-MM-DD format')
@click.option('--num-cpus', '-n', default=8, type=int, help="number of logical CPUs used for Ray's tasks")
@click.option('--use-minio', '-m', is_flag=True, help='if enabled, data will be loaded into Minio')
@click.option('--no-ray', is_flag=True, help='if enabled, Ray will not be used')
@click.option('--env-file', 'env_file_path', type=click.Path(exists=True), help='Path to the .env file')
@click.option('--debug', is_flag=True, help='if enabled, debug mode will be enabled where logs at DEBUG level will be printed')
def download(data_source, pdts, dtypes, ptypes, start_date, end_date, num_cpus, no_ray, use_minio, env_file_path, debug):
    # Apply the per-invocation settings before any download module is imported.
    pe.configure(env_file_path=env_file_path, debug=debug)

    # Resolve an alias to its full data source name; other names pass through unchanged.
    resolved_source = ALIASES.get(data_source, data_source)
    pipeline = importlib.import_module(f'pfeed.sources.{resolved_source.lower()}.download')

    # click parsed the dates into datetimes; the pipeline is given 'YYYY-MM-DD' strings
    # (or the original empty value when a date was not supplied).
    start_arg = start_date
    if start_date:
        start_arg = start_date.date().strftime('%Y-%m-%d')
    end_arg = end_date
    if end_date:
        end_arg = end_date.date().strftime('%Y-%m-%d')

    pipeline.download_historical_data(
        pdts=pdts,
        dtypes=dtypes,
        ptypes=ptypes,
        start_date=start_arg,
        end_date=end_arg,
        num_cpus=num_cpus,
        use_ray=not no_ray,
        use_minio=use_minio,
    )
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import importlib.resources
|
|
3
|
+
import subprocess
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from pfeed.const.paths import PROJ_NAME, USER_CONFIG_FILE_PATH
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def open_with_vscode(file_path):
    """Open ``file_path`` with the ``code`` command, falling back to click's default editor.

    The fallback is used both when ``code`` exits with a non-zero status and
    when the ``code`` executable cannot be found.
    """
    try:
        subprocess.run(["code", str(file_path)], check=True)
        click.echo(f"Opened {file_path} with VS Code")
    except (subprocess.CalledProcessError, FileNotFoundError) as err:
        if isinstance(err, subprocess.CalledProcessError):
            reason = "Failed to open with VS Code. Falling back to default editor."
        else:
            reason = "VS Code command 'code' not found. Falling back to default editor."
        click.echo(reason)
        click.edit(filename=file_path)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@click.command()
@click.option('--config-file', '-c', is_flag=True, help='Open the config file')
@click.option('--log-file', '-l', is_flag=True, help='Open the logging.yml file for logging config')
@click.option('--docker-file', '-d', is_flag=True, help='Open the docker-compose.yml file')
@click.option('--default-editor', '-e', is_flag=True, help='Use default editor')
def open(config_file, log_file, docker_file, default_editor):
    """Opens the config file, the logging.yml file or the docker-compose.yml file."""
    # Exactly one target may be chosen. Counting the flags also rejects the
    # case where two of the three are given, which a plain all([...]) check
    # lets through (silently opening only the first one).
    selected_count = sum([config_file, log_file, docker_file])
    if selected_count > 1:
        click.echo('Please specify only one file to open')
        return
    if selected_count == 0:
        click.echo('Please specify a file to open')
        return

    if config_file:
        file_path = USER_CONFIG_FILE_PATH
    else:
        # logging.yml and docker-compose.yml are looked up in the directory
        # that contains the package directory.
        package_dir = Path(importlib.resources.files(PROJ_NAME)).resolve().parents[0]
        file_path = package_dir / ('logging.yml' if log_file else 'docker-compose.yml')

    if default_editor:
        click.edit(filename=file_path)
    else:
        open_with_vscode(file_path)
|
|
File without changes
|
pfeed/cli/main.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from pfeed.config_handler import get_config
|
|
4
|
+
from pfeed.cli.commands.docker_compose import docker_compose
|
|
5
|
+
from pfeed.cli.commands.config import config
|
|
6
|
+
from pfeed.cli.commands.download import download
|
|
7
|
+
# from pfeed.cli.commands.stream import stream
|
|
8
|
+
from pfeed.cli.commands.open import open
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.pass_context
@click.version_option()
def pfeed_group(ctx):
    """PFeed's CLI"""
    # Make sure ctx.obj is a dict, then store the global config in it so that
    # subcommands can read it from ctx.obj['config'].
    shared = ctx.ensure_object(dict)
    shared['config'] = get_config()


# Register the subcommands on the group.
for _command in (docker_compose, config, download, open):
    pfeed_group.add_command(_command)
# pfeed_group.add_command(stream)
|
pfeed/config_handler.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import logging
|
|
5
|
+
from types import TracebackType
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
from dotenv import find_dotenv, load_dotenv
|
|
10
|
+
|
|
11
|
+
from pfeed.const.paths import PROJ_NAME, MAIN_PATH, LOG_PATH, DATA_PATH, USER_CONFIG_FILE_PATH
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Process-wide ConfigHandler instance; stays None until it is first requested
# through ConfigHandler.get_instance() / get_config().
_global_config = None

# Public API of this module.
__all__ = ['get_config', 'configure']
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _custom_excepthook(exception_class: type[BaseException], exception: BaseException, traceback: TracebackType):
    '''Catches any uncaught exceptions and logs them

    Intended to be installed as sys.excepthook. The exception is logged at
    ERROR level on the project's logger together with its original traceback.

    Args:
        exception_class: class of the uncaught exception
        exception: the uncaught exception instance
        traceback: traceback object belonging to the exception
    '''
    # Passing the triple through exc_info logs the traceback exactly as it was
    # raised; re-raising here just to call logger.exception() would need a bare
    # `except:` and would add this function's own frame to the traceback.
    logging.getLogger(PROJ_NAME).error(
        'Uncaught exception:',
        exc_info=(exception_class, exception, traceback),
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ConfigHandler:
    '''Holds pfeed's settings and applies them whenever it is (re)initialized.

    Creating an instance runs initialize() through __post_init__.
    get_instance() returns the shared module-level instance, building it from
    the user's config file the first time it is requested.
    '''
    # directory where data is stored; created by initialize() if missing
    data_path: str = str(DATA_PATH)
    log_path: str = str(LOG_PATH)
    logging_config_file_path: str = f'{MAIN_PATH}/logging.yml'
    # replaced by an empty dict in initialize() when left as None
    logging_config: dict | None = None
    # if True (and not on Windows), the multiprocessing start method is set to 'fork'
    use_fork_process: bool = True
    # if True, _custom_excepthook is installed unless another hook is already set
    use_custom_excepthook: bool = False
    # explicit .env file; when empty, a .env file is searched for from the current working directory
    env_file_path: str | None = None
    # if True, the stream handlers' log level is set to DEBUG in logging_config
    debug: bool = False

    @classmethod
    def get_instance(cls):
        '''Returns the shared config object, loading it on first use'''
        global _global_config
        if _global_config is None:
            _global_config = cls.load_config()
        return _global_config

    @classmethod
    def load_config(cls):
        '''Loads user's config file and returns a ConfigHandler object'''
        config_file_path = USER_CONFIG_FILE_PATH
        if config_file_path.is_file():
            with open(config_file_path, 'r') as f:
                # an empty YAML file loads as None, so fall back to no overrides
                config = yaml.safe_load(f) or {}
        else:
            config = {}
        return cls(**config)

    def __post_init__(self):
        self.initialize()

    def initialize(self):
        '''Applies the current settings.

        Creates the data directory, sets the multiprocessing start method,
        installs the custom excepthook, loads the .env file and enables debug
        mode, each according to the corresponding attribute.
        '''
        self.logging_config = self.logging_config or {}

        for path in [self.data_path]:
            if not os.path.exists(path):
                # exist_ok covers the directory being created between the check and this call
                os.makedirs(path, exist_ok=True)
                print(f'created {path}')

        if self.use_fork_process and sys.platform != 'win32':
            multiprocessing.set_start_method('fork', force=True)

        # only install the hook when the interpreter's default hook is still in place
        if self.use_custom_excepthook and sys.excepthook is sys.__excepthook__:
            sys.excepthook = _custom_excepthook

        self.load_env_file(self.env_file_path)

        if self.debug:
            self.enable_debug_mode()

    def load_env_file(self, env_file_path: str | None):
        '''Loads environment variables from a .env file, overriding existing ones.

        Args:
            env_file_path: path to the .env file. If empty, a .env file is
                searched for starting from the current working directory;
                nothing is loaded when none is found.
        '''
        if not env_file_path:
            env_file_path = find_dotenv(usecwd=True, raise_error_if_not_found=False)
            if not env_file_path:
                # print('.env file is not found')
                return
            print(f'.env file path is not specified, using env file in "{env_file_path}"')
        # Always pass an explicit path: when given no path, load_dotenv runs its
        # own search, which may pick a different file than the one reported above.
        load_dotenv(env_file_path, override=True)

    def enable_debug_mode(self):
        '''Enables debug mode by setting the log level to DEBUG for all stream handlers'''
        handlers = self.logging_config.setdefault('handlers', {})
        for handler in ['stream_handler', 'stream_path_handler']:
            handlers.setdefault(handler, {})['level'] = 'DEBUG'
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def configure(
    data_path: str | None = None,
    log_path: str | None = None,
    logging_config_file_path: str | None = None,
    logging_config: dict | None = None,
    use_fork_process: bool | None = None,
    use_custom_excepthook: bool | None = None,
    env_file_path: str | None = None,
    debug: bool | None = None,
    **kwargs,
):
    '''Overrides values on the global config object and re-initializes it.

    Arguments left as None keep the value already held by the config object
    (which comes from the user's config file or the defaults). Extra keyword
    arguments are accepted only if they name an existing attribute.

    Returns:
        The updated global config object.

    Raises:
        AttributeError: if an extra keyword argument is not an attribute of
            the config object.
    '''
    global _global_config
    _global_config = get_config()

    # named overrides: only the ones that were actually provided are applied
    named_overrides = {
        'data_path': data_path,
        'log_path': log_path,
        'logging_config_file_path': logging_config_file_path,
        'logging_config': logging_config,
        'use_fork_process': use_fork_process,
        'use_custom_excepthook': use_custom_excepthook,
        'env_file_path': env_file_path,
        'debug': debug,
    }
    for attr_name, attr_value in named_overrides.items():
        if attr_value is not None:
            setattr(_global_config, attr_name, attr_value)

    # extra overrides: applied as given, but unknown names are rejected
    for k, v in kwargs.items():
        if not hasattr(_global_config, k):
            raise AttributeError(f'{k} is not an attribute of ConfigHandler')
        setattr(_global_config, k, v)

    _global_config.initialize()
    return _global_config
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def get_config() -> ConfigHandler:
    '''Returns the shared ConfigHandler instance'''
    shared_config = ConfigHandler.get_instance()
    return shared_config
|
pfeed/const/common.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Names accepted across the package, one list per category.
SUPPORTED_ENVIRONMENTS = [
    'BACKTEST',
    'SANDBOX',
    'PAPER',
    'LIVE',
]
SUPPORTED_DATA_FEEDS = [
    'YAHOO_FINANCE',
    'BYBIT',
    'BINANCE',
]
SUPPORTED_STORAGES = [
    'local',
    'minio',
]
SUPPORTED_DOWNLOAD_DATA_SOURCES = [
    'BYBIT',
    'BINANCE',
]
SUPPORTED_CRYPTO_EXCHANGES = [
    'BYBIT',
    'BINANCE',
]
SUPPORTED_DATA_TOOLS = [
    'pandas',
    'polars',
]
SUPPORTED_PRODUCT_TYPES = [
    'SPOT',
    'PERP',
    'IPERP',
    'FUT',
    'IFUT',
]
# Each timeframe appears twice: once with the 'raw_' prefix and once without.
SUPPORTED_DATA_TYPES = [
    'raw_tick',
    'raw_second',
    'raw_minute',
    'raw_hour',
    'raw_daily',
    'tick',
    'second',
    'minute',
    'hour',
    'daily',
]

# Short name -> full data feed name.
ALIASES = {'YF': 'YAHOO_FINANCE'}
|
pfeed/const/paths.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from platformdirs import user_log_dir, user_data_dir, user_config_dir
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# project paths
# This file lives two levels below the package directory's parent:
# <MAIN_PATH>/<PROJ_NAME>/const/paths.py
_PACKAGE_DIR = Path(__file__).resolve().parents[1]
PROJ_NAME = _PACKAGE_DIR.name
MAIN_PATH = _PACKAGE_DIR.parent
PROJ_PATH = MAIN_PATH / PROJ_NAME


# user paths (per-user directories provided by platformdirs, each with a
# project-named subdirectory)
LOG_PATH = Path(user_log_dir()) / PROJ_NAME
DATA_PATH = Path(user_data_dir()) / PROJ_NAME
USER_CONFIG_PATH = Path(user_config_dir()) / PROJ_NAME
USER_CONFIG_FILE_PATH = USER_CONFIG_PATH / f'{PROJ_NAME}_config.yml'
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
if TYPE_CHECKING:
|
|
4
|
+
from pfeed.resolution import ExtendedResolution
|
|
5
|
+
from pfeed.types.common_literals import tSUPPORTED_STORAGES
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import io
|
|
9
|
+
|
|
10
|
+
import s3fs
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from pfeed.const.common import SUPPORTED_STORAGES
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
name = 'pandas'
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def read_parquet(path_or_obj: str | bytes, *args, storage: tSUPPORTED_STORAGES='local', **kwargs) -> pd.DataFrame:
    """Read parquet data into a pandas DataFrame.

    Args:
        path_or_obj: raw parquet bytes, or a path to a parquet file.
        storage: where a path should be read from, 'local' or 'minio'.
            It is not used when bytes are given.
        *args, **kwargs: forwarded to ``pd.read_parquet``.

    For 'minio', the connection settings are read from the environment
    variables MINIO_HOST, MINIO_PORT, MINIO_ROOT_USER and MINIO_ROOT_PASSWORD.
    """
    assert storage in SUPPORTED_STORAGES, f'{storage=} not in {SUPPORTED_STORAGES}'

    # In-memory bytes are wrapped in a file-like object and read directly.
    if isinstance(path_or_obj, bytes):
        return pd.read_parquet(io.BytesIO(path_or_obj), *args, **kwargs)

    if storage == 'local':
        return pd.read_parquet(path_or_obj, *args, **kwargs)

    if storage == 'minio':
        minio_host = os.getenv('MINIO_HOST', 'localhost')
        minio_port = os.getenv('MINIO_PORT', '9000')
        minio_fs = s3fs.S3FileSystem(
            endpoint_url="http://" + minio_host + ':' + minio_port,
            key=os.getenv('MINIO_ROOT_USER', 'pfunder'),
            secret=os.getenv('MINIO_ROOT_PASSWORD', 'password'),
        )
        return pd.read_parquet(path_or_obj, *args, filesystem=minio_fs, **kwargs)

    raise NotImplementedError(f'{storage=}')
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def concat(dfs: list[pd.DataFrame], *args, **kwargs) -> pd.DataFrame:
    """Concatenate pandas DataFrames; extra arguments are forwarded to ``pd.concat``."""
    combined = pd.concat(dfs, *args, **kwargs)
    return combined
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def estimate_memory_usage(df: pd.DataFrame) -> float:
    """Return the DataFrame's deep memory usage, summed over all columns, in GB (1024**3 bytes)."""
    bytes_per_gb = 1024 ** 3
    total_bytes = df.memory_usage(deep=True).sum()
    return total_bytes / bytes_per_gb
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def organize_time_series_columns(pdt: str, resolution: str | ExtendedResolution, df: pd.DataFrame) -> pd.DataFrame:
    """Add 'product' and 'resolution' columns and put them, after 'ts', at the far left.

    Args:
        pdt: value written to every row of the new 'product' column.
        resolution: a resolution string or ExtendedResolution; its ``repr`` is
            written to every row of the new 'resolution' column.
        df: frame that has a 'ts' column and neither of the two new columns.

    Returns:
        A reindexed frame whose columns start with 'ts', 'product',
        'resolution'. Note that the two new columns are also added to the
        frame that was passed in.
    """
    from pfeed.resolution import ExtendedResolution

    existing = df.columns
    assert 'ts' in existing, "'ts' column not found"
    assert 'product' not in existing, "'product' column already exists"
    assert 'resolution' not in existing, "'resolution' column already exists"

    if isinstance(resolution, str):
        resolution = ExtendedResolution(resolution)

    df['product'] = pdt
    df['resolution'] = repr(resolution)

    leading = ['ts', 'product', 'resolution']
    trailing = [col for col in df.columns if col not in leading]
    return df.reindex(columns=leading + trailing)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
if TYPE_CHECKING:
|
|
4
|
+
from pfeed.resolution import ExtendedResolution
|
|
5
|
+
from pfeed.types.common_literals import tSUPPORTED_STORAGES
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from pfeed.const.common import SUPPORTED_STORAGES
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
name = 'polars'
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def read_parquet(path_or_obj: str | bytes, *args, storage: tSUPPORTED_STORAGES='local', **kwargs) -> pl.DataFrame | pl.LazyFrame:
    """Read parquet data with polars.

    Bytes are read eagerly with ``pl.read_parquet``; paths are scanned lazily
    with ``pl.scan_parquet``.

    Args:
        path_or_obj: raw parquet bytes, or a path to a parquet file.
        storage: where a path should be read from, 'local' or 'minio'.
            It is not used when bytes are given.
        *args, **kwargs: forwarded to the polars reader.

    For 'minio', the connection settings are read from the environment
    variables MINIO_HOST, MINIO_PORT, MINIO_ROOT_USER and MINIO_ROOT_PASSWORD.
    """
    assert storage in SUPPORTED_STORAGES, f'{storage=} not in {SUPPORTED_STORAGES}'

    if isinstance(path_or_obj, bytes):
        return pl.read_parquet(path_or_obj, *args, **kwargs)

    if storage == 'local':
        return pl.scan_parquet(path_or_obj, *args, **kwargs)

    if storage == 'minio':
        minio_host = os.getenv('MINIO_HOST', 'localhost')
        minio_port = os.getenv('MINIO_PORT', '9000')
        minio_options = {
            "endpoint_url": "http://" + minio_host + ':' + minio_port,
            "access_key_id": os.getenv('MINIO_ROOT_USER', 'pfunder'),
            "secret_access_key": os.getenv('MINIO_ROOT_PASSWORD', 'password'),
        }
        return pl.scan_parquet(path_or_obj, *args, storage_options=minio_options, **kwargs)

    raise NotImplementedError(f'{storage=}')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def concat(dfs: list[pl.DataFrame | pl.LazyFrame], *args, **kwargs) -> pl.DataFrame | pl.LazyFrame:
    """Concatenate polars frames; extra arguments are forwarded to ``pl.concat``."""
    combined = pl.concat(dfs, *args, **kwargs)
    return combined
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def estimate_memory_usage(df: pl.DataFrame | pl.LazyFrame) -> float:
    """Return the estimated size of a polars frame in GB.

    A LazyFrame is collected first so that its size can be estimated.
    """
    materialized = df.collect() if isinstance(df, pl.LazyFrame) else df
    return materialized.estimated_size(unit='gb')
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def organize_time_series_columns(pdt: str, resolution: str | ExtendedResolution, df: pl.DataFrame | pl.LazyFrame) -> pl.DataFrame | pl.LazyFrame:
    """Add 'product' and 'resolution' columns and put them, after 'ts', at the far left.

    Args:
        pdt: literal value for the new 'product' column.
        resolution: a resolution string or ExtendedResolution; its ``repr`` is
            the literal value for the new 'resolution' column.
        df: frame that has a 'ts' column and neither of the two new columns.

    Returns:
        A frame whose columns start with 'ts', 'product', 'resolution'.
    """
    from pfeed.resolution import ExtendedResolution

    # Column names of a LazyFrame come from its schema.
    existing = df.collect_schema().names() if isinstance(df, pl.LazyFrame) else df.columns
    assert 'ts' in existing, "'ts' column not found"
    assert 'product' not in existing, "'product' column already exists"
    assert 'resolution' not in existing, "'resolution' column already exists"

    if isinstance(resolution, str):
        resolution = ExtendedResolution(resolution)

    product_col = pl.lit(pdt).alias('product')
    resolution_col = pl.lit(repr(resolution)).alias('resolution')
    df = df.with_columns(product_col, resolution_col)

    leading = ['ts', 'product', 'resolution']
    trailing = [col for col in df.collect_schema().names() if col not in leading]
    return df.select(leading + trailing)
|