pfeed 0.0.1.dev6__tar.gz → 0.0.1.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/PKG-INFO +4 -8
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/const/commons.py +4 -3
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/etl.py +14 -22
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/bybit_feed.py +4 -6
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/filepath.py +4 -5
- pfeed-0.0.1.dev7/pfeed/types/common_literals.py +10 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pyproject.toml +15 -11
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/LICENSE +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/README.md +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/config.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/docker_compose.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/download.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/stream.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/main.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/config/logging.yml +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/config_handler.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/const/paths.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_pandas.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_polars.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_pyspark.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/datastore.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/base_feed.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/custom_csv_feed.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/yahoo_finance_feed.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/main.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/__init__.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/api.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/const.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/download.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/stream.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/monitor.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/utils.py +0 -0
- {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/validate.py +0 -0
|
@@ -1,29 +1,25 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pfeed
|
|
3
|
-
Version: 0.0.1.dev6
|
|
3
|
+
Version: 0.0.1.dev7
|
|
4
4
|
Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
|
|
5
5
|
Home-page: https://pfund.ai
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
|
|
8
8
|
Author: Stephen Yau
|
|
9
9
|
Author-email: softwareentrepreneer+pfeed@gmail.com
|
|
10
|
-
Requires-Python: >=3.10,<3.12
|
|
10
|
+
Requires-Python: >=3.10,<3.13
|
|
11
11
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
16
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
|
16
17
|
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
17
|
-
Requires-Dist: connectorx (>=0.3.2,<0.4.0)
|
|
18
18
|
Requires-Dist: minio (>=7.2.5,<8.0.0)
|
|
19
|
-
Requires-Dist: pandas (>=2.2.0,<3.0.0)
|
|
20
|
-
Requires-Dist: pfund (>=0.0.1.dev5,<0.0.2)
|
|
19
|
+
Requires-Dist: pfund (>=0.0.1.dev7,<0.0.2)
|
|
21
20
|
Requires-Dist: platformdirs (>=4.2.0,<5.0.0)
|
|
22
|
-
Requires-Dist: polars (>=0.20.16,<0.21.0)
|
|
23
|
-
Requires-Dist: pyarrow (>=15.0.0,<16.0.0)
|
|
24
21
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
25
22
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
26
|
-
Requires-Dist: ray (>=2.10.0,<3.0.0)
|
|
27
23
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
28
24
|
Requires-Dist: rich (>=13.7.0,<14.0.0)
|
|
29
25
|
Requires-Dist: s3fs (>=2024.3.1,<2025.0.0)
|
|
@@ -3,8 +3,9 @@ SUPPORTED_DATA_TYPES = ['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw
|
|
|
3
3
|
'tick', 'second', 'minute', 'hour', 'daily']
|
|
4
4
|
SUPPORTED_DATA_SINKS = ['local', 'minio']
|
|
5
5
|
SUPPORTED_DOWNLOAD_DATA_SOURCES = ['BYBIT']
|
|
6
|
+
SUPPORTED_DATA_MODES = ['historical', 'streaming']
|
|
7
|
+
SUPPORTED_DATA_TOOLS = ['pandas', 'polars', 'pyspark']
|
|
8
|
+
|
|
6
9
|
ALIASES = {
|
|
7
10
|
'YF': 'YAHOO_FINANCE',
|
|
8
|
-
}
|
|
9
|
-
SUPPORTED_DATA_MODES = ['historical', 'streaming']
|
|
10
|
-
SUPPORTED_DATA_TOOLS = ['pandas', 'polars', 'pyspark']
|
|
11
|
+
}
|
|
@@ -3,8 +3,6 @@ import io
|
|
|
3
3
|
import logging
|
|
4
4
|
import importlib
|
|
5
5
|
|
|
6
|
-
from typing import Literal
|
|
7
|
-
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from minio.error import MinioException
|
|
10
8
|
|
|
@@ -12,17 +10,11 @@ from pfeed.datastore import Datastore
|
|
|
12
10
|
from pfeed.filepath import FilePath
|
|
13
11
|
from pfeed.config_handler import ConfigHandler
|
|
14
12
|
from pfeed.const.commons import SUPPORTED_DATA_TYPES, SUPPORTED_DATA_SINKS, SUPPORTED_DOWNLOAD_DATA_SOURCES, SUPPORTED_DATA_MODES
|
|
13
|
+
from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_SINKS, tSUPPORTED_DATA_TYPES, tSUPPORTED_DATA_MODES
|
|
15
14
|
from pfeed.utils.monitor import print_disk_usage
|
|
16
15
|
from pfund.datas.resolution import Resolution
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
DataSink = Literal['local', 'minio']
|
|
20
|
-
DataSource = Literal['BYBIT']
|
|
21
|
-
DataTool = Literal['pandas', 'polars', 'pyspark']
|
|
22
|
-
DataType = Literal['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily', 'raw', 'tick', 'second', 'minute', 'hour', 'daily']
|
|
23
|
-
DataMode = Literal['historical', 'streaming']
|
|
24
|
-
|
|
25
|
-
|
|
26
18
|
logger = logging.getLogger('pfeed')
|
|
27
19
|
|
|
28
20
|
|
|
@@ -37,11 +29,11 @@ def _convert_raw_dtype_to_explicit(data_source: str, dtype: str):
|
|
|
37
29
|
|
|
38
30
|
|
|
39
31
|
def get_data(
|
|
40
|
-
data_source:
|
|
41
|
-
dtype:
|
|
32
|
+
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
33
|
+
dtype: tSUPPORTED_DATA_TYPES,
|
|
42
34
|
pdt: str,
|
|
43
35
|
date: str,
|
|
44
|
-
mode:
|
|
36
|
+
mode: tSUPPORTED_DATA_MODES='historical',
|
|
45
37
|
) -> bytes | None:
|
|
46
38
|
"""Extract data without specifying the data origin.
|
|
47
39
|
This function will try to extract data from all supported data origins.
|
|
@@ -68,12 +60,12 @@ def get_data(
|
|
|
68
60
|
|
|
69
61
|
|
|
70
62
|
def extract_data(
|
|
71
|
-
data_sink:
|
|
72
|
-
data_source:
|
|
73
|
-
dtype:
|
|
63
|
+
data_sink: tSUPPORTED_DATA_SINKS,
|
|
64
|
+
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
65
|
+
dtype: tSUPPORTED_DATA_TYPES,
|
|
74
66
|
pdt: str,
|
|
75
67
|
date: str,
|
|
76
|
-
mode:
|
|
68
|
+
mode: tSUPPORTED_DATA_MODES='historical',
|
|
77
69
|
) -> bytes | None:
|
|
78
70
|
"""
|
|
79
71
|
Extracts data from a specified data source and returns it as bytes.
|
|
@@ -126,13 +118,13 @@ def extract_data(
|
|
|
126
118
|
|
|
127
119
|
|
|
128
120
|
def load_data(
|
|
129
|
-
data_sink:
|
|
130
|
-
data_source:
|
|
121
|
+
data_sink: tSUPPORTED_DATA_SINKS,
|
|
122
|
+
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
|
|
131
123
|
data: bytes,
|
|
132
|
-
dtype:
|
|
124
|
+
dtype: tSUPPORTED_DATA_TYPES,
|
|
133
125
|
pdt: str,
|
|
134
126
|
date: str,
|
|
135
|
-
mode:
|
|
127
|
+
mode: tSUPPORTED_DATA_MODES = 'historical',
|
|
136
128
|
**kwargs
|
|
137
129
|
) -> None:
|
|
138
130
|
"""
|
|
@@ -184,7 +176,7 @@ def load_data(
|
|
|
184
176
|
print_disk_usage(config.data_path)
|
|
185
177
|
|
|
186
178
|
|
|
187
|
-
def clean_raw_data(data_source:
|
|
179
|
+
def clean_raw_data(data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES, raw_data: bytes) -> bytes:
|
|
188
180
|
module = importlib.import_module(f'pfeed.sources.{data_source.lower()}.const')
|
|
189
181
|
RENAMING_COLS = getattr(module, 'RENAMING_COLS')
|
|
190
182
|
MAPPING_COLS = getattr(module, 'MAPPING_COLS')
|
|
@@ -216,6 +208,6 @@ def clean_raw_tick_data(raw_tick: bytes) -> bytes:
|
|
|
216
208
|
return tick_data
|
|
217
209
|
|
|
218
210
|
|
|
219
|
-
def resample_data(data: bytes, resolution: str | Resolution, data_tool:
|
|
211
|
+
def resample_data(data: bytes, resolution: str | Resolution, data_tool: tSUPPORTED_DATA_TOOLS='polars', check_if_drop_last_bar=False) -> bytes:
|
|
220
212
|
data_tool = importlib.import_module(f'pfeed.data_tools.data_tool_{data_tool.lower()}')
|
|
221
213
|
return data_tool.resample_data(data, resolution, check_if_drop_last_bar=check_if_drop_last_bar)
|
|
@@ -9,6 +9,7 @@ import pandas as pd
|
|
|
9
9
|
from pfeed import etl
|
|
10
10
|
from pfeed.config_handler import ConfigHandler
|
|
11
11
|
from pfeed.const.commons import SUPPORTED_DATA_TOOLS
|
|
12
|
+
from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS
|
|
12
13
|
from pfeed.feeds.base_feed import BaseFeed
|
|
13
14
|
from pfeed.sources.bybit import api
|
|
14
15
|
from pfeed.sources.bybit.const import DATA_SOURCE, SUPPORTED_PRODUCT_TYPES, create_efilename, SUPPORTED_RAW_DATA_TYPES
|
|
@@ -18,9 +19,6 @@ from pfeed.data_tools.data_tool_polars import estimate_memory_usage
|
|
|
18
19
|
# from pfund.exchanges.bybit.exchange import Exchange
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
DataTool = Literal['pandas', 'polars', 'pyspark']
|
|
22
|
-
|
|
23
|
-
|
|
24
22
|
__all__ = ['BybitFeed']
|
|
25
23
|
|
|
26
24
|
|
|
@@ -36,7 +34,7 @@ class BybitFeed(BaseFeed):
|
|
|
36
34
|
resolution: str | Literal['raw', 'raw_tick']='1d',
|
|
37
35
|
start_date: str=None,
|
|
38
36
|
end_date: str=None,
|
|
39
|
-
data_tool:
|
|
37
|
+
data_tool: tSUPPORTED_DATA_TOOLS='pandas',
|
|
40
38
|
memory_usage_limit_in_gb: int=2, # in GB
|
|
41
39
|
) -> pd.DataFrame | pl.LazyFrame:
|
|
42
40
|
"""Get historical data from Bybit.
|
|
@@ -90,8 +88,8 @@ class BybitFeed(BaseFeed):
|
|
|
90
88
|
local_data_dtype = ''
|
|
91
89
|
|
|
92
90
|
if local_data:
|
|
93
|
-
data_str = f'{source} {pdt} {date}'
|
|
94
|
-
self.logger.info(f'loaded {data_str}
|
|
91
|
+
data_str = f'{source} {pdt} {date} {local_data_dtype}'
|
|
92
|
+
self.logger.info(f'loaded {data_str} data locally')
|
|
95
93
|
# REVIEW: last bar is very likely incomplete when e.g. 20 days of daily data is resampled to '3d'
|
|
96
94
|
check_if_drop_last_bar = (local_data_dtype == dtype and resolution.period != 1)
|
|
97
95
|
if local_data_dtype == dtype and resolution.period == 1:
|
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
from
|
|
5
|
-
|
|
4
|
+
from pfeed.types.common_literals import tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_MODES, tSUPPORTED_DATA_TYPES
|
|
6
5
|
from pfeed.utils.utils import create_filename
|
|
7
6
|
from pfeed.const.paths import DATA_PATH
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
@dataclass
|
|
11
10
|
class FilePath:
|
|
12
|
-
data_source:
|
|
13
|
-
mode:
|
|
14
|
-
dtype:
|
|
11
|
+
data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES
|
|
12
|
+
mode: tSUPPORTED_DATA_MODES
|
|
13
|
+
dtype: tSUPPORTED_DATA_TYPES
|
|
15
14
|
pdt: str
|
|
16
15
|
date: str
|
|
17
16
|
file_extension: str = '.parquet.gz'
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
# since Literal doesn't support variables as inputs, define variables in commons.py here with prefix 't'
|
|
4
|
+
tSUPPORTED_DATA_FEEDS = Literal['YAHOO_FINANCE', 'BYBIT']
|
|
5
|
+
tSUPPORTED_DATA_TYPES = Literal['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
|
|
6
|
+
'tick', 'second', 'minute', 'hour', 'daily']
|
|
7
|
+
tSUPPORTED_DATA_SINKS = Literal['local', 'minio']
|
|
8
|
+
tSUPPORTED_DOWNLOAD_DATA_SOURCES = Literal['BYBIT']
|
|
9
|
+
tSUPPORTED_DATA_MODES = Literal['historical', 'streaming']
|
|
10
|
+
tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars', 'pyspark']
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "pfeed"
|
|
3
|
-
version = "0.0.1.dev6"
|
|
3
|
+
version = "0.0.1.dev7"
|
|
4
4
|
description = "Data pipeline for algo-trading, getting and storing both real-time and historical data made easy."
|
|
5
5
|
license = "Apache-2.0"
|
|
6
6
|
authors = ["Stephen Yau <softwareentrepreneer+pfeed@gmail.com>"]
|
|
@@ -11,28 +11,30 @@ documentation = "https://pfeed-docs.pfund.ai"
|
|
|
11
11
|
keywords = ["trading", "algo-trading", "data pipeline", "ETL", "data lake", "data warehouse", "data integration", "historical data", "live data", "data streaming"]
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
|
-
python = "
|
|
15
|
-
pfund = "^0.0.1.dev5"
|
|
14
|
+
python = ">=3.10 <3.13"
|
|
15
|
+
pfund = "^0.0.1.dev7"
|
|
16
16
|
python-dotenv = "^1.0.1"
|
|
17
17
|
pyyaml = "^6.0.1"
|
|
18
18
|
beautifulsoup4 = "^4.12.3"
|
|
19
19
|
requests = "^2.31.0"
|
|
20
20
|
rich = "^13.7.0"
|
|
21
21
|
tqdm = "^4.66.2"
|
|
22
|
-
pandas = "^2.2.0"
|
|
23
|
-
pyarrow = "^15.0.0"
|
|
24
22
|
click = "^8.1.7"
|
|
25
|
-
platformdirs = "^4.2.0"
|
|
26
|
-
polars = "^0.20.16"
|
|
27
23
|
s3fs = "^2024.3.1"
|
|
28
|
-
connectorx = "^0.3.2"
|
|
29
|
-
ray = "^2.10.0"
|
|
30
24
|
minio = "^7.2.5"
|
|
31
25
|
yfinance = "^0.2.37"
|
|
26
|
+
platformdirs = "^4.2.0"
|
|
32
27
|
|
|
33
28
|
[tool.poetry.scripts]
|
|
34
29
|
pfeed = "pfeed.main:run_cli"
|
|
35
30
|
|
|
31
|
+
[tool.poetry.group.pyodide-incompatible.dependencies]
|
|
32
|
+
pandas = "^2.2.0"
|
|
33
|
+
pyarrow = "^15.0.0"
|
|
34
|
+
polars = "^0.20.16"
|
|
35
|
+
connectorx = "^0.3.2"
|
|
36
|
+
ray = "^2.10.0"
|
|
37
|
+
|
|
36
38
|
[tool.poetry.group.dev]
|
|
37
39
|
optional = true
|
|
38
40
|
|
|
@@ -41,11 +43,13 @@ pfund = {path = "../pfund", develop = true}
|
|
|
41
43
|
pytest = "^8.0.0"
|
|
42
44
|
pre-commit = "^3.6.0"
|
|
43
45
|
bandit = "^1.7.7"
|
|
44
|
-
ruff = "^0.1.15"
|
|
45
|
-
pyright = "^1.1.349"
|
|
46
46
|
pytest-xdist = "^3.5.0"
|
|
47
47
|
faker = "^24.4.0"
|
|
48
48
|
tox = "^4.14.2"
|
|
49
|
+
mypy = "^1.9.0"
|
|
50
|
+
ruff = "^0.3.5"
|
|
51
|
+
pytest-mock = "^3.14.0"
|
|
52
|
+
pytest-cov = "^5.0.0"
|
|
49
53
|
|
|
50
54
|
[tool.poetry.group.doc]
|
|
51
55
|
optional = true
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|