pfeed 0.0.1.dev6__tar.gz → 0.0.1.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/PKG-INFO +4 -8
  2. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/const/commons.py +4 -3
  3. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/etl.py +14 -22
  4. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/bybit_feed.py +4 -6
  5. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/filepath.py +4 -5
  6. pfeed-0.0.1.dev7/pfeed/types/common_literals.py +10 -0
  7. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pyproject.toml +15 -11
  8. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/LICENSE +0 -0
  9. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/README.md +0 -0
  10. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/__init__.py +0 -0
  11. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/__init__.py +0 -0
  12. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/__init__.py +0 -0
  13. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/config.py +0 -0
  14. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/docker_compose.py +0 -0
  15. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/download.py +0 -0
  16. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/commands/stream.py +0 -0
  17. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/cli/main.py +0 -0
  18. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/config/logging.yml +0 -0
  19. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/config_handler.py +0 -0
  20. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/const/paths.py +0 -0
  21. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_pandas.py +0 -0
  22. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_polars.py +0 -0
  23. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/data_tools/data_tool_pyspark.py +0 -0
  24. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/datastore.py +0 -0
  25. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/__init__.py +0 -0
  26. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/base_feed.py +0 -0
  27. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/custom_csv_feed.py +0 -0
  28. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/feeds/yahoo_finance_feed.py +0 -0
  29. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/main.py +0 -0
  30. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/__init__.py +0 -0
  31. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/__init__.py +0 -0
  32. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/api.py +0 -0
  33. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/const.py +0 -0
  34. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/download.py +0 -0
  35. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/sources/bybit/stream.py +0 -0
  36. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/monitor.py +0 -0
  37. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/utils.py +0 -0
  38. {pfeed-0.0.1.dev6 → pfeed-0.0.1.dev7}/pfeed/utils/validate.py +0 -0
@@ -1,29 +1,25 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pfeed
3
- Version: 0.0.1.dev6
3
+ Version: 0.0.1.dev7
4
4
  Summary: Data pipeline for algo-trading, getting and storing both real-time and historical data made easy.
5
5
  Home-page: https://pfund.ai
6
6
  License: Apache-2.0
7
7
  Keywords: trading,algo-trading,data pipeline,ETL,data lake,data warehouse,data integration,historical data,live data,data streaming
8
8
  Author: Stephen Yau
9
9
  Author-email: softwareentrepreneer+pfeed@gmail.com
10
- Requires-Python: >=3.10,<3.12
10
+ Requires-Python: >=3.10,<3.13
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
15
16
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
16
17
  Requires-Dist: click (>=8.1.7,<9.0.0)
17
- Requires-Dist: connectorx (>=0.3.2,<0.4.0)
18
18
  Requires-Dist: minio (>=7.2.5,<8.0.0)
19
- Requires-Dist: pandas (>=2.2.0,<3.0.0)
20
- Requires-Dist: pfund (>=0.0.1.dev5,<0.0.2)
19
+ Requires-Dist: pfund (>=0.0.1.dev7,<0.0.2)
21
20
  Requires-Dist: platformdirs (>=4.2.0,<5.0.0)
22
- Requires-Dist: polars (>=0.20.16,<0.21.0)
23
- Requires-Dist: pyarrow (>=15.0.0,<16.0.0)
24
21
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
25
22
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
26
- Requires-Dist: ray (>=2.10.0,<3.0.0)
27
23
  Requires-Dist: requests (>=2.31.0,<3.0.0)
28
24
  Requires-Dist: rich (>=13.7.0,<14.0.0)
29
25
  Requires-Dist: s3fs (>=2024.3.1,<2025.0.0)
@@ -3,8 +3,9 @@ SUPPORTED_DATA_TYPES = ['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw
3
3
  'tick', 'second', 'minute', 'hour', 'daily']
4
4
  SUPPORTED_DATA_SINKS = ['local', 'minio']
5
5
  SUPPORTED_DOWNLOAD_DATA_SOURCES = ['BYBIT']
6
+ SUPPORTED_DATA_MODES = ['historical', 'streaming']
7
+ SUPPORTED_DATA_TOOLS = ['pandas', 'polars', 'pyspark']
8
+
6
9
  ALIASES = {
7
10
  'YF': 'YAHOO_FINANCE',
8
- }
9
- SUPPORTED_DATA_MODES = ['historical', 'streaming']
10
- SUPPORTED_DATA_TOOLS = ['pandas', 'polars', 'pyspark']
11
+ }
@@ -3,8 +3,6 @@ import io
3
3
  import logging
4
4
  import importlib
5
5
 
6
- from typing import Literal
7
-
8
6
  import pandas as pd
9
7
  from minio.error import MinioException
10
8
 
@@ -12,17 +10,11 @@ from pfeed.datastore import Datastore
12
10
  from pfeed.filepath import FilePath
13
11
  from pfeed.config_handler import ConfigHandler
14
12
  from pfeed.const.commons import SUPPORTED_DATA_TYPES, SUPPORTED_DATA_SINKS, SUPPORTED_DOWNLOAD_DATA_SOURCES, SUPPORTED_DATA_MODES
13
+ from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS, tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_SINKS, tSUPPORTED_DATA_TYPES, tSUPPORTED_DATA_MODES
15
14
  from pfeed.utils.monitor import print_disk_usage
16
15
  from pfund.datas.resolution import Resolution
17
16
 
18
17
 
19
- DataSink = Literal['local', 'minio']
20
- DataSource = Literal['BYBIT']
21
- DataTool = Literal['pandas', 'polars', 'pyspark']
22
- DataType = Literal['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily', 'raw', 'tick', 'second', 'minute', 'hour', 'daily']
23
- DataMode = Literal['historical', 'streaming']
24
-
25
-
26
18
  logger = logging.getLogger('pfeed')
27
19
 
28
20
 
@@ -37,11 +29,11 @@ def _convert_raw_dtype_to_explicit(data_source: str, dtype: str):
37
29
 
38
30
 
39
31
  def get_data(
40
- data_source: DataSource,
41
- dtype: DataType,
32
+ data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
33
+ dtype: tSUPPORTED_DATA_TYPES,
42
34
  pdt: str,
43
35
  date: str,
44
- mode: DataMode='historical',
36
+ mode: tSUPPORTED_DATA_MODES='historical',
45
37
  ) -> bytes | None:
46
38
  """Extract data without specifying the data origin.
47
39
  This function will try to extract data from all supported data origins.
@@ -68,12 +60,12 @@ def get_data(
68
60
 
69
61
 
70
62
  def extract_data(
71
- data_sink: DataSink,
72
- data_source: DataSource,
73
- dtype: DataType,
63
+ data_sink: tSUPPORTED_DATA_SINKS,
64
+ data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
65
+ dtype: tSUPPORTED_DATA_TYPES,
74
66
  pdt: str,
75
67
  date: str,
76
- mode: DataMode='historical',
68
+ mode: tSUPPORTED_DATA_MODES='historical',
77
69
  ) -> bytes | None:
78
70
  """
79
71
  Extracts data from a specified data source and returns it as bytes.
@@ -126,13 +118,13 @@ def extract_data(
126
118
 
127
119
 
128
120
  def load_data(
129
- data_sink: DataSink,
130
- data_source: DataSource,
121
+ data_sink: tSUPPORTED_DATA_SINKS,
122
+ data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES,
131
123
  data: bytes,
132
- dtype: DataType,
124
+ dtype: tSUPPORTED_DATA_TYPES,
133
125
  pdt: str,
134
126
  date: str,
135
- mode: DataMode = 'historical',
127
+ mode: tSUPPORTED_DATA_MODES = 'historical',
136
128
  **kwargs
137
129
  ) -> None:
138
130
  """
@@ -184,7 +176,7 @@ def load_data(
184
176
  print_disk_usage(config.data_path)
185
177
 
186
178
 
187
- def clean_raw_data(data_source: DataSource, raw_data: bytes) -> bytes:
179
+ def clean_raw_data(data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES, raw_data: bytes) -> bytes:
188
180
  module = importlib.import_module(f'pfeed.sources.{data_source.lower()}.const')
189
181
  RENAMING_COLS = getattr(module, 'RENAMING_COLS')
190
182
  MAPPING_COLS = getattr(module, 'MAPPING_COLS')
@@ -216,6 +208,6 @@ def clean_raw_tick_data(raw_tick: bytes) -> bytes:
216
208
  return tick_data
217
209
 
218
210
 
219
- def resample_data(data: bytes, resolution: str | Resolution, data_tool: DataTool='polars', check_if_drop_last_bar=False) -> bytes:
211
+ def resample_data(data: bytes, resolution: str | Resolution, data_tool: tSUPPORTED_DATA_TOOLS='polars', check_if_drop_last_bar=False) -> bytes:
220
212
  data_tool = importlib.import_module(f'pfeed.data_tools.data_tool_{data_tool.lower()}')
221
213
  return data_tool.resample_data(data, resolution, check_if_drop_last_bar=check_if_drop_last_bar)
@@ -9,6 +9,7 @@ import pandas as pd
9
9
  from pfeed import etl
10
10
  from pfeed.config_handler import ConfigHandler
11
11
  from pfeed.const.commons import SUPPORTED_DATA_TOOLS
12
+ from pfeed.types.common_literals import tSUPPORTED_DATA_TOOLS
12
13
  from pfeed.feeds.base_feed import BaseFeed
13
14
  from pfeed.sources.bybit import api
14
15
  from pfeed.sources.bybit.const import DATA_SOURCE, SUPPORTED_PRODUCT_TYPES, create_efilename, SUPPORTED_RAW_DATA_TYPES
@@ -18,9 +19,6 @@ from pfeed.data_tools.data_tool_polars import estimate_memory_usage
18
19
  # from pfund.exchanges.bybit.exchange import Exchange
19
20
 
20
21
 
21
- DataTool = Literal['pandas', 'polars', 'pyspark']
22
-
23
-
24
22
  __all__ = ['BybitFeed']
25
23
 
26
24
 
@@ -36,7 +34,7 @@ class BybitFeed(BaseFeed):
36
34
  resolution: str | Literal['raw', 'raw_tick']='1d',
37
35
  start_date: str=None,
38
36
  end_date: str=None,
39
- data_tool: Literal['pandas', 'polars', 'pyspark']='pandas',
37
+ data_tool: tSUPPORTED_DATA_TOOLS='pandas',
40
38
  memory_usage_limit_in_gb: int=2, # in GB
41
39
  ) -> pd.DataFrame | pl.LazyFrame:
42
40
  """Get historical data from Bybit.
@@ -90,8 +88,8 @@ class BybitFeed(BaseFeed):
90
88
  local_data_dtype = ''
91
89
 
92
90
  if local_data:
93
- data_str = f'{source} {pdt} {date}'
94
- self.logger.info(f'loaded {data_str} {local_data_dtype} data locally')
91
+ data_str = f'{source} {pdt} {date} {local_data_dtype}'
92
+ self.logger.info(f'loaded {data_str} data locally')
95
93
  # REVIEW: last bar is very likely incomplete when e.g. 20 days of daily data is resampled to '3d'
96
94
  check_if_drop_last_bar = (local_data_dtype == dtype and resolution.period != 1)
97
95
  if local_data_dtype == dtype and resolution.period == 1:
@@ -1,17 +1,16 @@
1
1
  from dataclasses import dataclass, field
2
2
  from pathlib import Path
3
3
 
4
- from typing import Literal
5
-
4
+ from pfeed.types.common_literals import tSUPPORTED_DOWNLOAD_DATA_SOURCES, tSUPPORTED_DATA_MODES, tSUPPORTED_DATA_TYPES
6
5
  from pfeed.utils.utils import create_filename
7
6
  from pfeed.const.paths import DATA_PATH
8
7
 
9
8
 
10
9
  @dataclass
11
10
  class FilePath:
12
- data_source: Literal['BYBIT']
13
- mode: Literal['historical', 'streaming']
14
- dtype: Literal['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily', 'tick', 'second', 'minute', 'hour', 'daily']
11
+ data_source: tSUPPORTED_DOWNLOAD_DATA_SOURCES
12
+ mode: tSUPPORTED_DATA_MODES
13
+ dtype: tSUPPORTED_DATA_TYPES
15
14
  pdt: str
16
15
  date: str
17
16
  file_extension: str = '.parquet.gz'
@@ -0,0 +1,10 @@
1
+ from typing import Literal
2
+
3
+ # since Literal doesn't support variables as inputs, define variables in commons.py here with prefix 't'
4
+ tSUPPORTED_DATA_FEEDS = Literal['YAHOO_FINANCE', 'BYBIT']
5
+ tSUPPORTED_DATA_TYPES = Literal['raw_tick', 'raw_second', 'raw_minute', 'raw_hour', 'raw_daily',
6
+ 'tick', 'second', 'minute', 'hour', 'daily']
7
+ tSUPPORTED_DATA_SINKS = Literal['local', 'minio']
8
+ tSUPPORTED_DOWNLOAD_DATA_SOURCES = Literal['BYBIT']
9
+ tSUPPORTED_DATA_MODES = Literal['historical', 'streaming']
10
+ tSUPPORTED_DATA_TOOLS = Literal['pandas', 'polars', 'pyspark']
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pfeed"
3
- version = "0.0.1.dev6"
3
+ version = "0.0.1.dev7"
4
4
  description = "Data pipeline for algo-trading, getting and storing both real-time and historical data made easy."
5
5
  license = "Apache-2.0"
6
6
  authors = ["Stephen Yau <softwareentrepreneer+pfeed@gmail.com>"]
@@ -11,28 +11,30 @@ documentation = "https://pfeed-docs.pfund.ai"
11
11
  keywords = ["trading", "algo-trading", "data pipeline", "ETL", "data lake", "data warehouse", "data integration", "historical data", "live data", "data streaming"]
12
12
 
13
13
  [tool.poetry.dependencies]
14
- python = "^3.10 <3.12"
15
- pfund = "^0.0.1.dev5"
14
+ python = ">=3.10 <3.13"
15
+ pfund = "^0.0.1.dev7"
16
16
  python-dotenv = "^1.0.1"
17
17
  pyyaml = "^6.0.1"
18
18
  beautifulsoup4 = "^4.12.3"
19
19
  requests = "^2.31.0"
20
20
  rich = "^13.7.0"
21
21
  tqdm = "^4.66.2"
22
- pandas = "^2.2.0"
23
- pyarrow = "^15.0.0"
24
22
  click = "^8.1.7"
25
- platformdirs = "^4.2.0"
26
- polars = "^0.20.16"
27
23
  s3fs = "^2024.3.1"
28
- connectorx = "^0.3.2"
29
- ray = "^2.10.0"
30
24
  minio = "^7.2.5"
31
25
  yfinance = "^0.2.37"
26
+ platformdirs = "^4.2.0"
32
27
 
33
28
  [tool.poetry.scripts]
34
29
  pfeed = "pfeed.main:run_cli"
35
30
 
31
+ [tool.poetry.group.pyodide-incompatible.dependencies]
32
+ pandas = "^2.2.0"
33
+ pyarrow = "^15.0.0"
34
+ polars = "^0.20.16"
35
+ connectorx = "^0.3.2"
36
+ ray = "^2.10.0"
37
+
36
38
  [tool.poetry.group.dev]
37
39
  optional = true
38
40
 
@@ -41,11 +43,13 @@ pfund = {path = "../pfund", develop = true}
41
43
  pytest = "^8.0.0"
42
44
  pre-commit = "^3.6.0"
43
45
  bandit = "^1.7.7"
44
- ruff = "^0.1.15"
45
- pyright = "^1.1.349"
46
46
  pytest-xdist = "^3.5.0"
47
47
  faker = "^24.4.0"
48
48
  tox = "^4.14.2"
49
+ mypy = "^1.9.0"
50
+ ruff = "^0.3.5"
51
+ pytest-mock = "^3.14.0"
52
+ pytest-cov = "^5.0.0"
49
53
 
50
54
  [tool.poetry.group.doc]
51
55
  optional = true
File without changes
File without changes
File without changes
File without changes
File without changes