cryptodatapy 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fields.csv +126 -126
- cryptodatapy/conf/tickers.csv +2020 -2020
- cryptodatapy/extract/data_vendors/coinmetrics_api.py +1 -1
- cryptodatapy/extract/data_vendors/polygon_api.py +388 -0
- cryptodatapy/extract/data_vendors/tiingo_api.py +0 -2
- cryptodatapy/extract/datarequest.py +2 -0
- cryptodatapy/extract/getdata.py +7 -6
- cryptodatapy/extract/libraries/pandasdr_api.py +20 -3
- cryptodatapy/transform/clean.py +0 -41
- cryptodatapy/transform/convertparams.py +222 -75
- cryptodatapy/transform/wrangle.py +71 -1
- cryptodatapy/util/datacredentials.py +11 -0
- cryptodatapy/util/utils.py +131 -0
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.27.dist-info}/METADATA +2 -1
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.27.dist-info}/RECORD +17 -15
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.27.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.27.dist-info}/WHEEL +0 -0
@@ -2,6 +2,7 @@ import logging
|
|
2
2
|
from datetime import datetime, timedelta
|
3
3
|
from importlib import resources
|
4
4
|
from typing import Dict, List, Union
|
5
|
+
import re
|
5
6
|
|
6
7
|
import pandas as pd
|
7
8
|
|
@@ -309,7 +310,7 @@ class ConvertParams:
|
|
309
310
|
except KeyError:
|
310
311
|
logging.warning(
|
311
312
|
f"{ticker} not found for Tiingo source. Check tickers in"
|
312
|
-
f" data catalog
|
313
|
+
f" data catalog or try using source_tickers parameter."
|
313
314
|
)
|
314
315
|
|
315
316
|
# freq
|
@@ -667,7 +668,7 @@ class ConvertParams:
|
|
667
668
|
|
668
669
|
def to_wb(self) -> Dict[str, Union[list, str, int, float, datetime, None]]:
|
669
670
|
"""
|
670
|
-
Convert tickers from CryptoDataPy to
|
671
|
+
Convert tickers from CryptoDataPy to World Bank format.
|
671
672
|
"""
|
672
673
|
# tickers
|
673
674
|
with resources.path("cryptodatapy.conf", "tickers.csv") as f:
|
@@ -780,6 +781,72 @@ class ConvertParams:
|
|
780
781
|
|
781
782
|
return self.data_req
|
782
783
|
|
784
|
+
def to_alphavantage(self) -> DataRequest:
    """
    Convert tickers from CryptoDataPy to Alpha Vantage format.

    Fills in the source-specific attributes of the data request (tickers,
    quote currency, start/end dates, fields, time zone, frequency and
    markets). Source values already present on the data request are kept.

    Returns
    -------
    DataRequest
        The data request held by this converter, updated in place.
    """
    # tickers
    if self.data_req.source_tickers is None:
        self.data_req.source_tickers = [ticker.upper() for ticker in self.data_req.tickers]

    # convert quote ccy
    if self.data_req.quote_ccy is None:
        self.data_req.quote_ccy = "USD"
    else:
        self.data_req.quote_ccy = self.data_req.quote_ccy.upper()

    # start date
    self.data_req.source_start_date = self.data_req.start_date

    # end date
    self.data_req.source_end_date = self.data_req.end_date

    # fields
    if self.data_req.source_fields is None:
        self.data_req.source_fields = self.data_req.fields

    # tz
    if self.data_req.tz is None:
        self.data_req.tz = "America/New_York"

    if self.data_req.cat == 'eqty':

        # freq: only derive the endpoint from `freq` when the caller did not
        # supply one, so an explicit source_freq is never overwritten and the
        # requested frequency is honoured when source_freq is left unset
        if self.data_req.source_freq is None:
            if self.data_req.freq in ['1min', '5min', '15min', '30min', '1h', '2h', '4h', '6h', '8h']:
                self.data_req.source_freq = 'av-intraday'
            elif self.data_req.freq == 'w':
                self.data_req.source_freq = 'av-weekly'
            elif self.data_req.freq == 'm':
                self.data_req.source_freq = 'av-monthly'
            else:
                # 'd', None or any other frequency falls back to daily
                self.data_req.source_freq = 'av-daily'

        # adjusted prices: suffix only the plain daily/weekly/monthly endpoints
        # so an already adjusted (or intraday) endpoint is left as is
        wants_adjusted = any(col.endswith('_adj') for col in self.data_req.fields)
        if (
            wants_adjusted
            and self.data_req.freq in ['d', 'w', 'm']
            and self.data_req.source_freq in ['av-daily', 'av-weekly', 'av-monthly']
        ):
            self.data_req.source_freq = self.data_req.source_freq + '-adjusted'

        # markets
        if self.data_req.source_markets is None:
            self.data_req.source_markets = self.data_req.source_tickers

    elif self.data_req.cat == 'fx':

        # freq
        if self.data_req.source_freq is None:
            self.data_req.source_freq = 'av-forex-daily'

        # markets
        if self.data_req.source_markets is None:
            self.data_req.source_markets = [ticker + '/' + self.data_req.quote_ccy
                                            for ticker in self.data_req.tickers]

    return self.data_req
|
849
|
+
|
783
850
|
def to_famafrench(self) -> DataRequest:
|
784
851
|
"""
|
785
852
|
Convert tickers from CryptoDataPy to Fama-French format.
|
@@ -818,6 +885,84 @@ class ConvertParams:
|
|
818
885
|
|
819
886
|
return self.data_req
|
820
887
|
|
888
|
+
def to_polygon(self) -> DataRequest:
    """
    Convert tickers from CryptoDataPy to Polygon format.

    Populates the source tickers (equities only), source frequency,
    quote currency, source markets (fx only), source start/end dates,
    source fields and time zone on the data request.

    Returns
    -------
    DataRequest
        The data request held by this converter, updated in place.
    """
    req = self.data_req

    # ticker catalog shipped with the package
    with resources.path("cryptodatapy.conf", "tickers.csv") as f:
        catalog_path = f
    catalog = pd.read_csv(catalog_path, index_col=0, encoding="latin1")

    # tickers: look up Polygon ids for equities when none were supplied
    if req.source_tickers is None and req.cat == 'eqty':
        req.source_tickers = []
        for symbol in req.tickers:
            try:
                polygon_id = catalog.loc[symbol, "polygon_id"]
            except KeyError:
                logging.warning(
                    f"{symbol} not found for Polygon source. Check tickers in"
                    f" data catalog or try using source_tickers parameter."
                )
            else:
                req.source_tickers.append(polygon_id)

    # freq: map the library frequency onto a Polygon timespan name
    if req.source_freq is None:
        freq = req.freq
        named_spans = {"w": "week", "m": "month", "q": "quarter", "y": "year"}
        if freq is None:
            timespan = "day"
        elif freq[-1] == "s":
            timespan = "second"
        elif freq[-3:] == "min":
            timespan = "minute"
        elif freq[-1] == "h":
            timespan = "hour"
        else:
            # anything not listed (including 'd') is treated as daily
            timespan = named_spans.get(freq, "day")
        req.source_freq = timespan

    # quote ccy
    req.quote_ccy = "usd" if req.quote_ccy is None else req.quote_ccy.lower()

    # markets: concatenated upper-case pairs for fx
    if req.source_markets is None and req.cat == 'fx':
        req.source_markets = [
            symbol.upper() + req.quote_ccy.upper() for symbol in req.tickers
        ]

    # start date: default to two years before today
    if req.start_date is None:
        default_start = pd.Timestamp.today() - pd.DateOffset(years=2)
        req.source_start_date = default_start.strftime("%Y-%m-%d")
    else:
        req.source_start_date = req.start_date

    # end date: default to the current UTC date
    if req.end_date is None:
        req.source_end_date = str(pd.Timestamp.utcnow().date())
    else:
        req.source_end_date = req.end_date

    # fields
    if req.source_fields is None:
        req.source_fields = self.convert_fields(data_source='polygon')

    # tz
    req.tz = "America/New_York" if req.cat in ('eqty', 'fx') else "UTC"

    return req
|
965
|
+
|
821
966
|
def to_aqr(self) -> Dict[str, Union[list, str, int, dict, float, datetime, None]]:
|
822
967
|
"""
|
823
968
|
Convert tickers from CryptoDataPy to AQR format.
|
@@ -911,79 +1056,6 @@ class ConvertParams:
|
|
911
1056
|
"source_fields": self.data_req.source_fields,
|
912
1057
|
}
|
913
1058
|
|
914
|
-
def convert_fx_tickers(self, quote_ccy: str) -> List[str]:
    """
    Builds fx pairs from the requested tickers and a quote currency.

    Parameters
    ----------
    quote_ccy: str
        Quote currency.

    Returns
    -------
    mkts: list
        One "XXX/YYY" pair per ticker in the data request, upper-cased.
    """
    # currencies quoted as base against USD
    usd_base_ccys = ["EUR", "GBP", "AUD", "NZD"]

    def _pair(ticker: str) -> str:
        base, quote = ticker.upper(), quote_ccy.upper()
        # USD goes first unless the ticker is one of the base currencies
        if quote == "USD" and base not in usd_base_ccys:
            return quote + "/" + base
        return base + "/" + quote

    return [_pair(ticker) for ticker in self.data_req.tickers]
|
945
|
-
|
946
|
-
def convert_fields(self, data_source: str) -> List[str]:
    """
    Translates the requested fields into the ids used by a data source.

    Parameters
    ----------
    data_source: str
        Name of data source; the "<data_source>_id" column of the fields
        catalog is used for the lookup.

    Returns
    -------
    fields_list: list
        Source fields already set on the data request, if any; otherwise
        the catalog ids found for the requested fields.
    """
    # load the fields catalog shipped with the package
    with resources.path("cryptodatapy.conf", "fields.csv") as f:
        catalog_path = f
    catalog = pd.read_csv(catalog_path, index_col=0, encoding="latin1")

    # source fields supplied by the caller take precedence
    if self.data_req.source_fields is not None:
        return self.data_req.source_fields

    source_ids = []
    for name in self.data_req.fields:
        try:
            source_id = catalog.loc[name, data_source + "_id"]
        except KeyError as e:
            # unknown fields are reported and skipped
            logging.warning(e)
            logging.warning(
                f"Id for {name} could not be found in the data catalog."
                f" Try using source field ids."
            )
        else:
            source_ids.append(source_id)

    return source_ids
|
986
|
-
|
987
1059
|
def to_dydx_dict(self) -> Dict[str, Union[list, str, int, float, None]]:
|
988
1060
|
"""
|
989
1061
|
Convert parameters from CryptoDataPy to dYdX format.
|
@@ -1101,3 +1173,78 @@ class ConvertParams:
|
|
1101
1173
|
'oi': 'openInterest'
|
1102
1174
|
}
|
1103
1175
|
return self.data_req
|
1176
|
+
|
1177
|
+
def convert_fx_tickers(self, quote_ccy: str) -> List[str]:
    """
    Converts base and quote currency tickers to fx pairs following fx quoting convention.

    Parameters
    ----------
    quote_ccy: str
        Quote currency.

    Returns
    -------
    mkts: list
        Fx pairs formatted as "XXX/YYY" (upper case), one per ticker in the
        data request and in the same order.
    """
    # currencies quoted as the base currency against USD (e.g. EUR/USD)
    base_ccys = ["EUR", "GBP", "AUD", "NZD"]

    mkts = []  # fx pairs list
    for ticker in self.data_req.tickers:
        base, quote = ticker.upper(), quote_ccy.upper()
        if quote == "USD" and base not in base_ccys:
            # every other currency is quoted with USD first (e.g. USD/JPY)
            mkts.append(quote + "/" + base)
        else:
            mkts.append(base + "/" + quote)

    return mkts
|
1210
|
+
|
1211
|
+
def convert_fields(self, data_source: str) -> List[str]:
    """
    Maps CryptoDataPy field names to the field ids of a data source.

    Parameters
    ----------
    data_source: str
        Name of data source for fields conversions; selects the
        "<data_source>_id" column of the fields catalog.

    Returns
    -------
    fields_list: list
        The data request's source fields when they are already set,
        otherwise the ids looked up for each requested field.
    """
    # fields catalog
    with resources.path("cryptodatapy.conf", "fields.csv") as f:
        fields_csv = f
    fields_df = pd.read_csv(fields_csv, index_col=0, encoding="latin1")

    # nothing to convert when source fields were provided in the data req
    provided = self.data_req.source_fields
    if provided is not None:
        return provided

    converted = []
    for requested in self.data_req.fields:
        try:
            converted.append(fields_df.loc[requested, data_source + "_id"])
        except KeyError as e:
            # field missing from the catalog: warn and carry on
            logging.warning(e)
            logging.warning(
                f"Id for {requested} could not be found in the data catalog."
                f" Try using source field ids."
            )

    return converted
|
@@ -673,6 +673,44 @@ class WrangleData:
|
|
673
673
|
|
674
674
|
return self.data_resp
|
675
675
|
|
676
|
+
def polygon(self) -> pd.DataFrame:
    """
    Turns a Polygon data response into a tidy dataframe.

    The response is loaded into a dataframe, its columns are converted by
    ``convert_fields_to_lib``, the 'date' column (epoch milliseconds) becomes
    the sorted index, rows are resampled to the requested frequency and
    duplicates, empty rows/columns and zero values are removed.

    Returns
    -------
    pd.DataFrame
        Wrangled dataframe restricted to the requested fields.
    """
    # create df and convert cols/fields to lib
    self.data_resp = pd.DataFrame(self.data_resp)
    self.convert_fields_to_lib(data_source='polygon')

    # 'date' holds millisecond timestamps; use it as the sorted index
    self.data_resp['date'] = pd.to_datetime(self.data_resp['date'], unit='ms')
    by_date = self.data_resp.set_index('date').sort_index()

    # keep the last observation of each period, then infer dtypes
    sampled = by_date.resample(self.data_req.freq).last()
    self.data_resp = sampled.convert_dtypes()

    # remove bad data
    unique_rows = ~self.data_resp.index.duplicated()
    self.data_resp = self.data_resp[unique_rows]  # duplicate rows
    self.data_resp = self.data_resp.dropna(how='all')  # all-NaN rows
    self.data_resp = self.data_resp.dropna(how='all', axis=1)  # all-NaN cols
    nonzero = self.data_resp != 0
    self.data_resp = self.data_resp[nonzero]  # zeros become missing values

    # keep only requested fields and sort index
    requested = self.data_req.fields
    self.data_resp = self.data_resp[requested].sort_index()

    return self.data_resp
|
713
|
+
|
676
714
|
def investpy(self) -> pd.DataFrame:
|
677
715
|
"""
|
678
716
|
Wrangles InvestPy data response to dataframe with tidy data format.
|
@@ -957,6 +995,38 @@ class WrangleData:
|
|
957
995
|
|
958
996
|
return self.data_resp
|
959
997
|
|
998
|
+
def alphavantage(self) -> pd.DataFrame:
    """
    Turns an Alpha Vantage data response into a tidy dataframe.

    The response is re-indexed on (date, ticker), resampled per ticker to
    the requested frequency, and cleaned of zero values, duplicate index
    entries and empty rows/columns.

    Returns
    -------
    pd.DataFrame
        Wrangled dataframe restricted to the requested fields.
    """
    # index: rebuild (date, ticker) in place with parsed datetimes
    raw = self.data_resp
    raw.reset_index(inplace=True)
    raw['date'] = pd.to_datetime(raw['date'])
    raw.set_index(['date', 'ticker'], inplace=True)

    # resample each ticker separately, keeping the last value per period
    per_ticker = raw.groupby('ticker').resample(self.data_req.freq, level='date').last()
    tidy = per_ticker.swaplevel('ticker', 'date').sort_index()

    # type conversion
    tidy = tidy.convert_dtypes()

    # remove bad data
    tidy = tidy[tidy != 0]  # 0 values
    tidy = tidy[~tidy.index.duplicated()]  # duplicate rows
    tidy = tidy.dropna(how='all')  # entire row NaNs
    tidy = tidy.dropna(how='all', axis=1)  # entire col NaNs

    # keep only requested fields and sort index
    self.data_resp = tidy[self.data_req.fields].sort_index()

    return self.data_resp
|
1029
|
+
|
960
1030
|
def famafrench(self) -> pd.DataFrame:
|
961
1031
|
"""
|
962
1032
|
Wrangles Fama-French data response to dataframe with tidy data format.
|
@@ -1175,4 +1245,4 @@ class WrangleData:
|
|
1175
1245
|
if self.data_req.end_date is not None:
|
1176
1246
|
self.data_resp = self.data_resp[(self.data_resp.index <= self.data_req.end_date)]
|
1177
1247
|
|
1178
|
-
return self
|
1248
|
+
return self
|
@@ -37,6 +37,16 @@ class DataCredentials:
|
|
37
37
|
tiingo_api_key: str = os.environ['TIINGO_API_KEY']
|
38
38
|
except KeyError:
|
39
39
|
tiingo_api_key: str = None
|
40
|
+
# alpha vantage api key
|
41
|
+
try:
|
42
|
+
alpha_vantage_api_key: str = os.environ['ALPHAVANTAGE_API_KEY']
|
43
|
+
except KeyError:
|
44
|
+
alpha_vantage_api_key: str = None
|
45
|
+
# polygon api key
|
46
|
+
try:
|
47
|
+
polygon_api_key: str = os.environ['POLYGON_API_KEY']
|
48
|
+
except KeyError:
|
49
|
+
polygon_api_key: str = None
|
40
50
|
# coinmetrics api key
|
41
51
|
try:
|
42
52
|
coinmetrics_api_key: str = os.environ['COINMETRICS_API_KEY']
|
@@ -52,6 +62,7 @@ class DataCredentials:
|
|
52
62
|
coinmetrics_base_url: str = 'https://api.coinmetrics.io/v4'
|
53
63
|
else:
|
54
64
|
coinmetrics_base_url: str = 'https://community-api.coinmetrics.io/v4'
|
65
|
+
polygon_base_url: str = 'https://api.polygon.io/v3/reference/'
|
55
66
|
|
56
67
|
# API endpoints
|
57
68
|
cryptomcompare_endpoints: dict = field(default_factory=lambda: {
|
@@ -0,0 +1,131 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
|
3
|
+
from tests.test_impute import filtered_data
|
4
|
+
|
5
|
+
|
6
|
+
def compute_reference_price(List: list,
                            method: str = 'median',
                            trim_pct: float = 0.25,
                            ) -> pd.DataFrame:
    """
    Computes the consensus price from a list of dataframes.

    Parameters
    ----------
    List: list of pd.DataFrame
        Dataframes containing price data. They are concatenated and grouped
        by 'date' and 'ticker' ('median') or by the first two index levels
        ('trimmed_mean').
    method: str, optional
        Method to compute the consensus price. Options are 'median' or 'trimmed_mean'.
        Default is 'median'.
    trim_pct: float, optional
        Quantile trimmed from both ends for 'trimmed_mean' method; must lie
        between 0 and 0.5. Default is 0.25 (25%).

    Returns
    -------
    pd.DataFrame
        Dataframe with the consensus price, sorted by index.

    Raises
    ------
    ValueError
        If the input list is empty, the method is unknown, or trim_pct is
        outside [0, 0.5] for the 'trimmed_mean' method.
    """
    if not List:
        raise ValueError("The input list is empty.")

    # Concatenate all dataframes in the list
    stacked_df = pd.concat(List)

    # Compute consensus price based on the specified method
    if method == 'median':
        consensus_price = stacked_df.groupby(['date', 'ticker']).median()

    elif method == 'trimmed_mean':
        # above 0.5 the lower bound exceeds the upper bound and every value
        # would be filtered out, silently producing an all-NaN result
        if not 0 <= trim_pct <= 0.5:
            raise ValueError("trim_pct must be between 0 and 0.5.")

        # Per-group quantile bounds
        grouped = stacked_df.groupby(level=[0, 1])
        lower_bound = grouped.quantile(trim_pct)
        upper_bound = grouped.quantile(1 - trim_pct)

        # Mask values outside the bounds (bounds are broadcast back to every row)
        within_bounds = ((stacked_df >= lower_bound.reindex(stacked_df.index)) &
                         (stacked_df <= upper_bound.reindex(stacked_df.index)))
        filtered_df = stacked_df[within_bounds]

        consensus_price = filtered_df.groupby(level=[0, 1]).mean()
    else:
        raise ValueError("Method must be either 'median' or 'trimmed_mean'.")

    return consensus_price.sort_index()
|
52
|
+
|
53
|
+
|
54
|
+
def stitch_dataframes(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """
    Combines two dataframes, preferring values from the first.

    Parameters
    ----------
    df1: pd.DataFrame
        First dataframe; its non-missing values take priority.
    df2: pd.DataFrame
        Second dataframe; supplies values where df1 has none.

    Returns
    -------
    combined_df: pd.DataFrame
        df1 extended with the rows, columns and values it lacks from df2.
    """
    # project df1 onto df2's rows/columns; gaps show up as NaN
    aligned = df1.reindex(index=df2.index, columns=df2.columns)

    # fill those gaps with the corresponding df2 values
    patched = aligned.fillna(df2)

    # df1 wins wherever it has data, the patched frame covers the rest
    return df1.combine_first(patched)
|
75
|
+
|
76
|
+
|
77
|
+
def rebase_fx_to_foreign_vs_usd(df) -> pd.DataFrame:
    """
    Rebase FX rates so every series is quoted as foreign currency vs. USD.

    Rates whose ticker starts with "USD" are inverted, and every ticker is
    replaced by the three-letter foreign currency code. Handles both a
    MultiIndex (date, ticker) frame and a date-indexed frame with tickers
    as columns. The input is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        FX DataFrame with either:
        - MultiIndex (date, ticker)
        - Datetime index and tickers as columns

    Returns
    -------
    pd.DataFrame
        Rebased FX rates labelled by foreign currency (e.g., 'EUR', 'JPY'),
        sorted by index.

    Raises
    ------
    ValueError
        If a ticker neither starts nor ends with "USD".
    """
    def _foreign_code(pair: str) -> str:
        # USDJPY -> JPY
        if pair.startswith("USD"):
            return pair[3:]
        # EURUSD -> EUR
        if pair.endswith("USD"):
            return pair[:3]
        raise ValueError(f"Unexpected ticker format: {pair}")

    out = df.copy()

    if not isinstance(out.index, pd.MultiIndex):
        # wide layout: one column per pair
        rebased = {
            _foreign_code(pair): (1 / out[pair] if pair.startswith("USD") else out[pair])
            for pair in out.columns
        }
        out = pd.DataFrame(rebased, index=out.index)
        return out.sort_index()

    # long layout: pairs live in the second index level
    pairs = out.index.get_level_values(1)
    usd_first = pairs.str.startswith("USD")

    # invert the USDXXX rows
    out[usd_first] = 1 / out[usd_first]

    # relabel every row with its foreign currency code
    dates = out.index.get_level_values(0)
    out.index = pd.MultiIndex.from_arrays(
        [dates, pairs.map(_foreign_code)],
        names=out.index.names
    )

    return out.sort_index()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: cryptodatapy
|
3
|
-
Version: 0.2.25
|
3
|
+
Version: 0.2.27
|
4
4
|
Summary: Cryptoasset data library
|
5
5
|
License: Apache-2.0
|
6
6
|
Author: Systamental
|
@@ -22,6 +22,7 @@ Requires-Dist: numpy (>=1.23.2)
|
|
22
22
|
Requires-Dist: openpyxl (>=3.1.2)
|
23
23
|
Requires-Dist: pandas (>=2.2.3)
|
24
24
|
Requires-Dist: pandas-datareader (>=0.10.0)
|
25
|
+
Requires-Dist: polygon-api-client (>=1.14.6)
|
25
26
|
Requires-Dist: prophet (>=1.1) ; python_version >= "3.7"
|
26
27
|
Requires-Dist: pyarrow (>=17.0.0)
|
27
28
|
Requires-Dist: requests (>=2.28.0) ; python_version >= "3.7"
|
@@ -1,7 +1,7 @@
|
|
1
1
|
cryptodatapy/__init__.py,sha256=ee1UaINHZn1A_SZ96XM3hCguQEJgiPTvKlnYsk3mmS4,185
|
2
2
|
cryptodatapy/conf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
cryptodatapy/conf/fields.csv,sha256=
|
4
|
-
cryptodatapy/conf/tickers.csv,sha256=
|
3
|
+
cryptodatapy/conf/fields.csv,sha256=aKrgG8yzKSKs8tX5AYPFklOtsATYhCfBZYLS3RpQPhk,26070
|
4
|
+
cryptodatapy/conf/tickers.csv,sha256=nTi_Ww1UgTkTZPWocJBmOgi56iOnrVN3le5tYkOi95E,357958
|
5
5
|
cryptodatapy/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
cryptodatapy/datasets/br_econ_calendar.csv,sha256=mSM0IOIByI-0gIIL1CbDQPqHYI5lK6vavrY1ODj3Jlk,1185318
|
7
7
|
cryptodatapy/datasets/ca_econ_calendar.csv,sha256=GtoopEhCSufBNjpAi2BiorSsm4RmoK5dfZe8lkOS-Jc,1521808
|
@@ -27,36 +27,38 @@ cryptodatapy/extract/data_vendors/.ipynb_checkpoints/InvestPy-checkpoint.ipynb,s
|
|
27
27
|
cryptodatapy/extract/data_vendors/.ipynb_checkpoints/NasdaqDataLink-checkpoint.ipynb,sha256=hY2QkCcTiLgPnl8SQPsO8spio5-RBMGeBLYzAwgSWb4,147170
|
28
28
|
cryptodatapy/extract/data_vendors/.ipynb_checkpoints/PandasDataReader-checkpoint.ipynb,sha256=n7vzOV6AxC_Ti5CLWW2ABMEEcbbBpiBBs4qTUBQinIg,24171
|
29
29
|
cryptodatapy/extract/data_vendors/__init__.py,sha256=Nk6gcT43d0XOLfrlVA9r--5mvHCgHfq295IKL3Puu74,354
|
30
|
-
cryptodatapy/extract/data_vendors/coinmetrics_api.py,sha256=
|
30
|
+
cryptodatapy/extract/data_vendors/coinmetrics_api.py,sha256=J9WIDU4Q_j3F13TkGG3aH6I7-wFY24ie-lnd_s8D6ls,33308
|
31
31
|
cryptodatapy/extract/data_vendors/cryptocompare_api.py,sha256=eEK0QTROr97hG4pxTufX45d9EEUNKo9Hy-INygn3AlA,27493
|
32
32
|
cryptodatapy/extract/data_vendors/datavendor.py,sha256=eH_yIJPy5FvZhgILqpgNYYkbmC5fK_5eIdIZTOeNw9Q,13292
|
33
33
|
cryptodatapy/extract/data_vendors/glassnode_api.py,sha256=PuuJOjHztoJyFijb5XU1zm1S_2NAj7MX-wC89DL_bWQ,13103
|
34
|
-
cryptodatapy/extract/data_vendors/
|
35
|
-
cryptodatapy/extract/
|
34
|
+
cryptodatapy/extract/data_vendors/polygon_api.py,sha256=wg1su_jMGCcmJ7Tqw5r562Ub03UuEc6jU9uhbchbFBM,13646
|
35
|
+
cryptodatapy/extract/data_vendors/tiingo_api.py,sha256=C9IkH0_z77FWMFq6pFXjs0Omw8inwb-5RUuhVs48Rvo,25617
|
36
|
+
cryptodatapy/extract/datarequest.py,sha256=d49Lmhy0ZEFo0K8T_dI-g7EB_WXFVVfmQ6PzVlGlYtY,25286
|
36
37
|
cryptodatapy/extract/exchanges/__init__.py,sha256=7QUxwrUiweyv_1u_tNjOyeAFKMwJ32Z0aiJ_Gh8bMXk,114
|
37
38
|
cryptodatapy/extract/exchanges/dydx.py,sha256=tBp60PG24tUZI949nHSiJQwjsP0zI2Oyz9yDkFDy1ZU,27697
|
38
39
|
cryptodatapy/extract/exchanges/exchange.py,sha256=Cicj3KS4zLbwmXX5fu89byXNwqqU4TH31GFv0zj3D4s,13010
|
39
|
-
cryptodatapy/extract/getdata.py,sha256=
|
40
|
+
cryptodatapy/extract/getdata.py,sha256=_8Hi4vdkj2xGykb_2fBcqzJTNROzX0QnQE2hxPfe690,11543
|
40
41
|
cryptodatapy/extract/libraries/__init__.py,sha256=KG2Rr3c8CcDq-nbhT-ItssqZE9U65xQXH0Wv0g86SVg,254
|
41
42
|
cryptodatapy/extract/libraries/ccxt_api.py,sha256=F4wYocKpaKngvXCZR-zTIBUOFZMGvQ_5Onw82uLCWOU,56131
|
42
43
|
cryptodatapy/extract/libraries/dbnomics_api.py,sha256=M6kPIH-hKqkmeBQb-g56dY9jatqLCtSl_MnvPblHtAc,9421
|
43
44
|
cryptodatapy/extract/libraries/investpy_api.py,sha256=qtGm3LDluXxJorvFv0w1bm1oBrcZIfE5cZSYzNYvttY,18409
|
44
45
|
cryptodatapy/extract/libraries/library.py,sha256=eU8NnQZ9luLGdIF5hms6j8VPCWc50evkREc4xdh-g1I,12301
|
45
|
-
cryptodatapy/extract/libraries/pandasdr_api.py,sha256
|
46
|
+
cryptodatapy/extract/libraries/pandasdr_api.py,sha256=hwb9BwME5AhlDQ3cSaELkBMwS87IBn31l7BkWh_SU1U,14778
|
46
47
|
cryptodatapy/extract/web/__init__.py,sha256=gePCHxIGxrG1atBQvp5MhNTWirztLNDQMCR4FRYujKg,90
|
47
48
|
cryptodatapy/extract/web/aqr.py,sha256=LS1D7QzG6UWkLUfDMgBFtiHpznnnAUOpec5Sx3vRGME,11875
|
48
49
|
cryptodatapy/extract/web/web.py,sha256=R1xEnHE1McxSWxp4vrTfgh9gW6FF6XDlp0gmp2NmWOM,12126
|
49
50
|
cryptodatapy/transform/__init__.py,sha256=Spb5cGJ3V_o8hgSWOSrF8J_vsSZpFk0uzW7RpkgfbFE,131
|
50
|
-
cryptodatapy/transform/clean.py,sha256=
|
51
|
-
cryptodatapy/transform/convertparams.py,sha256=
|
51
|
+
cryptodatapy/transform/clean.py,sha256=xpl3SgEutOZUBUbI8g7iky0uqvb4IGV_hDzU26QlEGs,11630
|
52
|
+
cryptodatapy/transform/convertparams.py,sha256=yrm9Gr6Fm7CaVTfxHGs0TJx6ZtP7llrlIA-oPjf0vdM,48484
|
52
53
|
cryptodatapy/transform/filter.py,sha256=37MjUKUay3dwwyn47rnNOU51X_OFzmWq_N9buALzq9k,9058
|
53
54
|
cryptodatapy/transform/impute.py,sha256=_0-SX5nnPrYgJYT-HKwBGNkmWXRMy9-C2oeU6VqkQp0,5537
|
54
55
|
cryptodatapy/transform/od.py,sha256=mI1oojMbfmdO9ZewL3AvMxoXuMM05Ut2oGm_ogMf2XU,30386
|
55
|
-
cryptodatapy/transform/wrangle.py,sha256=
|
56
|
+
cryptodatapy/transform/wrangle.py,sha256=cQOkPoiOmQtC7d2G15jMbMJSbinMmLYxM6Or7Ffh_hw,44818
|
56
57
|
cryptodatapy/util/__init__.py,sha256=zSQ2HU2QIXzCuptJjknmrClwtQKCvIj4aNysZljIgrU,116
|
57
58
|
cryptodatapy/util/datacatalog.py,sha256=qCCX6srXvaAbVAKuA0M2y5IK_2OEx5xA3yRahDZlC-g,13157
|
58
|
-
cryptodatapy/util/datacredentials.py,sha256=
|
59
|
-
cryptodatapy
|
60
|
-
cryptodatapy-0.2.
|
61
|
-
cryptodatapy-0.2.
|
62
|
-
cryptodatapy-0.2.
|
59
|
+
cryptodatapy/util/datacredentials.py,sha256=BnoQlUchbP0vfXqXRuhCOOsHyUTMuH5T4RAKBbHzMyo,3140
|
60
|
+
cryptodatapy/util/utils.py,sha256=CqxFkaNNuOwA8RLz-G11bZ0jn3rcZfwEvKBkU0GDUDA,4164
|
61
|
+
cryptodatapy-0.2.27.dist-info/LICENSE,sha256=sw4oVq8bDjT3uMtaFebQ-xeIVP4H-bXldTs9q-Jjeks,11344
|
62
|
+
cryptodatapy-0.2.27.dist-info/METADATA,sha256=MVJocFDsV4nFpRTr7Tr_Pg7ydNIiFT6D_Anu6oZmfZ4,6473
|
63
|
+
cryptodatapy-0.2.27.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
64
|
+
cryptodatapy-0.2.27.dist-info/RECORD,,
|
File without changes
|
File without changes
|