cryptodatapy 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,10 @@ import pandas as pd
4
4
  from cryptodatapy.extract.data_vendors.coinmetrics_api import CoinMetrics
5
5
  from cryptodatapy.extract.data_vendors.cryptocompare_api import CryptoCompare
6
6
  from cryptodatapy.extract.data_vendors.glassnode_api import Glassnode
7
+ from cryptodatapy.extract.data_vendors.polygon_api import Polygon
7
8
  from cryptodatapy.extract.data_vendors.tiingo_api import Tiingo
8
9
  from cryptodatapy.extract.datarequest import DataRequest
10
+ from cryptodatapy.extract.exchanges.dydx import Dydx
9
11
  from cryptodatapy.extract.libraries.ccxt_api import CCXT
10
12
  from cryptodatapy.extract.libraries.dbnomics_api import DBnomics
11
13
  from cryptodatapy.extract.libraries.pandasdr_api import PandasDataReader
@@ -92,10 +94,11 @@ class GetData:
92
94
  "dbnomics": DBnomics,
93
95
  "yahoo": PandasDataReader,
94
96
  "fred": PandasDataReader,
95
- "av-daily": PandasDataReader,
96
- "av-forex-daily": PandasDataReader,
97
+ "alphavantage": PandasDataReader,
98
+ "polygon": Polygon,
97
99
  "famafrench": PandasDataReader,
98
- "aqr": AQR
100
+ "aqr": AQR,
101
+ "dydx": Dydx
99
102
  }
100
103
 
101
104
  # available attr and methods
@@ -202,10 +205,11 @@ class GetData:
202
205
  "dbnomics": DBnomics,
203
206
  "yahoo": PandasDataReader,
204
207
  "fred": PandasDataReader,
205
- "av-daily": PandasDataReader,
206
- "av-forex-daily": PandasDataReader,
208
+ "alphavantage": PandasDataReader,
209
+ "polygon": Polygon,
207
210
  "famafrench": PandasDataReader,
208
- "aqr": AQR
211
+ "aqr": AQR,
212
+ "dydx": Dydx
209
213
  }
210
214
 
211
215
  # data source
@@ -269,10 +273,11 @@ class GetData:
269
273
  "dbnomics": DBnomics,
270
274
  "yahoo": PandasDataReader,
271
275
  "fred": PandasDataReader,
272
- "av-daily": PandasDataReader,
273
- "av-forex-daily": PandasDataReader,
276
+ "alphavantage": PandasDataReader,
277
+ "polygon": Polygon,
274
278
  "famafrench": PandasDataReader,
275
- "aqr": AQR
279
+ "aqr": AQR,
280
+ "dydx": Dydx
276
281
  }
277
282
 
278
283
  # data source
@@ -29,9 +29,12 @@ class PandasDataReader(Library):
29
29
  markets: Optional[Dict[str, List[str]]] = None,
30
30
  market_types: List[str] = ["spot", "future"],
31
31
  fields: Optional[Dict[str, List[str]]] = None,
32
- frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y"],
32
+ frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y",
33
+ "av-intraday", "av-daily", "av-weekly", "av-monthly",
34
+ "av-daily-adjusted", "av-weekly-adjusted",
35
+ "av-monthly-adjusted", "av-forex-daily"],
33
36
  base_url: Optional[str] = None,
34
- api_key: Optional[str] = None,
37
+ api_key: str = data_cred.alpha_vantage_api_key,
35
38
  max_obs_per_call: Optional[int] = None,
36
39
  rate_limit: Optional[Any] = None,
37
40
  ):
@@ -228,7 +231,7 @@ class PandasDataReader(Library):
228
231
  # mkt type
229
232
  if self.data_req.mkt_type not in self.market_types:
230
233
  raise ValueError(
231
- f"{self.data_req.mkt_type} is not available for {self.data_req.exch}."
234
+ f"{self.data_req.mkt_type} is not available."
232
235
  )
233
236
 
234
237
  # check fields
@@ -268,6 +271,20 @@ class PandasDataReader(Library):
268
271
  self.data_req.source_start_date,
269
272
  self.data_req.source_end_date)
270
273
 
274
+ # alpha vantage
275
+ elif self.data_req.source == "alphavantage":
276
+ for ticker, market in zip(self.data_req.source_tickers, self.data_req.source_markets):
277
+ df1 = web.DataReader(market,
278
+ self.data_req.source_freq,
279
+ self.data_req.source_start_date,
280
+ self.data_req.source_end_date,
281
+ api_key=self.api_key)
282
+ df1.index.name = 'date'
283
+ df1['ticker'] = ticker
284
+ df1.set_index(['ticker'], append=True, inplace=True)
285
+ # concat df and df1
286
+ self.data = pd.concat([self.data, df1])
287
+
271
288
  # fama-french
272
289
  elif data_req.source == "famafrench":
273
290
  for ticker in self.data_req.source_tickers:
@@ -6,47 +6,6 @@ from cryptodatapy.transform.impute import Impute
6
6
  from cryptodatapy.transform.filter import Filter
7
7
 
8
8
 
9
- def stitch_dataframes(dfs):
10
- """
11
- Stitches together dataframes with different start dates.
12
-
13
- Parameters
14
- ----------
15
- dfs: list
16
- List of dataframes to be stitched together.
17
-
18
- Returns
19
- -------
20
- combined_df: pd.DataFrame
21
- Combined dataframe with extended start date.
22
- """
23
- # check if dfs is a list
24
- if not isinstance(dfs, list):
25
- raise TypeError("Dataframes must be a list.")
26
-
27
- # check index types
28
- if all([isinstance(df.index, pd.MultiIndex) for df in dfs]):
29
- dfs.sort(key=lambda df: df.index.levels[0][0], reverse=True)
30
- elif all([isinstance(df.index, pd.DatetimeIndex) for df in dfs]):
31
- dfs.sort(key=lambda df: df.index[0], reverse=True)
32
- else:
33
- raise TypeError("Dataframes must be pd.MultiIndex or have DatetimeIndex.")
34
-
35
- # most recent start date
36
- combined_df = dfs[0]
37
-
38
- # combine dfs
39
- for df in dfs[1:]:
40
- combined_df = combined_df.combine_first(df)
41
-
42
- # reorder cols
43
- max_columns = max(len(df.columns) for df in dfs)
44
- cols = next(df.columns.tolist() for df in dfs if len(df.columns) == max_columns)
45
- combined_df = combined_df[cols]
46
-
47
- return combined_df
48
-
49
-
50
9
  class CleanData:
51
10
  """
52
11
  Cleans data to improve data quality.
@@ -2,6 +2,7 @@ import logging
2
2
  from datetime import datetime, timedelta
3
3
  from importlib import resources
4
4
  from typing import Dict, List, Union
5
+ import re
5
6
 
6
7
  import pandas as pd
7
8
 
@@ -309,7 +310,7 @@ class ConvertParams:
309
310
  except KeyError:
310
311
  logging.warning(
311
312
  f"{ticker} not found for Tiingo source. Check tickers in"
312
- f" data catalog and try again."
313
+ f" data catalog or try using source_tickers parameter."
313
314
  )
314
315
 
315
316
  # freq
@@ -667,7 +668,7 @@ class ConvertParams:
667
668
 
668
669
  def to_wb(self) -> Dict[str, Union[list, str, int, float, datetime, None]]:
669
670
  """
670
- Convert tickers from CryptoDataPy to Yahoo Finance format.
671
+ Convert tickers from CryptoDataPy to World Bank format.
671
672
  """
672
673
  # tickers
673
674
  with resources.path("cryptodatapy.conf", "tickers.csv") as f:
@@ -780,6 +781,72 @@ class ConvertParams:
780
781
 
781
782
  return self.data_req
782
783
 
784
def to_alphavantage(self) -> "DataRequest":
    """
    Convert request parameters from CryptoDataPy to Alpha Vantage format.

    Fills in the ``source_*`` attributes of ``self.data_req`` (tickers,
    dates, fields, frequency, markets) and normalises ``quote_ccy`` and
    ``tz``. Values that the user already supplied through ``source_*``
    attributes are left untouched.

    Returns
    -------
    DataRequest
        The same ``self.data_req`` object, updated in place.
    """
    req = self.data_req

    # tickers
    if req.source_tickers is None:
        req.source_tickers = [ticker.upper() for ticker in req.tickers]

    # quote ccy
    req.quote_ccy = "USD" if req.quote_ccy is None else req.quote_ccy.upper()

    # start and end dates are passed through unchanged
    req.source_start_date = req.start_date
    req.source_end_date = req.end_date

    # fields
    if req.source_fields is None:
        req.source_fields = req.fields

    # tz
    if req.tz is None:
        req.tz = "America/New_York"

    if req.cat == 'eqty':

        # freq: only derived when the caller did not choose a source freq,
        # so an explicit source_freq is never overwritten
        if req.source_freq is None:
            intraday_freqs = ('1min', '5min', '15min', '30min', '1h', '2h', '4h', '6h', '8h')
            adjustable_freqs = {'d': 'av-daily', 'w': 'av-weekly', 'm': 'av-monthly'}

            if req.freq in intraday_freqs:
                req.source_freq = 'av-intraday'
            else:
                # unknown or missing freq falls back to daily data
                req.source_freq = adjustable_freqs.get(req.freq, 'av-daily')

                # adjusted prices exist only for daily/weekly/monthly series
                wants_adjusted = any(col.endswith('_adj') for col in req.fields)
                if wants_adjusted and req.freq in adjustable_freqs:
                    req.source_freq += '-adjusted'

        # markets
        if req.source_markets is None:
            req.source_markets = req.source_tickers

    elif req.cat == 'fx':

        # freq
        if req.source_freq is None:
            req.source_freq = 'av-forex-daily'

        # markets: BASE/QUOTE pairs, upper-cased like source_tickers
        if req.source_markets is None:
            req.source_markets = [ticker.upper() + '/' + req.quote_ccy
                                  for ticker in req.tickers]

    return req
849
+
783
850
  def to_famafrench(self) -> DataRequest:
784
851
  """
785
852
  Convert tickers from CryptoDataPy to Fama-French format.
@@ -818,6 +885,84 @@ class ConvertParams:
818
885
 
819
886
  return self.data_req
820
887
 
888
def to_polygon(self) -> "DataRequest":
    """
    Convert request parameters from CryptoDataPy to Polygon format.

    Fills in the ``source_*`` attributes of ``self.data_req`` (tickers,
    frequency, markets, dates, fields) and normalises ``quote_ccy``.
    ``tz`` is always overwritten: "America/New_York" for the 'eqty' and
    'fx' categories, "UTC" otherwise.

    Returns
    -------
    DataRequest
        The same ``self.data_req`` object, updated in place.
    """
    req = self.data_req

    # tickers: the catalog is only loaded when a lookup is actually needed
    if req.source_tickers is None and req.cat == 'eqty':
        # read inside the context so the resource path is still valid
        with resources.path("cryptodatapy.conf", "tickers.csv") as f:
            tickers_df = pd.read_csv(f, index_col=0, encoding="latin1")

        req.source_tickers = []
        for ticker in req.tickers:
            try:
                req.source_tickers.append(tickers_df.loc[ticker, "polygon_id"])
            except KeyError:
                logging.warning(
                    f"{ticker} not found for Polygon source. Check tickers in"
                    f" data catalog or try using source_tickers parameter."
                )

    # freq
    if req.source_freq is None:
        freq = req.freq
        exact_freqs = {"w": "week", "m": "month", "q": "quarter", "y": "year"}
        if freq is None:
            req.source_freq = "day"
        elif freq.endswith("s"):
            req.source_freq = "second"
        elif freq.endswith("min"):
            req.source_freq = "minute"
        elif freq.endswith("h"):
            req.source_freq = "hour"
        else:
            # anything unrecognised falls back to daily bars
            req.source_freq = exact_freqs.get(freq, "day")

    # quote ccy
    req.quote_ccy = "usd" if req.quote_ccy is None else req.quote_ccy.lower()

    # markets: only built for fx, as concatenated upper-case pairs (e.g. EURUSD)
    if req.source_markets is None and req.cat == 'fx':
        req.source_markets = [ticker.upper() + req.quote_ccy.upper()
                              for ticker in req.tickers]

    # start date: defaults to two years before today
    if req.start_date is None:
        two_years_ago = pd.Timestamp.today() - pd.DateOffset(years=2)
        req.source_start_date = two_years_ago.strftime("%Y-%m-%d")
    else:
        req.source_start_date = req.start_date

    # end date: defaults to the current UTC date
    if req.end_date is None:
        req.source_end_date = str(pd.Timestamp.now(tz="UTC").date())
    else:
        req.source_end_date = req.end_date

    # fields
    if req.source_fields is None:
        req.source_fields = self.convert_fields(data_source='polygon')

    # tz
    if req.cat == 'eqty' or req.cat == 'fx':
        req.tz = "America/New_York"
    else:
        req.tz = "UTC"

    return req
965
+
821
966
  def to_aqr(self) -> Dict[str, Union[list, str, int, dict, float, datetime, None]]:
822
967
  """
823
968
  Convert tickers from CryptoDataPy to AQR format.
@@ -911,79 +1056,6 @@ class ConvertParams:
911
1056
  "source_fields": self.data_req.source_fields,
912
1057
  }
913
1058
 
914
- def convert_fx_tickers(self, quote_ccy: str) -> List[str]:
915
- """
916
- Converts base and quote currency tickers to fx pairs following fx quoting convention.
917
-
918
- Parameters
919
- ---------
920
- quote_ccy: str
921
- Quote currency
922
-
923
- Returns
924
- -------
925
- quote_ccy: str
926
- Quote currency.
927
- """
928
- mkts = [] # fx pairs list
929
- # fx groups
930
- base_ccys = ["EUR", "GBP", "AUD", "NZD"]
931
- # g10_fx = ['USD', 'EUR', 'GBP', 'JPY', 'CHF', 'CAD', 'AUD', 'NZD', 'NOK', 'SEK']
932
- # dm_fx = ['USD', 'EUR', 'GBP', 'JPY', 'CHF', 'CAD', 'AUD', 'NZD', 'NOK', 'SEK', 'SGD', 'ILS', 'HKD', ]
933
- # em_fx = ['ARS', 'BRL', 'CHN', 'CLP', 'CNY', 'COP', 'IDR', 'INR', 'KRW', 'MYR', 'MXN', 'PEN', 'PHP', 'RUB',
934
- # 'TRY', 'TWD', 'ZAR']
935
-
936
- for ticker in self.data_req.tickers:
937
- if ticker.upper() in base_ccys and quote_ccy.upper() == "USD":
938
- mkts.append(ticker.upper() + "/" + quote_ccy.upper())
939
- elif quote_ccy.upper() == "USD":
940
- mkts.append(quote_ccy.upper() + "/" + ticker.upper())
941
- else:
942
- mkts.append(ticker.upper() + "/" + quote_ccy.upper())
943
-
944
- return mkts
945
-
946
- def convert_fields(self, data_source: str) -> List[str]:
947
- """
948
- Converts fields from CryptoDataPy to data source format.
949
-
950
- Parameters
951
- ---------
952
- data_source: str
953
- Name of data source for fields conversions.
954
-
955
- Returns
956
- -------
957
- fields_list: list
958
- List of fields in data source format.
959
-
960
- """
961
- # fields
962
- with resources.path("cryptodatapy.conf", "fields.csv") as f:
963
- fields_dict_path = f
964
- fields_df, fields_list = (
965
- pd.read_csv(fields_dict_path, index_col=0, encoding="latin1"),
966
- [],
967
- )
968
-
969
- # when source fields already provided in data req
970
- if self.data_req.source_fields is not None:
971
- fields_list = self.data_req.source_fields
972
-
973
- # convert to source format
974
- else:
975
- for field in self.data_req.fields:
976
- try:
977
- fields_list.append(fields_df.loc[field, data_source + "_id"])
978
- except KeyError as e:
979
- logging.warning(e)
980
- logging.warning(
981
- f"Id for {field} could not be found in the data catalog."
982
- f" Try using source field ids."
983
- )
984
-
985
- return fields_list
986
-
987
1059
  def to_dydx_dict(self) -> Dict[str, Union[list, str, int, float, None]]:
988
1060
  """
989
1061
  Convert parameters from CryptoDataPy to dYdX format.
@@ -1101,3 +1173,78 @@ class ConvertParams:
1101
1173
  'oi': 'openInterest'
1102
1174
  }
1103
1175
  return self.data_req
1176
+
1177
def convert_fx_tickers(self, quote_ccy: str) -> List[str]:
    """
    Converts base and quote currency tickers to fx pairs following fx quoting convention.

    Against USD, the currencies EUR, GBP, AUD and NZD are quoted as
    ``CCY/USD``; every other ticker is quoted as ``USD/CCY``. For any
    other quote currency the pair is ``TICKER/QUOTE``.

    Parameters
    ---------
    quote_ccy: str
        Quote currency.

    Returns
    -------
    mkts: list
        List of fx pairs in upper case, one per ticker in
        ``self.data_req.tickers`` and in the same order.
    """
    # currencies quoted as the base currency against USD
    base_ccys = ["EUR", "GBP", "AUD", "NZD"]

    mkts = []  # fx pairs list
    for ticker in self.data_req.tickers:
        base, quote = ticker.upper(), quote_ccy.upper()
        if quote == "USD" and base not in base_ccys:
            mkts.append(quote + "/" + base)
        else:
            mkts.append(base + "/" + quote)

    return mkts
1210
+
1211
def convert_fields(self, data_source: str) -> List[str]:
    """
    Converts fields from CryptoDataPy to data source format.

    Parameters
    ---------
    data_source: str
        Name of data source for fields conversions. The ids are looked up
        in the ``<data_source>_id`` column of the packaged ``fields.csv``.

    Returns
    -------
    fields_list: list
        List of fields in data source format. When
        ``self.data_req.source_fields`` is already set it is returned
        as is; fields with no catalog entry are logged and skipped.

    """
    # when source fields already provided in data req, no catalog lookup is needed
    if self.data_req.source_fields is not None:
        return self.data_req.source_fields

    # fields catalog, read while the resource path is still valid
    with resources.path("cryptodatapy.conf", "fields.csv") as f:
        fields_df = pd.read_csv(f, index_col=0, encoding="latin1")

    # convert to source format
    fields_list = []
    for field in self.data_req.fields:
        try:
            fields_list.append(fields_df.loc[field, data_source + "_id"])
        except KeyError as e:
            logging.warning(e)
            logging.warning(
                f"Id for {field} could not be found in the data catalog."
                f" Try using source field ids."
            )

    return fields_list
@@ -673,6 +673,44 @@ class WrangleData:
673
673
 
674
674
  return self.data_resp
675
675
 
676
def polygon(self) -> pd.DataFrame:
    """
    Wrangles Polygon data response to dataframe with tidy data format.

    The response is converted to a dataframe indexed by ``date`` (parsed
    from epoch milliseconds), resampled to ``self.data_req.freq`` keeping
    the last observation, and restricted to ``self.data_req.fields``.

    Returns
    -------
    pd.DataFrame
        Wrangled dataframe into tidy data format.

    """
    # create df
    self.data_resp = pd.DataFrame(self.data_resp)

    # convert cols/fields to lib
    self.convert_fields_to_lib(data_source='polygon')

    # convert to datetime and set index
    self.data_resp['date'] = pd.to_datetime(self.data_resp['date'], unit='ms')
    df = self.data_resp.set_index('date').sort_index()

    # resample
    df = df.resample(self.data_req.freq).last()

    # type conversion
    df = df.convert_dtypes()

    # remove bad data; zeros are masked first so that rows/cols made up
    # entirely of zeros are removed by the all-NaN drop below
    df = df[df != 0]  # 0 values
    df = df[~df.index.duplicated()]  # duplicate rows
    df = df.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs

    # keep only requested fields and sort index
    self.data_resp = df[self.data_req.fields].sort_index()

    return self.data_resp
713
+
676
714
  def investpy(self) -> pd.DataFrame:
677
715
  """
678
716
  Wrangles InvestPy data response to dataframe with tidy data format.
@@ -957,6 +995,38 @@ class WrangleData:
957
995
 
958
996
  return self.data_resp
959
997
 
998
def alphavantage(self) -> pd.DataFrame:
    """
    Wrangles Alpha Vantage data response to dataframe with tidy data format.

    Expects ``self.data_resp`` to expose ``date`` and ``ticker`` either as
    index levels or as columns after ``reset_index``. The frame that was
    passed in is not modified; a new frame is stored in ``self.data_resp``.

    Returns
    -------
    pd.DataFrame
        Wrangled dataframe into tidy data format, indexed by
        (date, ticker) and restricted to ``self.data_req.fields``.
    """
    # index: work on a new frame instead of mutating the response in place
    df = self.data_resp.reset_index()
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index(['date', 'ticker'])

    # resample each ticker separately, keeping the last observation per period
    df = (
        df.groupby('ticker')
        .resample(self.data_req.freq, level='date')
        .last()
        .swaplevel('ticker', 'date')
        .sort_index()
    )

    # type conversion
    df = df.convert_dtypes()

    # remove bad data
    df = df[df != 0]  # 0 values
    df = df[~df.index.duplicated()]  # duplicate rows
    df = df.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs

    # keep only requested fields and sort index
    self.data_resp = df[self.data_req.fields].sort_index()

    return self.data_resp
1029
+
960
1030
  def famafrench(self) -> pd.DataFrame:
961
1031
  """
962
1032
  Wrangles Fama-French data response to dataframe with tidy data format.
@@ -1175,4 +1245,4 @@ class WrangleData:
1175
1245
  if self.data_req.end_date is not None:
1176
1246
  self.data_resp = self.data_resp[(self.data_resp.index <= self.data_req.end_date)]
1177
1247
 
1178
- return self
1248
+ return self
@@ -37,6 +37,16 @@ class DataCredentials:
37
37
  tiingo_api_key: str = os.environ['TIINGO_API_KEY']
38
38
  except KeyError:
39
39
  tiingo_api_key: str = None
40
+ # alpha vantage api key
41
+ try:
42
+ alpha_vantage_api_key: str = os.environ['ALPHAVANTAGE_API_KEY']
43
+ except KeyError:
44
+ alpha_vantage_api_key: str = None
45
+ # polygon api key
46
+ try:
47
+ polygon_api_key: str = os.environ['POLYGON_API_KEY']
48
+ except KeyError:
49
+ polygon_api_key: str = None
40
50
  # coinmetrics api key
41
51
  try:
42
52
  coinmetrics_api_key: str = os.environ['COINMETRICS_API_KEY']
@@ -52,6 +62,7 @@ class DataCredentials:
52
62
  coinmetrics_base_url: str = 'https://api.coinmetrics.io/v4'
53
63
  else:
54
64
  coinmetrics_base_url: str = 'https://community-api.coinmetrics.io/v4'
65
+ polygon_base_url: str = 'https://api.polygon.io/v3/reference/'
55
66
 
56
67
  # API endpoints
57
68
  cryptomcompare_endpoints: dict = field(default_factory=lambda: {
@@ -0,0 +1,82 @@
1
+ import pandas as pd
2
+
3
+
4
def stitch_dataframes(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """
    Stitches together dataframes with different start dates.

    Values from ``df1`` take precedence. Cells that are missing in
    ``df1`` (NaN, or rows/columns that only exist in ``df2``) are taken
    from ``df2``. No forward filling is performed.

    Parameters
    ----------
    df1: pd.DataFrame
        First dataframe to be stitched. Its non-null values are kept.
    df2: pd.DataFrame
        Second dataframe to be stitched. Supplies the values df1 lacks.

    Returns
    -------
    combined_df: pd.DataFrame
        Combined or stitched dataframes with extended data, on the union
        of both indexes and columns.
    """
    # df1 laid out on df2's index/columns, with its gaps filled from df2
    updated_df = df1.reindex(index=df2.index, columns=df2.columns).fillna(df2)

    # keep df1 where it has data, fall back to the filled frame elsewhere
    combined_df = df1.combine_first(updated_df)

    return combined_df
25
+
26
+
27
def rebase_fx_to_foreign_vs_usd(df: pd.DataFrame) -> pd.DataFrame:
    """
    Rebase FX rates to foreign currency vs. USD format, so that an increase
    means the foreign currency is appreciating. Works for both MultiIndex
    (date, ticker) and single-index (date index, tickers as columns).

    Tickers may be written with or without a slash separator
    (``USDJPY`` or ``USD/JPY``). Rates quoted as USD/XXX are inverted;
    rates quoted as XXX/USD are left as they are.

    Parameters
    ----------
    df : pd.DataFrame
        FX DataFrame with either:
        - MultiIndex (date, ticker), ticker being the second level
        - Datetime index and tickers as columns

    Returns
    -------
    pd.DataFrame
        Rebased FX rates with tickers as foreign currency (e.g., 'EUR', 'JPY'),
        sorted by index. The input dataframe is not modified.

    Raises
    ------
    ValueError
        If a ticker neither starts nor ends with 'USD'.
    """
    df = df.copy()

    def get_foreign_currency(ticker: str) -> str:
        # drop the optional separator so 'USD/JPY' is handled like 'USDJPY'
        pair = ticker.replace("/", "")
        if pair.startswith("USD"):
            return pair[3:]  # USDJPY → JPY
        elif pair.endswith("USD"):
            return pair[:3]  # EURUSD → EUR
        else:
            raise ValueError(f"Unexpected ticker format: {ticker}")

    if isinstance(df.index, pd.MultiIndex):
        # MultiIndex: (date, ticker)
        tickers = df.index.get_level_values(1)
        inverted = tickers.str.startswith("USD")

        # Invert rates for USDXXX rows only
        if inverted.any():
            df.loc[inverted] = 1 / df.loc[inverted]

        # Rename all tickers to just the foreign currency symbol
        new_tickers = tickers.map(get_foreign_currency)
        df.index = pd.MultiIndex.from_arrays(
            [df.index.get_level_values(0), new_tickers],
            names=df.index.names
        )

    else:
        # Single index (datetime), columns = tickers
        rebased = {}
        for col in df.columns:
            fx = get_foreign_currency(col)
            rebased[fx] = 1 / df[col] if col.startswith("USD") else df[col]

        df = pd.DataFrame(rebased, index=df.index)

    return df.sort_index()