cryptodatapy 0.2.25__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  import logging
2
2
  from time import sleep
3
3
  from typing import Any, Dict, List, Optional, Union
4
-
5
4
  import pandas as pd
5
+
6
6
  from coinmetrics.api_client import CoinMetricsClient
7
7
 
8
8
  from cryptodatapy.extract.data_vendors.datavendor import DataVendor
@@ -0,0 +1,388 @@
1
+ import logging
2
+ from time import sleep
3
+ from typing import Any, Dict, List, Optional
4
+ import pandas as pd
5
+
6
+ from polygon import RESTClient
7
+
8
+
9
+ from cryptodatapy.extract.data_vendors.datavendor import DataVendor
10
+ from cryptodatapy.extract.datarequest import DataRequest
11
+ from cryptodatapy.transform.convertparams import ConvertParams
12
+ from cryptodatapy.transform.wrangle import WrangleData
13
+ from cryptodatapy.util.datacredentials import DataCredentials
14
+
15
+ # data credentials: module-level instance whose attributes supply the default base url and api key of Polygon.__init__
16
+ data_cred = DataCredentials()
17
+
18
+
19
class Polygon(DataVendor):
    """
    Retrieves data from Polygon API.
    """

    def __init__(
            self,
            categories: List[str] = ["crypto", "fx", "eqty", 'rates', "bonds", "cmdty", "index"],
            exchanges: Optional[Dict[str, List[str]]] = None,
            indexes: Optional[Dict[str, List[str]]] = None,
            assets: Optional[Dict[str, List[str]]] = None,
            markets: Optional[Dict[str, List[str]]] = None,
            market_types: List[str] = ["spot", "future", "option"],
            fields: Optional[Dict[str, List[str]]] = None,
            frequencies: List[str] = ["1s", "1min", "1h", "d", "w", "m", "q", "y"],
            # NOTE(review): the default base url is taken from the Tiingo credentials; it is only
            # forwarded to DataVendor (requests go through RESTClient) - confirm whether a
            # Polygon-specific base url should be used here instead.
            base_url: str = data_cred.tiingo_base_url,
            api_key: str = data_cred.polygon_api_key,
            api_endpoints: Optional[Dict[str, str]] = None,
            max_obs_per_call: Optional[int] = None,
            rate_limit: Optional[Any] = None,
    ):
        """
        Constructor

        Parameters
        ----------
        categories: list or str, {'crypto', 'fx', 'eqty', 'rates', 'bonds', 'cmdty', 'index'}
            List or string of available categories, e.g. ['crypto', 'fx', 'eqty'].
        exchanges: dictionary, optional, default None
            Dictionary with available exchanges, by cat-exchanges key-value pairs, e.g. {'eqty' : ['NYSE', 'DAX', ...],
            'crypto' : ['binance', 'ftx', ....]}.
        indexes: dictionary, optional, default None
            Dictionary of available indexes, by cat-indexes key-value pairs, e.g. {'eqty': ['SPX', 'N225'],
            'rates': [.... , ...]}.
        assets: dictionary, optional, default None
            Dictionary of available assets, by cat-assets key-value pairs, e.g. {'rates': ['Germany 2Y', 'Japan 10Y',
            ...], 'eqty': ['SPY', 'TLT', ...], ...}.
        markets: dictionary, optional, default None
            Dictionary of available markets, by cat-markets key-value pairs, e.g. {'fx': ['EUR/USD', 'USD/JPY', ...],
            'crypto': ['BTC/ETH', 'ETH/USDT', ...]}.
        market_types: list
            List of available market types, e.g. ['spot', 'future', 'option'].
        fields: dictionary, optional, default None
            Dictionary of available fields, by cat-fields key-value pairs, e.g. {'eqty': ['date', 'open', 'high',
            'low', 'close', 'volume'], 'fx': ['date', 'open', 'high', 'low', 'close']}
        frequencies: list
            List of available frequencies, e.g. ['1s', '1min', '1h', 'd', 'w', 'm', 'q', 'y'].
        base_url: str
            Base url used for GET requests. If not provided, default is set to base_url stored in DataCredentials.
        api_key: str
            Api key, e.g. 'dcf13983adf7dfa79a0dfa35adf'. If not provided, default is set to
            api_key stored in DataCredentials.
        api_endpoints: dict, optional, default None
            Dictionary with available API endpoints.
        max_obs_per_call: int, default None
            Maximum number of observations returned per API call. When not set, req_data falls back to 500.
        rate_limit: pd.DataFrame, optional, Default None
            Number of API calls made and left, by time frequency.

        Raises
        ------
        TypeError
            If no api key is available.
        """
        super().__init__(
            categories, exchanges, indexes, assets, markets, market_types,
            fields, frequencies, base_url, api_endpoints, api_key, max_obs_per_call, rate_limit
        )

        if api_key is None:
            raise TypeError("Set your Polygon api key in environment variables as 'POLYGON_API_KEY' or "
                            "add it as an argument when instantiating the class. To get an api key, visit: "
                            "https://polygon.io/dashboard/")

        self.data_req = None
        self.data = pd.DataFrame()
        self.client = RESTClient(self.api_key)

    def get_exchanges_info(self):
        """
        Get exchanges info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.
        """
        pass

    def get_indexes_info(self):
        """
        Get indexes info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.
        """
        pass

    def get_assets_info(self):
        """
        Get assets info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.
        """
        pass

    def get_markets_info(self):
        """
        Get markets info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.
        """
        pass

    def get_fields_info(self, data_type: Optional[str] = None):
        """
        Get fields info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.

        Parameters
        ----------
        data_type: str, optional, default None
            Data type for which to get fields info.
        """
        pass

    def get_rate_limit_info(self):
        """
        Get rate limit info from Polygon API.

        Not implemented yet: the method body is a placeholder and returns None.
        """
        pass

    def _source_ticker(self, ticker: str) -> str:
        """
        Converts a ticker to the symbol sent to the Polygon aggregates endpoint.

        Parameters
        ----------
        ticker: str
            Ticker symbol, with or without a Polygon asset-class prefix.

        Returns
        -------
        str
            Ticker symbol to request.
        """
        # ticker already carries an asset-class prefix (e.g. 'C:EURUSD'), do not prefix it twice
        if ":" in ticker:
            return ticker

        # equity tickers are requested as is; data_req is None when req_data is called directly
        if getattr(self.data_req, "cat", None) == "eqty":
            return ticker

        # currency pairs use the 'C:' prefix (also the behaviour when no data request is set)
        return f"C:{ticker}"

    def req_data(self,
                 ticker: str,
                 multiplier: int,
                 timespan: str,
                 from_: str,
                 to: str
                 ) -> List:
        """
        Request data from Polygon API.

        Parameters
        ----------
        ticker: str
            Ticker symbol for the asset. Currency pairs are prefixed with 'C:' before the request;
            equity tickers (data request category 'eqty') and already prefixed tickers are sent unchanged.
        multiplier: int
            Multiplier for the aggregation.
        timespan: str
            Timespan for the aggregation, e.g. 'minute', 'hour', 'day'.
        from_: str
            Start date for the data request in 'YYYY-MM-DD' format.
        to: str
            End date for the data request in 'YYYY-MM-DD' format.

        Returns
        -------
        List: List of aggregated data from Polygon API. Empty if nothing was returned.
        """
        # list_aggs is iterated to completion to collect every aggregate bar
        aggs = list(
            self.client.list_aggs(
                self._source_ticker(ticker),
                multiplier,
                timespan,
                from_,
                to,
                adjusted="true",
                sort="asc",
                limit=self.max_obs_per_call if self.max_obs_per_call else 500
            )
        )

        if not aggs:
            logging.warning(f"No data found for ticker {ticker} in the specified date range.")

        return aggs

    @staticmethod
    def wrangle_data_resp(data_req: DataRequest, data_resp: List[Any]) -> pd.DataFrame:
        """
        Wrangle data response.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.
        data_resp: list
            Data response from data request, as returned by req_data.

        Returns
        -------
        df: pd.DataFrame
            Wrangled dataframe with DatetimeIndex and market data for selected fields (cols), in tidy format.
        """
        # wrangle data resp
        return WrangleData(data_req, data_resp).polygon()

    def get_tidy_data(self, data_req: DataRequest, ticker) -> pd.DataFrame:
        """
        Submits data request and wrangles the data response into tidy data format.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.
        ticker: str
            Ticker symbol (or market) to request, passed on to req_data.

        Returns
        -------
        df: pd.DataFrame
            Dataframe with DatetimeIndex and field values (col) wrangled into tidy data format.
        """
        # convert data request parameters to Polygon format
        self.data_req = ConvertParams(data_req).to_polygon()

        # get entire data history
        data_resp = self.req_data(
            ticker=ticker,
            multiplier=1,
            timespan=self.data_req.source_freq,
            from_=self.data_req.source_start_date,
            to=self.data_req.source_end_date,
        )

        # wrangle data resp
        return self.wrangle_data_resp(self.data_req, data_resp)

    def get_all_tickers(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Loops list of tickers, retrieves data in tidy format for each ticker and stores it in a
        multiindex dataframe.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame - MultiIndex
            Dataframe with DatetimeIndex (level 0), ticker (level 1) and values for fields (cols), in tidy data format.
            Empty if no ticker could be retrieved.

        Raises
        ------
        NotImplementedError
            If the data category is neither 'fx' nor 'eqty'.
        """
        # convert data request parameters to Polygon format
        self.data_req = ConvertParams(data_req).to_polygon()
        cat = self.data_req.cat

        # (symbol to request, ticker used as index label) pairs
        if cat == 'fx':
            requests = list(zip(self.data_req.source_markets, self.data_req.tickers))
        elif cat == 'eqty':
            requests = [(ticker, ticker) for ticker in self.data_req.tickers]
        else:
            raise NotImplementedError(
                f"Data category '{self.data_req.cat}' is not implemented for Polygon API. "
                "Supported categories are: 'fx', 'eqty'."
            )

        # collect one frame per ticker and concatenate once at the end
        frames = []
        for source_ticker, ticker in requests:
            try:
                df0 = self.get_tidy_data(self.data_req, source_ticker)
            except Exception as e:
                # a failed ticker is skipped so the remaining tickers can still be retrieved
                logging.warning(f"Failed to get {cat} data for {source_ticker} after many attempts: {e}.")
            else:
                # add ticker to index
                df0['ticker'] = ticker.upper()
                df0.set_index(['ticker'], append=True, inplace=True)
                frames.append(df0)

            # sleep to avoid hitting API rate limits
            sleep(self.data_req.pause)

        df = pd.concat(frames) if frames else pd.DataFrame()

        return df.sort_index()

    def check_params(self, data_req: DataRequest) -> None:
        """
        Checks the parameters of the data request before requesting data to reduce API calls
        and improve efficiency.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Raises
        ------
        ValueError
            If the data request has no category.
        """
        self.data_req = ConvertParams(data_req).to_polygon()

        # check cat
        if self.data_req.cat is None:
            raise ValueError(
                f"Cat cannot be None. Please provide category. Categories include: {self.categories}."
            )

        # TODO: check tickers/markets against self.assets and fields against self.fields
        #  once get_assets_info and get_fields_info are implemented.

    def get_data(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Get market data (fx, eqty).

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame - MultiIndex
            DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for market or series data
            for selected fields (cols), in tidy format.

        Raises
        ------
        ValueError
            If the data request has no category.
        Exception
            If the data could not be retrieved; the original error is attached as the cause.
        """
        # check data req params
        self.check_params(data_req)

        # get data
        try:
            df = self.get_all_tickers(data_req)

        except Exception as e:
            logging.warning(e)
            # chain the cause so the underlying error is not lost
            raise Exception(
                "No data returned. Check data request parameters and try again."
            ) from e

        return df
@@ -725,8 +725,6 @@ class Tiingo(DataVendor):
725
725
  DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for market or series data
726
726
  for selected fields (cols), in tidy format.
727
727
  """
728
- logging.info("Retrieving data request from Tiingo...")
729
-
730
728
  # check data req params
731
729
  self.check_params(data_req)
732
730
 
@@ -135,6 +135,8 @@ class DataRequest:
135
135
  "tiingo",
136
136
  "investpy",
137
137
  "yahoo",
138
+ "alphavantage",
139
+ "polygon",
138
140
  "fred",
139
141
  "famafrench",
140
142
  "dbnomics",
@@ -4,6 +4,7 @@ import pandas as pd
4
4
  from cryptodatapy.extract.data_vendors.coinmetrics_api import CoinMetrics
5
5
  from cryptodatapy.extract.data_vendors.cryptocompare_api import CryptoCompare
6
6
  from cryptodatapy.extract.data_vendors.glassnode_api import Glassnode
7
+ from cryptodatapy.extract.data_vendors.polygon_api import Polygon
7
8
  from cryptodatapy.extract.data_vendors.tiingo_api import Tiingo
8
9
  from cryptodatapy.extract.datarequest import DataRequest
9
10
  from cryptodatapy.extract.exchanges.dydx import Dydx
@@ -93,8 +94,8 @@ class GetData:
93
94
  "dbnomics": DBnomics,
94
95
  "yahoo": PandasDataReader,
95
96
  "fred": PandasDataReader,
96
- "av-daily": PandasDataReader,
97
- "av-forex-daily": PandasDataReader,
97
+ "alphavantage": PandasDataReader,
98
+ "polygon": Polygon,
98
99
  "famafrench": PandasDataReader,
99
100
  "aqr": AQR,
100
101
  "dydx": Dydx
@@ -204,8 +205,8 @@ class GetData:
204
205
  "dbnomics": DBnomics,
205
206
  "yahoo": PandasDataReader,
206
207
  "fred": PandasDataReader,
207
- "av-daily": PandasDataReader,
208
- "av-forex-daily": PandasDataReader,
208
+ "alphavantage": PandasDataReader,
209
+ "polygon": Polygon,
209
210
  "famafrench": PandasDataReader,
210
211
  "aqr": AQR,
211
212
  "dydx": Dydx
@@ -272,8 +273,8 @@ class GetData:
272
273
  "dbnomics": DBnomics,
273
274
  "yahoo": PandasDataReader,
274
275
  "fred": PandasDataReader,
275
- "av-daily": PandasDataReader,
276
- "av-forex-daily": PandasDataReader,
276
+ "alphavantage": PandasDataReader,
277
+ "polygon": Polygon,
277
278
  "famafrench": PandasDataReader,
278
279
  "aqr": AQR,
279
280
  "dydx": Dydx
@@ -29,9 +29,12 @@ class PandasDataReader(Library):
29
29
  markets: Optional[Dict[str, List[str]]] = None,
30
30
  market_types: List[str] = ["spot", "future"],
31
31
  fields: Optional[Dict[str, List[str]]] = None,
32
- frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y"],
32
+ frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y",
33
+ "av-intraday", "av-daily", "av-weekly", "av-monthly",
34
+ "av-daily-adjusted", "av-weekly-adjusted",
35
+ "av-monthly-adjusted", "av-forex-daily"],
33
36
  base_url: Optional[str] = None,
34
- api_key: Optional[str] = None,
37
+ api_key: str = data_cred.alpha_vantage_api_key,
35
38
  max_obs_per_call: Optional[int] = None,
36
39
  rate_limit: Optional[Any] = None,
37
40
  ):
@@ -228,7 +231,7 @@ class PandasDataReader(Library):
228
231
  # mkt type
229
232
  if self.data_req.mkt_type not in self.market_types:
230
233
  raise ValueError(
231
- f"{self.data_req.mkt_type} is not available for {self.data_req.exch}."
234
+ f"{self.data_req.mkt_type} is not available."
232
235
  )
233
236
 
234
237
  # check fields
@@ -268,6 +271,20 @@ class PandasDataReader(Library):
268
271
  self.data_req.source_start_date,
269
272
  self.data_req.source_end_date)
270
273
 
274
+ # alpha vantage
275
+ elif self.data_req.source == "alphavantage":
276
+ for ticker, market in zip(self.data_req.source_tickers, self.data_req.source_markets):
277
+ df1 = web.DataReader(market,
278
+ self.data_req.source_freq,
279
+ self.data_req.source_start_date,
280
+ self.data_req.source_end_date,
281
+ api_key=self.api_key)
282
+ df1.index.name = 'date'
283
+ df1['ticker'] = ticker
284
+ df1.set_index(['ticker'], append=True, inplace=True)
285
+ # concat df and df1
286
+ self.data = pd.concat([self.data, df1])
287
+
271
288
  # fama-french
272
289
  elif data_req.source == "famafrench":
273
290
  for ticker in self.data_req.source_tickers:
@@ -6,47 +6,6 @@ from cryptodatapy.transform.impute import Impute
6
6
  from cryptodatapy.transform.filter import Filter
7
7
 
8
8
 
9
def stitch_dataframes(dfs):
    """
    Stitches together dataframes with different start dates.

    Dataframes are ranked by start date and combined with `combine_first`, so that where
    dataframes overlap the values of the dataframe with the most recent start date are kept
    and earlier history is filled in from the others. The list passed in is not modified.

    Parameters
    ----------
    dfs: list
        List of dataframes to be stitched together. All dataframes must have a pd.MultiIndex
        (start date read from level 0) or all must have a pd.DatetimeIndex.

    Returns
    -------
    combined_df: pd.DataFrame
        Combined dataframe with extended start date, restricted to the columns (and column order)
        of the dataframe with the most columns.

    Raises
    ------
    TypeError
        If dfs is not a list, or the dataframes do not share one of the supported index types.
    ValueError
        If dfs is empty.
    """
    # check if dfs is a list
    if not isinstance(dfs, list):
        raise TypeError("Dataframes must be a list.")

    # nothing to stitch
    if not dfs:
        raise ValueError("Dataframes list cannot be empty.")

    # check index types; the index minimum is used as start date so that unsorted indexes
    # and unused MultiIndex levels do not affect the ordering
    if all(isinstance(df.index, pd.MultiIndex) for df in dfs):
        def start_date(df):
            return df.index.get_level_values(0).min()
    elif all(isinstance(df.index, pd.DatetimeIndex) for df in dfs):
        def start_date(df):
            return df.index.min()
    else:
        raise TypeError("Dataframes must be pd.MultiIndex or have DatetimeIndex.")

    # most recent start date first; sorted() leaves the caller's list untouched
    ordered = sorted(dfs, key=start_date, reverse=True)

    # combine dfs, earlier history only fills values missing from the more recent dataframes
    combined_df = ordered[0]
    for df in ordered[1:]:
        combined_df = combined_df.combine_first(df)

    # reorder cols to match the dataframe with the most columns
    max_columns = max(len(df.columns) for df in ordered)
    cols = next(df.columns.tolist() for df in ordered if len(df.columns) == max_columns)

    return combined_df[cols]
48
-
49
-
50
9
  class CleanData:
51
10
  """
52
11
  Cleans data to improve data quality.