cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  import logging
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
  import yfinance as yf
6
- from pandas_datareader.data import DataReader as pdr_fetch
6
+ import pandas_datareader.data as web
7
7
  from pandas_datareader import wb
8
8
 
9
9
  from cryptodatapy.extract.datarequest import DataRequest
@@ -20,19 +20,18 @@ class PandasDataReader(Library):
20
20
  """
21
21
  Retrieves data from Pandas Data Reader API.
22
22
  """
23
-
24
23
  def __init__(
25
24
  self,
26
- categories=None,
25
+ categories: Union[str, List[str]] = ["fx", "rates", "eqty", "cmdty", "credit", "macro"],
27
26
  exchanges: Optional[List[str]] = None,
28
27
  indexes: Optional[Dict[str, List[str]]] = None,
29
28
  assets: Optional[Dict[str, List[str]]] = None,
30
29
  markets: Optional[Dict[str, List[str]]] = None,
31
- market_types=None,
30
+ market_types: List[str] = ["spot", "future"],
32
31
  fields: Optional[Dict[str, List[str]]] = None,
33
- frequencies=None,
32
+ frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y"],
34
33
  base_url: Optional[str] = None,
35
- api_key=None,
34
+ api_key: Optional[str] = None,
36
35
  max_obs_per_call: Optional[int] = None,
37
36
  rate_limit: Optional[Any] = None,
38
37
  ):
@@ -89,29 +88,8 @@ class PandasDataReader(Library):
89
88
  max_obs_per_call,
90
89
  rate_limit,
91
90
  )
92
-
93
- if api_key is None:
94
- self.api_key = {
95
- "fred": None,
96
- "yahoo": None,
97
- "fama_french": None
98
- }
99
- if frequencies is None:
100
- self.frequencies = {
101
- "crypto": ["d", "w", "m", "q", "y"],
102
- "fx": ["d", "w", "m", "q", "y"],
103
- "rates": ["d", "w", "m", "q", "y"],
104
- "cmdty": ["d", "w", "m", "q", "y"],
105
- "eqty": ["d", "w", "m", "q", "y"],
106
- "credit": ["d", "w", "m", "q", "y"],
107
- "macro": ["d", "w", "m", "q", "y"],
108
- }
109
- if market_types is None:
110
- self.market_types = ["spot"]
111
- if categories is None:
112
- self.categories = ["fx", "rates", "eqty", "cmdty", "credit", "macro"]
113
- if fields is None:
114
- self.fields = self.get_fields_info()
91
+ self.data_req = None
92
+ self.data = pd.DataFrame()
115
93
 
116
94
  @staticmethod
117
95
  def get_vendors_info():
@@ -122,7 +100,8 @@ class PandasDataReader(Library):
122
100
  f"See providers page to find available vendors: {data_cred.pdr_vendors_url} "
123
101
  )
124
102
 
125
- def get_exchanges_info(self) -> None:
103
+ @staticmethod
104
+ def get_exchanges_info() -> None:
126
105
  """
127
106
  Get exchanges info.
128
107
  """
@@ -130,7 +109,8 @@ class PandasDataReader(Library):
130
109
  f"See specific data vendor for available exchanges: {data_cred.pdr_vendors_url}"
131
110
  )
132
111
 
133
- def get_indexes_info(self) -> None:
112
+ @staticmethod
113
+ def get_indexes_info() -> None:
134
114
  """
135
115
  Get indexes info.
136
116
  """
@@ -138,7 +118,8 @@ class PandasDataReader(Library):
138
118
  f"See specific data vendor for available indexes: {data_cred.pdr_vendors_url}"
139
119
  )
140
120
 
141
- def get_assets_info(self) -> None:
121
+ @staticmethod
122
+ def get_assets_info() -> None:
142
123
  """
143
124
  Get assets info.
144
125
  """
@@ -146,7 +127,8 @@ class PandasDataReader(Library):
146
127
  f"See specific data vendor for available assets: {data_cred.pdr_vendors_url} "
147
128
  )
148
129
 
149
- def get_markets_info(self) -> None:
130
+ @staticmethod
131
+ def get_markets_info() -> None:
150
132
  """
151
133
  Get markets info.
152
134
  """
@@ -154,60 +136,114 @@ class PandasDataReader(Library):
154
136
  f"See specific data vendor for available markets: {data_cred.pdr_vendors_url}"
155
137
  )
156
138
 
157
- @staticmethod
158
- def get_fields_info(
159
- data_type: Optional[str] = "market", cat: Optional[str] = None
160
- ) -> Dict[str, List[str]]:
139
+ def get_fields_info(self) -> Dict[str, List[str]]:
161
140
  """
162
141
  Get fields info.
163
142
 
164
- Parameters
165
- ----------
166
- data_type: str, {'market', 'on-chain', 'off-chain'}, default 'market'
167
- Type of data.
168
- cat: str, {'crypto', 'eqty', 'fx', 'rates', 'cmdty', 'macro'}, optional, default None
169
- Asset class or time series category.
170
-
171
143
  Returns
172
144
  -------
173
145
  fields: dictionary
174
146
  Dictionary with info on available fields, by category.
175
147
  """
176
- if data_type == "on-chain":
177
- raise ValueError(
178
- "Pandas Data Reader is a market data aggregator of market and off-chain data."
179
- )
148
+ if self.fields is None:
149
+ self.fields = {
150
+ "fx": ["open", "high", "low", "close", "volume", "close_adj", "er"],
151
+ "rates": ["open", "high", "low", "close", "volume", "close_adj", "er"],
152
+ "eqty": ["open", "high", "low", "close", "volume", "close_adj", "er"],
153
+ "cmdty": ["open", "high", "low", "close", "volume", "close_adj", "er"],
154
+ "credit": ["open", "high", "low", "close", "volume", "close_adj", "er"],
155
+ "macro": ["actual"],
156
+ }
180
157
 
181
- # list of fields
182
- market_fields_list = ["open", "high", "low", "close", "volume", "close_adj", "er"]
183
- macro_fields_list = ["actual"]
158
+ # fields cat
159
+ if self.data_req is not None:
160
+ self.fields = self.fields[self.data_req.cat]
184
161
 
185
- # fields dict
186
- fields = {
187
- "fx": market_fields_list,
188
- "rates": market_fields_list,
189
- "eqty": market_fields_list,
190
- "cmdty": market_fields_list,
191
- "credit": market_fields_list,
192
- "macro": macro_fields_list,
193
- }
162
+ return self.fields
194
163
 
195
- # fields obj
196
- if cat is not None:
197
- fields = fields[cat]
164
+ def get_frequencies_info(self) -> Dict[str, Union[str, int]]:
165
+ """
166
+ Get frequencies info.
167
+
168
+ Returns
169
+ -------
170
+ freq: dictionary
171
+ Dictionary with info on available frequencies.
172
+ """
173
+ if self.frequencies is None:
174
+ self.frequencies = {
175
+ "crypto": ["d", "w", "m", "q", "y"],
176
+ "fx": ["d", "w", "m", "q", "y"],
177
+ "rates": ["d", "w", "m", "q", "y"],
178
+ "cmdty": ["d", "w", "m", "q", "y"],
179
+ "eqty": ["d", "w", "m", "q", "y"],
180
+ "credit": ["d", "w", "m", "q", "y"],
181
+ "macro": ["d", "w", "m", "q", "y"],
182
+ }
198
183
 
199
- return fields
184
+ return self.frequencies
200
185
 
201
- def get_rate_limit_info(self) -> None:
186
+ @staticmethod
187
+ def get_rate_limit_info() -> None:
202
188
  """
203
189
  Get rate limit info.
204
190
  """
205
191
  print(f"See specific data vendor for rate limits: {data_cred.pdr_vendors_url}")
206
192
 
207
- @staticmethod
208
- def get_series(data_req: DataRequest) -> pd.DataFrame:
193
+ def convert_params(self, data_req: DataRequest) -> DataRequest:
209
194
  """
210
- Gets series from DBnomics python client.
195
+ Converts data request parameters to source format.
196
+
197
+ Parameters
198
+ ----------
199
+ data_req: DataRequest
200
+ Parameters of data request in CryptoDataPy format.
201
+
202
+ Returns
203
+ -------
204
+ data_req: DataRequest
205
+ Parameters of data request in source format.
206
+ """
207
+ # convert params to source format
208
+ if self.data_req is None:
209
+ self.data_req = getattr(ConvertParams(data_req), f"to_{data_req.source}")()
210
+
211
+ # check cat
212
+ if self.data_req.cat not in self.categories:
213
+ raise ValueError(
214
+ f"Select a valid category. Valid categories are: {self.categories}."
215
+ )
216
+
217
+ # check tickers
218
+ if not self.data_req.source_tickers:
219
+ raise ValueError("No tickers provided for data request.")
220
+
221
+ # check freq
222
+ if self.data_req.source_freq not in self.frequencies:
223
+ raise ValueError(
224
+ f"{self.data_req.source_freq} frequency is not available. "
225
+ f"Use the '.frequencies' attribute to check available frequencies."
226
+ )
227
+
228
+ # mkt type
229
+ if self.data_req.mkt_type not in self.market_types:
230
+ raise ValueError(
231
+ f"{self.data_req.mkt_type} is not available for {self.data_req.exch}."
232
+ )
233
+
234
+ # check fields
235
+ if self.fields is None:
236
+ self.get_fields_info()
237
+ if not any(field in self.fields for field in self.data_req.fields):
238
+ raise ValueError(
239
+ f"{self.data_req.fields} fields are not available for {self.data_req.cat}."
240
+ )
241
+
242
+ return self.data_req
243
+
244
+ def get_series(self, data_req: DataRequest) -> pd.DataFrame:
245
+ """
246
+ Gets series from python client.
211
247
 
212
248
  Parameters
213
249
  ----------
@@ -220,55 +256,51 @@ class PandasDataReader(Library):
220
256
  Dataframe with DatetimeIndex and actual values (col) for requested series.
221
257
 
222
258
  """
223
- # convert data request parameters to source format
224
- conv_data_req = getattr(ConvertParams(data_req), f"to_{data_req.source}")()
259
+ # convert params to source format
260
+ if self.data_req is None:
261
+ self.convert_params(data_req)
225
262
 
226
263
  try:
227
- # fetch yahoo data
228
- if data_req.source == "yahoo":
264
+ # yahoo
265
+ if self.data_req.source == "yahoo":
229
266
  # fetch yf data
230
- df = yf.download(conv_data_req["tickers"],
231
- conv_data_req["start_date"],
232
- conv_data_req["end_date"])
267
+ self.data = yf.download(self.data_req.source_tickers,
268
+ self.data_req.source_start_date,
269
+ self.data_req.source_end_date)
233
270
 
234
- # fetch fama-french data
271
+ # fama-french
235
272
  elif data_req.source == "famafrench":
236
- df = pd.DataFrame()
237
- for ticker in conv_data_req["tickers"]:
238
- df1 = pdr_fetch(ticker,
239
- data_req.source,
240
- conv_data_req["start_date"],
241
- conv_data_req["end_date"])
242
- df = pd.concat([df, df1[0]], axis=1)
243
-
244
- # featch wb data
273
+ for ticker in self.data_req.source_tickers:
274
+ df1 = web.DataReader(ticker,
275
+ self.data_req.source,
276
+ self.data_req.source_start_date,
277
+ self.data_req.source_end_date)
278
+ self.data = pd.concat([self.data, df1[0]], axis=1)
279
+
280
+ # world bank
245
281
  elif data_req.source == "wb":
246
- df = pd.DataFrame()
247
- for ticker in conv_data_req["tickers"]:
282
+ for ticker in self.data_req.source_tickers:
248
283
  df1 = wb.download(indicator=ticker,
249
- country=conv_data_req['ctys'],
250
- start=conv_data_req["start_date"],
251
- end=conv_data_req["end_date"])
252
- df = pd.concat([df, df1], axis=1)
284
+ country=self.data_req.countries,
285
+ start=self.data_req.source_start_date,
286
+ end=self.data_req.source_end_date)
287
+ self.data = pd.concat([self.data, df1], axis=1)
253
288
 
254
- # fetch pdr data
289
+ # other pdr data
255
290
  else:
256
- df = pdr_fetch(conv_data_req["tickers"],
257
- data_req.source,
258
- conv_data_req["start_date"],
259
- conv_data_req["end_date"])
291
+ self.data = web.DataReader(self.data_req.source_tickers,
292
+ self.data_req.source,
293
+ self.data_req.source_start_date,
294
+ self.data_req.source_end_date)
260
295
 
261
296
  except Exception as e:
262
297
  logging.warning(e)
263
- logging.warning(f"Failed to get data for: {conv_data_req['tickers']}.")
298
+ logging.warning(f"Failed to get data for source tickers: {self.data_req.source_tickers}.")
264
299
 
265
300
  else:
301
+ return self.data
266
302
 
267
- return df
268
-
269
- @staticmethod
270
- def wrangle_data_resp(
271
- data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
303
+ def wrangle_data_resp(self, data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
272
304
  """
273
305
  Wrangle data response.
274
306
 
@@ -285,8 +317,11 @@ class PandasDataReader(Library):
285
317
  Wrangled dataframe with DatetimeIndex (level 0), ticker (level 1), and values for market or macro series
286
318
  for selected fields (cols), in tidy format.
287
319
  """
320
+ if self.data_req is None:
321
+ self.convert_params(data_req)
322
+
288
323
  # wrangle data resp
289
- df = getattr(WrangleData(data_req, data_resp), data_req.source)()
324
+ df = getattr(WrangleData(self.data_req, data_resp), self.data_req.source)()
290
325
 
291
326
  return df
292
327
 
@@ -305,38 +340,17 @@ class PandasDataReader(Library):
305
340
  Dataframe with DatetimeIndex (level 0), tickers (level 1) and actual values (cols),
306
341
  in tidy data format.
307
342
  """
308
- # change to get series
309
- df = self.get_series(data_req)
310
- # wrangle data resp
311
- df = self.wrangle_data_resp(data_req, df)
343
+ # convert params to source format
344
+ if self.data_req is None:
345
+ self.convert_params(data_req)
312
346
 
313
- return df
314
-
315
- def check_params(self, data_req: DataRequest) -> None:
316
- """
317
- Checks the data request parameters before requesting data to reduce API calls
318
- and improve efficiency.
347
+ # get series
348
+ data_resp = self.get_series(self.data_req)
319
349
 
320
- """
321
- # check data source
322
- if data_req.source not in ['fred', 'yahoo', 'famafrench', 'wb']:
323
- raise ValueError(
324
- "Select a Pandas-datareader supported data source for the data request."
325
- )
350
+ # wrangle data resp
351
+ df = self.wrangle_data_resp(self.data_req, data_resp)
326
352
 
327
- # check cat
328
- if data_req.cat not in self.categories:
329
- raise ValueError(
330
- f"Select a valid category. Valid categories are: {self.categories}."
331
- )
332
- # check freq
333
- if data_req.freq not in self.frequencies[data_req.cat]:
334
- raise ValueError(
335
- f"Invalid data frequency. Valid data frequencies are: {self.frequencies}."
336
- )
337
- # check fields
338
- if not any(field in self.fields[data_req.cat] for field in data_req.fields):
339
- raise ValueError(f"Invalid fields. Valid data fields are: {self.fields}.")
353
+ return df
340
354
 
341
355
  def get_data(self, data_req: DataRequest) -> pd.DataFrame:
342
356
  """
@@ -352,16 +366,17 @@ class PandasDataReader(Library):
352
366
  DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for selected fields (cols),
353
367
  in tidy format.
354
368
  """
355
- # check params
356
- self.check_params(data_req)
369
+ # convert params to source format
370
+ if self.data_req is None:
371
+ self.convert_params(data_req)
357
372
 
358
373
  # get tidy data
359
- df = self.get_tidy_data(data_req)
374
+ self.data = self.get_tidy_data(self.data_req)
360
375
 
361
376
  # check if df empty
362
- if df.empty:
377
+ if self.data.empty:
363
378
  raise Exception(
364
379
  "No data returned. Check data request parameters and try again."
365
380
  )
366
381
 
367
- return df.sort_index()
382
+ return self.data.sort_index()