cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fields.csv +1 -1
- cryptodatapy/extract/datarequest.py +169 -28
- cryptodatapy/extract/libraries/Untitled.ipynb +199 -0
- cryptodatapy/extract/libraries/ccxt.ipynb +747 -0
- cryptodatapy/extract/libraries/ccxt_api.py +631 -358
- cryptodatapy/extract/libraries/pandasdr_api.py +153 -138
- cryptodatapy/extract/libraries/yfinance_api.py +511 -0
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +226 -30
- cryptodatapy/transform/cmdty_data.ipynb +402 -0
- cryptodatapy/transform/convertparams.py +160 -303
- cryptodatapy/transform/eqty_data.ipynb +126 -99
- cryptodatapy/transform/wrangle.py +152 -43
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/METADATA +9 -6
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/RECORD +16 -12
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/WHEEL +1 -1
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/LICENSE +0 -0
@@ -1,9 +1,9 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Any, Dict, List, Optional
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
3
3
|
|
4
4
|
import pandas as pd
|
5
5
|
import yfinance as yf
|
6
|
-
|
6
|
+
import pandas_datareader.data as web
|
7
7
|
from pandas_datareader import wb
|
8
8
|
|
9
9
|
from cryptodatapy.extract.datarequest import DataRequest
|
@@ -20,19 +20,18 @@ class PandasDataReader(Library):
|
|
20
20
|
"""
|
21
21
|
Retrieves data from Pandas Data Reader API.
|
22
22
|
"""
|
23
|
-
|
24
23
|
def __init__(
|
25
24
|
self,
|
26
|
-
categories=
|
25
|
+
categories: Union[str, List[str]] = ["fx", "rates", "eqty", "cmdty", "credit", "macro"],
|
27
26
|
exchanges: Optional[List[str]] = None,
|
28
27
|
indexes: Optional[Dict[str, List[str]]] = None,
|
29
28
|
assets: Optional[Dict[str, List[str]]] = None,
|
30
29
|
markets: Optional[Dict[str, List[str]]] = None,
|
31
|
-
market_types=
|
30
|
+
market_types: List[str] = ["spot", "future"],
|
32
31
|
fields: Optional[Dict[str, List[str]]] = None,
|
33
|
-
frequencies=
|
32
|
+
frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y"],
|
34
33
|
base_url: Optional[str] = None,
|
35
|
-
api_key=None,
|
34
|
+
api_key: Optional[str] = None,
|
36
35
|
max_obs_per_call: Optional[int] = None,
|
37
36
|
rate_limit: Optional[Any] = None,
|
38
37
|
):
|
@@ -89,29 +88,8 @@ class PandasDataReader(Library):
|
|
89
88
|
max_obs_per_call,
|
90
89
|
rate_limit,
|
91
90
|
)
|
92
|
-
|
93
|
-
|
94
|
-
self.api_key = {
|
95
|
-
"fred": None,
|
96
|
-
"yahoo": None,
|
97
|
-
"fama_french": None
|
98
|
-
}
|
99
|
-
if frequencies is None:
|
100
|
-
self.frequencies = {
|
101
|
-
"crypto": ["d", "w", "m", "q", "y"],
|
102
|
-
"fx": ["d", "w", "m", "q", "y"],
|
103
|
-
"rates": ["d", "w", "m", "q", "y"],
|
104
|
-
"cmdty": ["d", "w", "m", "q", "y"],
|
105
|
-
"eqty": ["d", "w", "m", "q", "y"],
|
106
|
-
"credit": ["d", "w", "m", "q", "y"],
|
107
|
-
"macro": ["d", "w", "m", "q", "y"],
|
108
|
-
}
|
109
|
-
if market_types is None:
|
110
|
-
self.market_types = ["spot"]
|
111
|
-
if categories is None:
|
112
|
-
self.categories = ["fx", "rates", "eqty", "cmdty", "credit", "macro"]
|
113
|
-
if fields is None:
|
114
|
-
self.fields = self.get_fields_info()
|
91
|
+
self.data_req = None
|
92
|
+
self.data = pd.DataFrame()
|
115
93
|
|
116
94
|
@staticmethod
|
117
95
|
def get_vendors_info():
|
@@ -122,7 +100,8 @@ class PandasDataReader(Library):
|
|
122
100
|
f"See providers page to find available vendors: {data_cred.pdr_vendors_url} "
|
123
101
|
)
|
124
102
|
|
125
|
-
|
103
|
+
@staticmethod
|
104
|
+
def get_exchanges_info() -> None:
|
126
105
|
"""
|
127
106
|
Get exchanges info.
|
128
107
|
"""
|
@@ -130,7 +109,8 @@ class PandasDataReader(Library):
|
|
130
109
|
f"See specific data vendor for available exchanges: {data_cred.pdr_vendors_url}"
|
131
110
|
)
|
132
111
|
|
133
|
-
|
112
|
+
@staticmethod
|
113
|
+
def get_indexes_info() -> None:
|
134
114
|
"""
|
135
115
|
Get indexes info.
|
136
116
|
"""
|
@@ -138,7 +118,8 @@ class PandasDataReader(Library):
|
|
138
118
|
f"See specific data vendor for available indexes: {data_cred.pdr_vendors_url}"
|
139
119
|
)
|
140
120
|
|
141
|
-
|
121
|
+
@staticmethod
|
122
|
+
def get_assets_info() -> None:
|
142
123
|
"""
|
143
124
|
Get assets info.
|
144
125
|
"""
|
@@ -146,7 +127,8 @@ class PandasDataReader(Library):
|
|
146
127
|
f"See specific data vendor for available assets: {data_cred.pdr_vendors_url} "
|
147
128
|
)
|
148
129
|
|
149
|
-
|
130
|
+
@staticmethod
|
131
|
+
def get_markets_info() -> None:
|
150
132
|
"""
|
151
133
|
Get markets info.
|
152
134
|
"""
|
@@ -154,60 +136,114 @@ class PandasDataReader(Library):
|
|
154
136
|
f"See specific data vendor for available markets: {data_cred.pdr_vendors_url}"
|
155
137
|
)
|
156
138
|
|
157
|
-
|
158
|
-
def get_fields_info(
|
159
|
-
data_type: Optional[str] = "market", cat: Optional[str] = None
|
160
|
-
) -> Dict[str, List[str]]:
|
139
|
+
def get_fields_info(self) -> Dict[str, List[str]]:
|
161
140
|
"""
|
162
141
|
Get fields info.
|
163
142
|
|
164
|
-
Parameters
|
165
|
-
----------
|
166
|
-
data_type: str, {'market', 'on-chain', 'off-chain'}, default 'market'
|
167
|
-
Type of data.
|
168
|
-
cat: str, {'crypto', 'eqty', 'fx', 'rates', 'cmdty', 'macro'}, optional, default None
|
169
|
-
Asset class or time series category.
|
170
|
-
|
171
143
|
Returns
|
172
144
|
-------
|
173
145
|
fields: dictionary
|
174
146
|
Dictionary with info on available fields, by category.
|
175
147
|
"""
|
176
|
-
if
|
177
|
-
|
178
|
-
"
|
179
|
-
|
148
|
+
if self.fields is None:
|
149
|
+
self.fields = {
|
150
|
+
"fx": ["open", "high", "low", "close", "volume", "close_adj", "er"],
|
151
|
+
"rates": ["open", "high", "low", "close", "volume", "close_adj", "er"],
|
152
|
+
"eqty": ["open", "high", "low", "close", "volume", "close_adj", "er"],
|
153
|
+
"cmdty": ["open", "high", "low", "close", "volume", "close_adj", "er"],
|
154
|
+
"credit": ["open", "high", "low", "close", "volume", "close_adj", "er"],
|
155
|
+
"macro": ["actual"],
|
156
|
+
}
|
180
157
|
|
181
|
-
#
|
182
|
-
|
183
|
-
|
158
|
+
# fields cat
|
159
|
+
if self.data_req is not None:
|
160
|
+
self.fields = self.fields[self.data_req.cat]
|
184
161
|
|
185
|
-
|
186
|
-
fields = {
|
187
|
-
"fx": market_fields_list,
|
188
|
-
"rates": market_fields_list,
|
189
|
-
"eqty": market_fields_list,
|
190
|
-
"cmdty": market_fields_list,
|
191
|
-
"credit": market_fields_list,
|
192
|
-
"macro": macro_fields_list,
|
193
|
-
}
|
162
|
+
return self.fields
|
194
163
|
|
195
|
-
|
196
|
-
|
197
|
-
|
164
|
+
def get_frequencies_info(self) -> Dict[str, Union[str, int]]:
|
165
|
+
"""
|
166
|
+
Get frequencies info.
|
167
|
+
|
168
|
+
Returns
|
169
|
+
-------
|
170
|
+
freq: dictionary
|
171
|
+
Dictionary with info on available frequencies.
|
172
|
+
"""
|
173
|
+
if self.frequencies is None:
|
174
|
+
self.frequencies = {
|
175
|
+
"crypto": ["d", "w", "m", "q", "y"],
|
176
|
+
"fx": ["d", "w", "m", "q", "y"],
|
177
|
+
"rates": ["d", "w", "m", "q", "y"],
|
178
|
+
"cmdty": ["d", "w", "m", "q", "y"],
|
179
|
+
"eqty": ["d", "w", "m", "q", "y"],
|
180
|
+
"credit": ["d", "w", "m", "q", "y"],
|
181
|
+
"macro": ["d", "w", "m", "q", "y"],
|
182
|
+
}
|
198
183
|
|
199
|
-
return
|
184
|
+
return self.frequencies
|
200
185
|
|
201
|
-
|
186
|
+
@staticmethod
|
187
|
+
def get_rate_limit_info() -> None:
|
202
188
|
"""
|
203
189
|
Get rate limit info.
|
204
190
|
"""
|
205
191
|
print(f"See specific data vendor for rate limits: {data_cred.pdr_vendors_url}")
|
206
192
|
|
207
|
-
|
208
|
-
def get_series(data_req: DataRequest) -> pd.DataFrame:
|
193
|
+
def convert_params(self, data_req: DataRequest) -> DataRequest:
|
209
194
|
"""
|
210
|
-
|
195
|
+
Converts data request parameters to source format.
|
196
|
+
|
197
|
+
Parameters
|
198
|
+
----------
|
199
|
+
data_req: DataRequest
|
200
|
+
Parameters of data request in CryptoDataPy format.
|
201
|
+
|
202
|
+
Returns
|
203
|
+
-------
|
204
|
+
data_req: DataRequest
|
205
|
+
Parameters of data request in source format.
|
206
|
+
"""
|
207
|
+
# convert params to source format
|
208
|
+
if self.data_req is None:
|
209
|
+
self.data_req = getattr(ConvertParams(data_req), f"to_{data_req.source}")()
|
210
|
+
|
211
|
+
# check cat
|
212
|
+
if self.data_req.cat not in self.categories:
|
213
|
+
raise ValueError(
|
214
|
+
f"Select a valid category. Valid categories are: {self.categories}."
|
215
|
+
)
|
216
|
+
|
217
|
+
# check tickers
|
218
|
+
if not self.data_req.source_tickers:
|
219
|
+
raise ValueError("No tickers provided for data request.")
|
220
|
+
|
221
|
+
# check freq
|
222
|
+
if self.data_req.source_freq not in self.frequencies:
|
223
|
+
raise ValueError(
|
224
|
+
f"{self.data_req.source_freq} frequency is not available. "
|
225
|
+
f"Use the '.frequencies' attribute to check available frequencies."
|
226
|
+
)
|
227
|
+
|
228
|
+
# mkt type
|
229
|
+
if self.data_req.mkt_type not in self.market_types:
|
230
|
+
raise ValueError(
|
231
|
+
f"{self.data_req.mkt_type} is not available for {self.data_req.exch}."
|
232
|
+
)
|
233
|
+
|
234
|
+
# check fields
|
235
|
+
if self.fields is None:
|
236
|
+
self.get_fields_info()
|
237
|
+
if not any(field in self.fields for field in self.data_req.fields):
|
238
|
+
raise ValueError(
|
239
|
+
f"{self.data_req.fields} fields are not available for {self.data_req.cat}."
|
240
|
+
)
|
241
|
+
|
242
|
+
return self.data_req
|
243
|
+
|
244
|
+
def get_series(self, data_req: DataRequest) -> pd.DataFrame:
|
245
|
+
"""
|
246
|
+
Gets series from python client.
|
211
247
|
|
212
248
|
Parameters
|
213
249
|
----------
|
@@ -220,55 +256,51 @@ class PandasDataReader(Library):
|
|
220
256
|
Dataframe with DatetimeIndex and actual values (col) for requested series.
|
221
257
|
|
222
258
|
"""
|
223
|
-
# convert
|
224
|
-
|
259
|
+
# convert params to source format
|
260
|
+
if self.data_req is None:
|
261
|
+
self.convert_params(data_req)
|
225
262
|
|
226
263
|
try:
|
227
|
-
#
|
228
|
-
if data_req.source == "yahoo":
|
264
|
+
# yahoo
|
265
|
+
if self.data_req.source == "yahoo":
|
229
266
|
# fetch yf data
|
230
|
-
|
231
|
-
|
232
|
-
|
267
|
+
self.data = yf.download(self.data_req.source_tickers,
|
268
|
+
self.data_req.source_start_date,
|
269
|
+
self.data_req.source_end_date)
|
233
270
|
|
234
|
-
#
|
271
|
+
# fama-french
|
235
272
|
elif data_req.source == "famafrench":
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
# featch wb data
|
273
|
+
for ticker in self.data_req.source_tickers:
|
274
|
+
df1 = web.DataReader(ticker,
|
275
|
+
self.data_req.source,
|
276
|
+
self.data_req.source_start_date,
|
277
|
+
self.data_req.source_end_date)
|
278
|
+
self.data = pd.concat([self.data, df1[0]], axis=1)
|
279
|
+
|
280
|
+
# world bank
|
245
281
|
elif data_req.source == "wb":
|
246
|
-
|
247
|
-
for ticker in conv_data_req["tickers"]:
|
282
|
+
for ticker in self.data_req.source_tickers:
|
248
283
|
df1 = wb.download(indicator=ticker,
|
249
|
-
country=
|
250
|
-
start=
|
251
|
-
end=
|
252
|
-
|
284
|
+
country=self.data_req.countries,
|
285
|
+
start=self.data_req.source_start_date,
|
286
|
+
end=self.data_req.source_end_date)
|
287
|
+
self.data = pd.concat([self.data, df1], axis=1)
|
253
288
|
|
254
|
-
#
|
289
|
+
# other pdr data
|
255
290
|
else:
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
291
|
+
self.data = web.DataReader(self.data_req.source_tickers,
|
292
|
+
self.data_req.source,
|
293
|
+
self.data_req.source_start_date,
|
294
|
+
self.data_req.source_end_date)
|
260
295
|
|
261
296
|
except Exception as e:
|
262
297
|
logging.warning(e)
|
263
|
-
logging.warning(f"Failed to get data for: {
|
298
|
+
logging.warning(f"Failed to get data for source tickers: {self.data_req.source_tickers}.")
|
264
299
|
|
265
300
|
else:
|
301
|
+
return self.data
|
266
302
|
|
267
|
-
|
268
|
-
|
269
|
-
@staticmethod
|
270
|
-
def wrangle_data_resp(
|
271
|
-
data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
|
303
|
+
def wrangle_data_resp(self, data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
|
272
304
|
"""
|
273
305
|
Wrangle data response.
|
274
306
|
|
@@ -285,8 +317,11 @@ class PandasDataReader(Library):
|
|
285
317
|
Wrangled dataframe with DatetimeIndex (level 0), ticker (level 1), and values for market or macro series
|
286
318
|
for selected fields (cols), in tidy format.
|
287
319
|
"""
|
320
|
+
if self.data_req is None:
|
321
|
+
self.convert_params(data_req)
|
322
|
+
|
288
323
|
# wrangle data resp
|
289
|
-
df = getattr(WrangleData(data_req, data_resp), data_req.source)()
|
324
|
+
df = getattr(WrangleData(self.data_req, data_resp), self.data_req.source)()
|
290
325
|
|
291
326
|
return df
|
292
327
|
|
@@ -305,38 +340,17 @@ class PandasDataReader(Library):
|
|
305
340
|
Dataframe with DatetimeIndex (level 0), tickers (level 1) and actual values (cols),
|
306
341
|
in tidy data format.
|
307
342
|
"""
|
308
|
-
#
|
309
|
-
|
310
|
-
|
311
|
-
df = self.wrangle_data_resp(data_req, df)
|
343
|
+
# convert params to source format
|
344
|
+
if self.data_req is None:
|
345
|
+
self.convert_params(data_req)
|
312
346
|
|
313
|
-
|
314
|
-
|
315
|
-
def check_params(self, data_req: DataRequest) -> None:
|
316
|
-
"""
|
317
|
-
Checks the data request parameters before requesting data to reduce API calls
|
318
|
-
and improve efficiency.
|
347
|
+
# get series
|
348
|
+
data_resp = self.get_series(self.data_req)
|
319
349
|
|
320
|
-
|
321
|
-
|
322
|
-
if data_req.source not in ['fred', 'yahoo', 'famafrench', 'wb']:
|
323
|
-
raise ValueError(
|
324
|
-
"Select a Pandas-datareader supported data source for the data request."
|
325
|
-
)
|
350
|
+
# wrangle data resp
|
351
|
+
df = self.wrangle_data_resp(self.data_req, data_resp)
|
326
352
|
|
327
|
-
|
328
|
-
if data_req.cat not in self.categories:
|
329
|
-
raise ValueError(
|
330
|
-
f"Select a valid category. Valid categories are: {self.categories}."
|
331
|
-
)
|
332
|
-
# check freq
|
333
|
-
if data_req.freq not in self.frequencies[data_req.cat]:
|
334
|
-
raise ValueError(
|
335
|
-
f"Invalid data frequency. Valid data frequencies are: {self.frequencies}."
|
336
|
-
)
|
337
|
-
# check fields
|
338
|
-
if not any(field in self.fields[data_req.cat] for field in data_req.fields):
|
339
|
-
raise ValueError(f"Invalid fields. Valid data fields are: {self.fields}.")
|
353
|
+
return df
|
340
354
|
|
341
355
|
def get_data(self, data_req: DataRequest) -> pd.DataFrame:
|
342
356
|
"""
|
@@ -352,16 +366,17 @@ class PandasDataReader(Library):
|
|
352
366
|
DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for selected fields (cols),
|
353
367
|
in tidy format.
|
354
368
|
"""
|
355
|
-
#
|
356
|
-
self.
|
369
|
+
# convert params to source format
|
370
|
+
if self.data_req is None:
|
371
|
+
self.convert_params(data_req)
|
357
372
|
|
358
373
|
# get tidy data
|
359
|
-
|
374
|
+
self.data = self.get_tidy_data(self.data_req)
|
360
375
|
|
361
376
|
# check if df empty
|
362
|
-
if
|
377
|
+
if self.data.empty:
|
363
378
|
raise Exception(
|
364
379
|
"No data returned. Check data request parameters and try again."
|
365
380
|
)
|
366
381
|
|
367
|
-
return
|
382
|
+
return self.data.sort_index()
|