cryptodatapy 0.2.25__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fields.csv +126 -126
- cryptodatapy/conf/tickers.csv +2020 -2020
- cryptodatapy/extract/data_vendors/coinmetrics_api.py +1 -1
- cryptodatapy/extract/data_vendors/polygon_api.py +388 -0
- cryptodatapy/extract/data_vendors/tiingo_api.py +0 -2
- cryptodatapy/extract/datarequest.py +2 -0
- cryptodatapy/extract/getdata.py +7 -6
- cryptodatapy/extract/libraries/pandasdr_api.py +20 -3
- cryptodatapy/transform/clean.py +0 -41
- cryptodatapy/transform/convertparams.py +222 -75
- cryptodatapy/transform/wrangle.py +71 -1
- cryptodatapy/util/datacredentials.py +11 -0
- cryptodatapy/util/utils.py +82 -0
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.26.dist-info}/METADATA +2 -1
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.26.dist-info}/RECORD +17 -15
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.26.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.25.dist-info → cryptodatapy-0.2.26.dist-info}/WHEEL +0 -0
@@ -0,0 +1,388 @@
|
|
1
|
+
import logging
|
2
|
+
from time import sleep
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
from polygon import RESTClient
|
7
|
+
|
8
|
+
|
9
|
+
from cryptodatapy.extract.data_vendors.datavendor import DataVendor
|
10
|
+
from cryptodatapy.extract.datarequest import DataRequest
|
11
|
+
from cryptodatapy.transform.convertparams import ConvertParams
|
12
|
+
from cryptodatapy.transform.wrangle import WrangleData
|
13
|
+
from cryptodatapy.util.datacredentials import DataCredentials
|
14
|
+
|
15
|
+
# data credentials: instantiated once at import time; its attributes supply the
# default `base_url` and `api_key` arguments of Polygon.__init__ below
data_cred = DataCredentials()
|
17
|
+
|
18
|
+
|
19
|
+
class Polygon(DataVendor):
    """
    Retrieves data from Polygon API.
    """

    def __init__(
            self,
            categories: Optional[List[str]] = None,
            exchanges: Optional[Dict[str, List[str]]] = None,
            indexes: Optional[Dict[str, List[str]]] = None,
            assets: Optional[Dict[str, List[str]]] = None,
            markets: Optional[Dict[str, List[str]]] = None,
            market_types: Optional[List[str]] = None,
            fields: Optional[Dict[str, List[str]]] = None,
            frequencies: Optional[List[str]] = None,
            base_url: str = data_cred.tiingo_base_url,
            api_key: str = data_cred.polygon_api_key,
            api_endpoints: Optional[Dict[str, str]] = None,
            max_obs_per_call: Optional[int] = None,
            rate_limit: Optional[Any] = None,
    ):
        """
        Constructor

        Parameters
        ----------
        categories: list or str, optional, default None
            List or string of available categories. If None, defaults to
            ['crypto', 'fx', 'eqty', 'rates', 'bonds', 'cmdty', 'index'].
        exchanges: dictionary, optional, default None
            Dictionary with available exchanges, by cat-exchanges key-value pairs, e.g. {'eqty' : ['NYSE', 'DAX', ...],
            'crypto' : ['binance', 'ftx', ....]}.
        indexes: dictionary, optional, default None
            Dictionary of available indexes, by cat-indexes key-value pairs, e.g. {'eqty': ['SPX', 'N225'],
            'rates': [.... , ...}.
        assets: dictionary, optional, default None
            Dictionary of available assets, by cat-assets key-value pairs, e.g. {'rates': ['Germany 2Y', 'Japan 10Y',
            ...], 'eqty': ['SPY', 'TLT', ...], ...}.
        markets: dictionary, optional, default None
            Dictionary of available markets, by cat-markets key-value pairs, e.g. {'fx': ['EUR/USD', 'USD/JPY', ...],
            'crypto': ['BTC/ETH', 'ETH/USDT', ...}.
        market_types: list, optional, default None
            List of available market types. If None, defaults to ['spot', 'future', 'option'].
        fields: dictionary, optional, default None
            Dictionary of available fields, by cat-fields key-value pairs, e.g. {'eqty': ['date', 'open', 'high',
            'low', 'close', 'volume'], 'fx': ['date', 'open', 'high', 'low', 'close']}
        frequencies: list, optional, default None
            List of available frequencies. If None, defaults to
            ['1s', '1min', '1h', 'd', 'w', 'm', 'q', 'y'].
        base_url: str
            Base url forwarded to the DataVendor constructor.
            NOTE(review): the default is read from `data_cred.tiingo_base_url`, which looks like a copy-paste
            from the Tiingo vendor. This class only forwards it to the base class and talks to Polygon through
            `RESTClient` -- confirm and point it at a Polygon url.
        api_key: str
            Api key, e.g. 'dcf13983adf7dfa79a0dfa35adf'. If not provided, default is set to
            polygon_api_key stored in DataCredentials.
        api_endpoints: dict, optional, default None
            Dictionary with available API endpoints.
        max_obs_per_call: int, default None
            Maximum number of observations returned per API call. If None, `req_data` falls back to 500.
        rate_limit: pd.DataFrame, optional, Default None
            Number of API calls made and left, by time frequency.

        Raises
        ------
        TypeError
            If no api key is available.
        """
        # build list defaults per instance so they are never shared between instances
        if categories is None:
            categories = ["crypto", "fx", "eqty", 'rates', "bonds", "cmdty", "index"]
        if market_types is None:
            market_types = ["spot", "future", "option"]
        if frequencies is None:
            frequencies = ["1s", "1min", "1h", "d", "w", "m", "q", "y"]

        super().__init__(
            categories, exchanges, indexes, assets, markets, market_types,
            fields, frequencies, base_url, api_endpoints, api_key, max_obs_per_call, rate_limit
        )

        if api_key is None:
            raise TypeError("Set your Polygon api key in environment variables as 'POLYGON_API_KEY' or "
                            "add it as an argument when instantiating the class. To get an api key, visit: "
                            "https://polygon.io/dashboard/")

        self.data_req = None
        self.data = pd.DataFrame()
        self.client = RESTClient(self.api_key)

    def get_exchanges_info(self):
        """
        Get exchanges info from Polygon API.

        Not implemented yet: currently returns None.
        """
        return None

    def get_indexes_info(self):
        """
        Get indexes info from Polygon API.

        Not implemented yet: currently returns None.
        """
        return None

    def get_assets_info(self):
        """
        Get assets info from Polygon API.

        Not implemented yet: currently returns None.
        """
        return None

    def get_markets_info(self):
        """
        Get markets info from Polygon API.

        Not implemented yet: currently returns None.
        """
        return None

    def get_fields_info(self, data_type: Optional[str] = None):
        """
        Get fields info from Polygon API.

        Not implemented yet: currently returns None.

        Parameters
        ----------
        data_type: str, optional
            Data type for which to get fields info. Currently ignored.
        """
        return None

    def get_rate_limit_info(self):
        """
        Get rate limit info from Polygon API.

        Not implemented yet: currently returns None.
        """
        return None

    def _to_source_ticker(self, ticker: str) -> str:
        """
        Formats a ticker for the Polygon aggregates endpoint.

        Tickers that already carry a market prefix (contain ':') are returned unchanged. Equity tickers are
        sent bare. Everything else, including direct calls made before any data request has been set,
        keeps the forex prefix 'C:'.
        """
        if ":" in ticker:
            return ticker
        if getattr(self.data_req, "cat", None) == 'eqty':
            return ticker
        return f"C:{ticker}"

    def req_data(self,
                 ticker: str,
                 multiplier: int,
                 timespan: str,
                 from_: str,
                 to: str
                 ) -> List:
        """
        Request data from Polygon API.

        Parameters
        ----------
        ticker: str
            Ticker symbol for the asset.
        multiplier: int
            Multiplier for the aggregation.
        timespan: str
            Timespan for the aggregation, e.g. 'minute', 'hour', 'day'.
        from_: str
            Start date for the data request in 'YYYY-MM-DD' format.
        to: str
            End date for the data request in 'YYYY-MM-DD' format.

        Returns
        -------
        List: List of aggregated data from Polygon API. Empty (with a logged warning) if nothing was returned.
        """
        aggs = list(
            self.client.list_aggs(
                self._to_source_ticker(ticker),
                multiplier,
                timespan,
                from_,
                to,
                adjusted="true",
                sort="asc",
                limit=self.max_obs_per_call or 500,
            )
        )

        if not aggs:
            logging.warning(f"No data found for ticker {ticker} in the specified date range.")

        return aggs

    @staticmethod
    def wrangle_data_resp(data_req: DataRequest, data_resp: List[Any]) -> pd.DataFrame:
        """
        Wrangle data response.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.
        data_resp: list
            Data response from data request, i.e. the list of aggregates returned by `req_data`.

        Returns
        -------
        df: pd.DataFrame
            Wrangled dataframe with DatetimeIndex and market data for selected fields (cols), in tidy format.
        """
        return WrangleData(data_req, data_resp).polygon()

    def get_tidy_data(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
        """
        Submits data request and wrangles the data response into tidy data format.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.
        ticker: str
            Ticker (or market) to request, passed on to `req_data`.

        Returns
        -------
        df: pd.DataFrame
            Dataframe with DatetimeIndex and field values (col) wrangled into tidy data format.
        """
        # convert data request parameters to Polygon format
        self.data_req = ConvertParams(data_req).to_polygon()

        # get entire data history
        aggs = self.req_data(
            ticker=ticker,
            multiplier=1,
            timespan=self.data_req.source_freq,
            from_=self.data_req.source_start_date,
            to=self.data_req.source_end_date,
        )

        # wrangle data resp
        return self.wrangle_data_resp(self.data_req, aggs)

    def get_all_tickers(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Loops list of tickers, retrieves data in tidy format for each ticker and stores it in a
        multiindex dataframe.

        Tickers whose request fails are logged and skipped.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame - MultiIndex
            Dataframe with DatetimeIndex (level 0), ticker (level 1) and values for fields (cols), in tidy data format.
            Empty dataframe if no ticker returned data.

        Raises
        ------
        NotImplementedError
            If the data request category is neither 'fx' nor 'eqty'.
        """
        # convert data request parameters to Polygon format
        self.data_req = ConvertParams(data_req).to_polygon()
        cat = self.data_req.cat

        # (symbol to request, ticker used as index label) pairs
        if cat == 'fx':
            requests = list(zip(self.data_req.source_markets, self.data_req.tickers))
        elif cat == 'eqty':
            requests = [(ticker, ticker) for ticker in self.data_req.tickers]
        else:
            raise NotImplementedError(
                f"Data category '{self.data_req.cat}' is not implemented for Polygon API. "
                "Supported categories are: 'fx', 'eqty'."
            )

        frames = []
        for symbol, ticker in requests:
            try:
                df0 = self.get_tidy_data(self.data_req, symbol)
            except Exception as e:
                # best effort: one failing ticker must not abort the whole request, but make it visible
                logging.warning(f"Failed to get {cat} data for {symbol} after many attempts: {e}.")
            else:
                # add ticker to index
                df0['ticker'] = ticker.upper()
                df0.set_index(['ticker'], append=True, inplace=True)
                frames.append(df0)

                # sleep to avoid hitting API rate limits
                sleep(self.data_req.pause)

        # concat once instead of re-copying the accumulated frame on every iteration
        df = pd.concat(frames) if frames else pd.DataFrame()

        return df.sort_index()

    def check_params(self, data_req: DataRequest) -> None:
        """
        Checks the parameters of the data request before requesting data to reduce API calls
        and improve efficiency.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Raises
        ------
        ValueError
            If the data request category is None.
        """
        self.data_req = ConvertParams(data_req).to_polygon()

        # check cat
        if self.data_req.cat is None:
            raise ValueError(
                f"Cat cannot be None. Please provide category. Categories include: {self.categories}."
            )

        # TODO: validate requested tickers/markets and fields against the assets and fields
        #  metadata once get_assets_info and get_fields_info are implemented.

    def get_data(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Get market data (eqty, fx).

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame - MultiIndex
            DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for market or series data
            for selected fields (cols), in tidy format.

        Raises
        ------
        Exception
            If retrieving the data fails; the original error is attached as the cause.
        """
        # check data req params
        self.check_params(data_req)

        # get data
        try:
            df = self.get_all_tickers(data_req)
        except Exception as e:
            logging.warning(e)
            # chain the cause so the underlying error is not lost
            raise Exception(
                "No data returned. Check data request parameters and try again."
            ) from e

        return df
|
@@ -725,8 +725,6 @@ class Tiingo(DataVendor):
|
|
725
725
|
DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for market or series data
|
726
726
|
for selected fields (cols), in tidy format.
|
727
727
|
"""
|
728
|
-
logging.info("Retrieving data request from Tiingo...")
|
729
|
-
|
730
728
|
# check data req params
|
731
729
|
self.check_params(data_req)
|
732
730
|
|
cryptodatapy/extract/getdata.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
|
|
4
4
|
from cryptodatapy.extract.data_vendors.coinmetrics_api import CoinMetrics
|
5
5
|
from cryptodatapy.extract.data_vendors.cryptocompare_api import CryptoCompare
|
6
6
|
from cryptodatapy.extract.data_vendors.glassnode_api import Glassnode
|
7
|
+
from cryptodatapy.extract.data_vendors.polygon_api import Polygon
|
7
8
|
from cryptodatapy.extract.data_vendors.tiingo_api import Tiingo
|
8
9
|
from cryptodatapy.extract.datarequest import DataRequest
|
9
10
|
from cryptodatapy.extract.exchanges.dydx import Dydx
|
@@ -93,8 +94,8 @@ class GetData:
|
|
93
94
|
"dbnomics": DBnomics,
|
94
95
|
"yahoo": PandasDataReader,
|
95
96
|
"fred": PandasDataReader,
|
96
|
-
"
|
97
|
-
"
|
97
|
+
"alphavantage": PandasDataReader,
|
98
|
+
"polygon": Polygon,
|
98
99
|
"famafrench": PandasDataReader,
|
99
100
|
"aqr": AQR,
|
100
101
|
"dydx": Dydx
|
@@ -204,8 +205,8 @@ class GetData:
|
|
204
205
|
"dbnomics": DBnomics,
|
205
206
|
"yahoo": PandasDataReader,
|
206
207
|
"fred": PandasDataReader,
|
207
|
-
"
|
208
|
-
"
|
208
|
+
"alphavantage": PandasDataReader,
|
209
|
+
"polygon": Polygon,
|
209
210
|
"famafrench": PandasDataReader,
|
210
211
|
"aqr": AQR,
|
211
212
|
"dydx": Dydx
|
@@ -272,8 +273,8 @@ class GetData:
|
|
272
273
|
"dbnomics": DBnomics,
|
273
274
|
"yahoo": PandasDataReader,
|
274
275
|
"fred": PandasDataReader,
|
275
|
-
"
|
276
|
-
"
|
276
|
+
"alphavantage": PandasDataReader,
|
277
|
+
"polygon": Polygon,
|
277
278
|
"famafrench": PandasDataReader,
|
278
279
|
"aqr": AQR,
|
279
280
|
"dydx": Dydx
|
@@ -29,9 +29,12 @@ class PandasDataReader(Library):
|
|
29
29
|
markets: Optional[Dict[str, List[str]]] = None,
|
30
30
|
market_types: List[str] = ["spot", "future"],
|
31
31
|
fields: Optional[Dict[str, List[str]]] = None,
|
32
|
-
frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y"
|
32
|
+
frequencies: Optional[Dict[str, List[str]]] = ["d", "w", "m", "q", "y",
|
33
|
+
"av-intraday", "av-daily", "av-weekly", "av-monthly",
|
34
|
+
"av-daily-adjusted", "av-weekly-adjusted",
|
35
|
+
"av-monthly-adjusted", "av-forex-daily"],
|
33
36
|
base_url: Optional[str] = None,
|
34
|
-
api_key:
|
37
|
+
api_key: str = data_cred.alpha_vantage_api_key,
|
35
38
|
max_obs_per_call: Optional[int] = None,
|
36
39
|
rate_limit: Optional[Any] = None,
|
37
40
|
):
|
@@ -228,7 +231,7 @@ class PandasDataReader(Library):
|
|
228
231
|
# mkt type
|
229
232
|
if self.data_req.mkt_type not in self.market_types:
|
230
233
|
raise ValueError(
|
231
|
-
f"{self.data_req.mkt_type} is not available
|
234
|
+
f"{self.data_req.mkt_type} is not available."
|
232
235
|
)
|
233
236
|
|
234
237
|
# check fields
|
@@ -268,6 +271,20 @@ class PandasDataReader(Library):
|
|
268
271
|
self.data_req.source_start_date,
|
269
272
|
self.data_req.source_end_date)
|
270
273
|
|
274
|
+
# alpha vantage
|
275
|
+
elif self.data_req.source == "alphavantage":
|
276
|
+
for ticker, market in zip(self.data_req.source_tickers, self.data_req.source_markets):
|
277
|
+
df1 = web.DataReader(market,
|
278
|
+
self.data_req.source_freq,
|
279
|
+
self.data_req.source_start_date,
|
280
|
+
self.data_req.source_end_date,
|
281
|
+
api_key=self.api_key)
|
282
|
+
df1.index.name = 'date'
|
283
|
+
df1['ticker'] = ticker
|
284
|
+
df1.set_index(['ticker'], append=True, inplace=True)
|
285
|
+
# concat df and df1
|
286
|
+
self.data = pd.concat([self.data, df1])
|
287
|
+
|
271
288
|
# fama-french
|
272
289
|
elif data_req.source == "famafrench":
|
273
290
|
for ticker in self.data_req.source_tickers:
|
cryptodatapy/transform/clean.py
CHANGED
@@ -6,47 +6,6 @@ from cryptodatapy.transform.impute import Impute
|
|
6
6
|
from cryptodatapy.transform.filter import Filter
|
7
7
|
|
8
8
|
|
9
|
-
def stitch_dataframes(dfs):
    """
    Stitches together dataframes with different start dates.

    The dataframes are ordered from most recent to oldest start date and combined with
    `combine_first`, so values from a dataframe with a more recent start date take precedence
    and older dataframes only fill in what is missing. The list passed in is not modified.

    Parameters
    ----------
    dfs: list
        List of dataframes to be stitched together. All must have a pd.MultiIndex, or all
        must have a DatetimeIndex.

    Returns
    -------
    combined_df: pd.DataFrame
        Combined dataframe with extended start date. Columns follow the order of the first
        dataframe (most recent start date first) that has the largest number of columns.

    Raises
    ------
    TypeError
        If dfs is not a list, or the dataframes do not share one of the supported index types.
    IndexError
        If dfs is an empty list.
    """
    # check if dfs is a list
    if not isinstance(dfs, list):
        raise TypeError("Dataframes must be a list.")

    # pick how to read the start date, depending on index type
    if all(isinstance(df.index, pd.MultiIndex) for df in dfs):
        def start_date(df):
            return df.index.levels[0][0]
    elif all(isinstance(df.index, pd.DatetimeIndex) for df in dfs):
        def start_date(df):
            return df.index[0]
    else:
        raise TypeError("Dataframes must be pd.MultiIndex or have DatetimeIndex.")

    # sort a copy, most recent start date first, so the caller's list keeps its order
    ordered = sorted(dfs, key=start_date, reverse=True)

    # combine dfs, starting from the most recent start date
    combined_df = ordered[0]
    for df in ordered[1:]:
        combined_df = combined_df.combine_first(df)

    # reorder cols to match the widest dataframe
    max_columns = max(len(df.columns) for df in ordered)
    cols = next(df.columns.tolist() for df in ordered if len(df.columns) == max_columns)

    return combined_df[cols]
|
48
|
-
|
49
|
-
|
50
9
|
class CleanData:
|
51
10
|
"""
|
52
11
|
Cleans data to improve data quality.
|