cryptodatapy 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/extract/libraries/ccxt.ipynb +873 -0
- cryptodatapy/extract/libraries/ccxt_api.py +40 -133
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1426 -247
- cryptodatapy/transform/convertparams.py +0 -1
- cryptodatapy/transform/filter.py +0 -1
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/METADATA +1 -1
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/RECORD +9 -8
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/WHEEL +0 -0
@@ -3,7 +3,6 @@ from time import sleep
|
|
3
3
|
from typing import Any, Dict, List, Optional, Union
|
4
4
|
|
5
5
|
import ccxt
|
6
|
-
import numpy as np
|
7
6
|
import pandas as pd
|
8
7
|
|
9
8
|
from cryptodatapy.extract.datarequest import DataRequest
|
@@ -20,7 +19,6 @@ class CCXT(Library):
|
|
20
19
|
"""
|
21
20
|
Retrieves data from CCXT API.
|
22
21
|
"""
|
23
|
-
|
24
22
|
def __init__(
|
25
23
|
self,
|
26
24
|
categories: Union[str, List[str]] = "crypto",
|
@@ -86,97 +84,16 @@ class CCXT(Library):
|
|
86
84
|
rate_limit,
|
87
85
|
)
|
88
86
|
|
89
|
-
def get_exchanges_info(
|
90
|
-
self,
|
91
|
-
exch: Optional[str] = None,
|
92
|
-
as_list: bool = True
|
93
|
-
) -> Union[List[str], pd.DataFrame]:
|
87
|
+
def get_exchanges_info(self) -> List[str]:
|
94
88
|
"""
|
95
89
|
Get exchanges info.
|
96
90
|
|
97
|
-
Parameters
|
98
|
-
----------
|
99
|
-
exch: str, default None
|
100
|
-
Name of exchange.
|
101
|
-
as_list: bool, default False
|
102
|
-
Returns exchanges info as list.
|
103
|
-
|
104
91
|
Returns
|
105
92
|
-------
|
106
93
|
exch: list or pd.DataFrame
|
107
94
|
List or dataframe with info on supported exchanges.
|
108
95
|
"""
|
109
|
-
|
110
|
-
if as_list:
|
111
|
-
self.exchanges = ccxt.exchanges
|
112
|
-
|
113
|
-
# df
|
114
|
-
else:
|
115
|
-
if exch is not None:
|
116
|
-
self.exchanges = [exch]
|
117
|
-
else:
|
118
|
-
self.exchanges = ccxt.exchanges
|
119
|
-
print(
|
120
|
-
"Getting metadata for all supported exchanges can take a few minutes."
|
121
|
-
" For quick info on a specific exchange, provide the name of the exchange in the exch parameter."
|
122
|
-
)
|
123
|
-
|
124
|
-
# exch df
|
125
|
-
exch_df = pd.DataFrame(
|
126
|
-
index=self.exchanges,
|
127
|
-
columns=[
|
128
|
-
"id",
|
129
|
-
"name",
|
130
|
-
"countries",
|
131
|
-
"urls",
|
132
|
-
"version",
|
133
|
-
"api",
|
134
|
-
"has",
|
135
|
-
"timeframes",
|
136
|
-
"timeout",
|
137
|
-
"rateLimit",
|
138
|
-
"userAgent",
|
139
|
-
"verbose",
|
140
|
-
"markets",
|
141
|
-
"symbols",
|
142
|
-
"currencies",
|
143
|
-
"markets_by_id",
|
144
|
-
"currencies_by_id",
|
145
|
-
"api_key",
|
146
|
-
"secret",
|
147
|
-
"uid",
|
148
|
-
"options",
|
149
|
-
],
|
150
|
-
)
|
151
|
-
|
152
|
-
# Extract exchange info
|
153
|
-
for index, row in exch_df.iterrows():
|
154
|
-
try:
|
155
|
-
exchange = getattr(ccxt, index)()
|
156
|
-
exchange.load_markets()
|
157
|
-
except AttributeError as e:
|
158
|
-
print(f"AttributeError: {e} for exchange {index}")
|
159
|
-
exch_df.loc[index, :] = np.nan
|
160
|
-
except ccxt.BaseError as e: # Catch specific ccxt exceptions
|
161
|
-
print(f"CCXT Error: {e} for exchange {index}")
|
162
|
-
exch_df.loc[index, :] = np.nan
|
163
|
-
except Exception as e: # Fallback for any other exceptions
|
164
|
-
print(f"Unexpected error: {e} for exchange {index}")
|
165
|
-
exch_df.loc[index, :] = np.nan
|
166
|
-
else:
|
167
|
-
for col in exch_df.columns:
|
168
|
-
try:
|
169
|
-
exch_df.loc[index, col] = str(getattr(exchange, col))
|
170
|
-
except AttributeError as e:
|
171
|
-
print(f"AttributeError: {e} for attribute {col} in exchange {index}")
|
172
|
-
exch_df.loc[index, col] = np.nan
|
173
|
-
except Exception as e: # Fallback for any other exceptions
|
174
|
-
print(f"Unexpected error: {e} for attribute {col} in exchange {index}")
|
175
|
-
exch_df.loc[index, col] = np.nan
|
176
|
-
|
177
|
-
# set index name
|
178
|
-
exch_df.index.name = "exchange"
|
179
|
-
self.exchanges = exch_df
|
96
|
+
self.exchanges = ccxt.exchanges
|
180
97
|
|
181
98
|
return self.exchanges
|
182
99
|
|
@@ -337,7 +254,7 @@ class CCXT(Library):
|
|
337
254
|
Get CCXT metadata.
|
338
255
|
"""
|
339
256
|
if self.exchanges is None:
|
340
|
-
self.exchanges = self.get_exchanges_info(
|
257
|
+
self.exchanges = self.get_exchanges_info()
|
341
258
|
if self.market_types is None:
|
342
259
|
self.market_types = ["spot", "future", "perpetual_future", "option"]
|
343
260
|
if self.assets is None:
|
@@ -371,9 +288,6 @@ class CCXT(Library):
|
|
371
288
|
start_date: str
|
372
289
|
Start date in 'YYYY-MM-DD' format.
|
373
290
|
|
374
|
-
Other Parameters
|
375
|
-
----------------
|
376
|
-
|
377
291
|
|
378
292
|
Returns
|
379
293
|
-------
|
@@ -407,16 +321,15 @@ class CCXT(Library):
|
|
407
321
|
limit=1000,
|
408
322
|
)
|
409
323
|
|
410
|
-
|
324
|
+
return data_resp
|
411
325
|
|
412
326
|
except Exception as e:
|
413
327
|
logging.warning(f"Failed to get {data_type} data for {ticker}.")
|
414
328
|
logging.warning(e)
|
415
329
|
|
416
|
-
|
417
|
-
return data_resp
|
330
|
+
return None
|
418
331
|
|
419
|
-
def
|
332
|
+
def fetch_all_ohlcv_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
420
333
|
"""
|
421
334
|
Submits get requests to API until entire OHLCV history has been collected. Only necessary when
|
422
335
|
number of observations is larger than the maximum number of observations per call.
|
@@ -439,21 +352,19 @@ class CCXT(Library):
|
|
439
352
|
|
440
353
|
# create empty df
|
441
354
|
df = pd.DataFrame()
|
355
|
+
|
442
356
|
# while loop condition
|
443
357
|
missing_vals, attempts = True, 0
|
444
358
|
|
445
359
|
# run a while loop until all data collected
|
446
360
|
while missing_vals and attempts < cx_data_req['trials']:
|
447
361
|
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
ticker=ticker,
|
453
|
-
start_date=start_date)
|
362
|
+
data_resp = self.req_data(data_req=data_req,
|
363
|
+
data_type='ohlcv',
|
364
|
+
ticker=ticker,
|
365
|
+
start_date=start_date)
|
454
366
|
|
455
|
-
|
456
|
-
logging.warning(e)
|
367
|
+
if data_resp is None:
|
457
368
|
attempts += 1
|
458
369
|
sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
|
459
370
|
logging.warning(
|
@@ -463,6 +374,7 @@ class CCXT(Library):
|
|
463
374
|
logging.warning(
|
464
375
|
f"Failed to get OHLCV data from {cx_data_req['exch']} for {ticker} after many attempts."
|
465
376
|
)
|
377
|
+
return None
|
466
378
|
|
467
379
|
else:
|
468
380
|
# name cols and create df
|
@@ -488,7 +400,7 @@ class CCXT(Library):
|
|
488
400
|
|
489
401
|
return df
|
490
402
|
|
491
|
-
def
|
403
|
+
def fetch_all_funding_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
492
404
|
"""
|
493
405
|
Submits get requests to API until entire funding rate history has been collected. Only necessary when
|
494
406
|
number of observations is larger than the maximum number of observations per call.
|
@@ -517,15 +429,13 @@ class CCXT(Library):
|
|
517
429
|
# run a while loop until all data collected
|
518
430
|
while missing_vals and attempts < cx_data_req['trials']:
|
519
431
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
start_date=start_date)
|
432
|
+
# data req
|
433
|
+
data_resp = self.req_data(data_req=data_req,
|
434
|
+
data_type='funding_rates',
|
435
|
+
ticker=ticker,
|
436
|
+
start_date=start_date)
|
526
437
|
|
527
|
-
|
528
|
-
logging.warning(e)
|
438
|
+
if data_resp is None:
|
529
439
|
attempts += 1
|
530
440
|
sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
|
531
441
|
logging.warning(
|
@@ -533,8 +443,9 @@ class CCXT(Library):
|
|
533
443
|
)
|
534
444
|
if attempts == cx_data_req["trials"]:
|
535
445
|
logging.warning(
|
536
|
-
f"Failed to get
|
446
|
+
f"Failed to get funding_rates from {cx_data_req['exch']} for {ticker} after many attempts."
|
537
447
|
)
|
448
|
+
return None
|
538
449
|
|
539
450
|
else:
|
540
451
|
# add to df
|
@@ -580,7 +491,7 @@ class CCXT(Library):
|
|
580
491
|
|
581
492
|
return WrangleData(data_req, data_resp).ccxt()
|
582
493
|
|
583
|
-
def get_tidy_ohlcv(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
494
|
+
def fetch_tidy_ohlcv(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
584
495
|
"""
|
585
496
|
Gets entire OHLCV history and wrangles the data response into tidy data format.
|
586
497
|
|
@@ -597,13 +508,15 @@ class CCXT(Library):
|
|
597
508
|
Dataframe with entire data history retrieved and wrangled into tidy data format.
|
598
509
|
"""
|
599
510
|
# get entire data history
|
600
|
-
df = self.
|
511
|
+
df = self.fetch_all_ohlcv_hist(data_req, ticker)
|
512
|
+
|
601
513
|
# wrangle df
|
602
|
-
df
|
514
|
+
if df is not None:
|
515
|
+
df = self.wrangle_data_resp(data_req, df)
|
603
516
|
|
604
517
|
return df
|
605
518
|
|
606
|
-
def get_tidy_funding_rates(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
519
|
+
def fetch_tidy_funding_rates(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
|
607
520
|
"""
|
608
521
|
Gets entire funding rates history and wrangles the data response into tidy data format.
|
609
522
|
|
@@ -620,9 +533,11 @@ class CCXT(Library):
|
|
620
533
|
Dataframe with entire data history retrieved and wrangled into tidy data format.
|
621
534
|
"""
|
622
535
|
# get entire data history
|
623
|
-
df = self.
|
536
|
+
df = self.fetch_all_funding_hist(data_req, ticker)
|
537
|
+
|
624
538
|
# wrangle df
|
625
|
-
df
|
539
|
+
if df is not None:
|
540
|
+
df = self.wrangle_data_resp(data_req, df)
|
626
541
|
|
627
542
|
return df
|
628
543
|
|
@@ -680,7 +595,7 @@ class CCXT(Library):
|
|
680
595
|
f" Market type must be perpetual futures."
|
681
596
|
)
|
682
597
|
|
683
|
-
def
|
598
|
+
def fetch_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
|
684
599
|
"""
|
685
600
|
Loops list of tickers, retrieves OHLCV data for each ticker in tidy format and stores it in a
|
686
601
|
multiindex dataframe.
|
@@ -707,13 +622,9 @@ class CCXT(Library):
|
|
707
622
|
# loop through tickers
|
708
623
|
for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
|
709
624
|
|
710
|
-
|
711
|
-
df0 = self.get_tidy_ohlcv(data_req, mkt)
|
712
|
-
|
713
|
-
except AssertionError:
|
714
|
-
logging.info(f"Failed to get OHLCV data for {ticker} after many attempts.")
|
625
|
+
df0 = self.fetch_tidy_ohlcv(data_req, mkt)
|
715
626
|
|
716
|
-
|
627
|
+
if df0 is not None:
|
717
628
|
# add ticker to index
|
718
629
|
df0['ticker'] = ticker.upper()
|
719
630
|
df0.set_index(['ticker'], append=True, inplace=True)
|
@@ -722,7 +633,7 @@ class CCXT(Library):
|
|
722
633
|
|
723
634
|
return df
|
724
635
|
|
725
|
-
def
|
636
|
+
def fetch_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
|
726
637
|
"""
|
727
638
|
Loops list of tickers, retrieves funding rates data for each ticker in tidy format and stores it in a
|
728
639
|
multiindex dataframe.
|
@@ -749,13 +660,9 @@ class CCXT(Library):
|
|
749
660
|
# loop through tickers
|
750
661
|
for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
|
751
662
|
|
752
|
-
|
753
|
-
df0 = self.get_tidy_funding_rates(data_req, mkt)
|
754
|
-
|
755
|
-
except AssertionError:
|
756
|
-
logging.info(f"Failed to get funding rates for {ticker} after many attempts.")
|
663
|
+
df0 = self.fetch_tidy_funding_rates(data_req, mkt)
|
757
664
|
|
758
|
-
|
665
|
+
if df0 is not None:
|
759
666
|
# add ticker to index
|
760
667
|
df0['ticker'] = ticker.upper()
|
761
668
|
df0.set_index(['ticker'], append=True, inplace=True)
|
@@ -783,12 +690,12 @@ class CCXT(Library):
|
|
783
690
|
# get OHLCV data
|
784
691
|
ohlcv_list = ["open", "high", "low", "close", "volume"]
|
785
692
|
if any([field in ohlcv_list for field in data_req.fields]):
|
786
|
-
df0 = self.
|
693
|
+
df0 = self.fetch_ohlcv(data_req)
|
787
694
|
df = pd.concat([df, df0])
|
788
695
|
|
789
696
|
# get funding rate data
|
790
697
|
if any([field == "funding_rate" for field in data_req.fields]):
|
791
|
-
df1 = self.
|
698
|
+
df1 = self.fetch_funding_rates(data_req)
|
792
699
|
df = pd.concat([df, df1], axis=1)
|
793
700
|
|
794
701
|
# check if df empty
|