cryptodatapy 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -3,7 +3,6 @@ from time import sleep
3
3
  from typing import Any, Dict, List, Optional, Union
4
4
 
5
5
  import ccxt
6
- import numpy as np
7
6
  import pandas as pd
8
7
 
9
8
  from cryptodatapy.extract.datarequest import DataRequest
@@ -20,7 +19,6 @@ class CCXT(Library):
20
19
  """
21
20
  Retrieves data from CCXT API.
22
21
  """
23
-
24
22
  def __init__(
25
23
  self,
26
24
  categories: Union[str, List[str]] = "crypto",
@@ -86,97 +84,16 @@ class CCXT(Library):
86
84
  rate_limit,
87
85
  )
88
86
 
89
- def get_exchanges_info(
90
- self,
91
- exch: Optional[str] = None,
92
- as_list: bool = True
93
- ) -> Union[List[str], pd.DataFrame]:
87
+ def get_exchanges_info(self) -> List[str]:
94
88
  """
95
89
  Get exchanges info.
96
90
 
97
- Parameters
98
- ----------
99
- exch: str, default None
100
- Name of exchange.
101
- as_list: bool, default False
102
- Returns exchanges info as list.
103
-
104
91
  Returns
105
92
  -------
106
93
  exch: list or pd.DataFrame
107
94
  List or dataframe with info on supported exchanges.
108
95
  """
109
- # list
110
- if as_list:
111
- self.exchanges = ccxt.exchanges
112
-
113
- # df
114
- else:
115
- if exch is not None:
116
- self.exchanges = [exch]
117
- else:
118
- self.exchanges = ccxt.exchanges
119
- print(
120
- "Getting metadata for all supported exchanges can take a few minutes."
121
- " For quick info on a specific exchange, provide the name of the exchange in the exch parameter."
122
- )
123
-
124
- # exch df
125
- exch_df = pd.DataFrame(
126
- index=self.exchanges,
127
- columns=[
128
- "id",
129
- "name",
130
- "countries",
131
- "urls",
132
- "version",
133
- "api",
134
- "has",
135
- "timeframes",
136
- "timeout",
137
- "rateLimit",
138
- "userAgent",
139
- "verbose",
140
- "markets",
141
- "symbols",
142
- "currencies",
143
- "markets_by_id",
144
- "currencies_by_id",
145
- "api_key",
146
- "secret",
147
- "uid",
148
- "options",
149
- ],
150
- )
151
-
152
- # Extract exchange info
153
- for index, row in exch_df.iterrows():
154
- try:
155
- exchange = getattr(ccxt, index)()
156
- exchange.load_markets()
157
- except AttributeError as e:
158
- print(f"AttributeError: {e} for exchange {index}")
159
- exch_df.loc[index, :] = np.nan
160
- except ccxt.BaseError as e: # Catch specific ccxt exceptions
161
- print(f"CCXT Error: {e} for exchange {index}")
162
- exch_df.loc[index, :] = np.nan
163
- except Exception as e: # Fallback for any other exceptions
164
- print(f"Unexpected error: {e} for exchange {index}")
165
- exch_df.loc[index, :] = np.nan
166
- else:
167
- for col in exch_df.columns:
168
- try:
169
- exch_df.loc[index, col] = str(getattr(exchange, col))
170
- except AttributeError as e:
171
- print(f"AttributeError: {e} for attribute {col} in exchange {index}")
172
- exch_df.loc[index, col] = np.nan
173
- except Exception as e: # Fallback for any other exceptions
174
- print(f"Unexpected error: {e} for attribute {col} in exchange {index}")
175
- exch_df.loc[index, col] = np.nan
176
-
177
- # set index name
178
- exch_df.index.name = "exchange"
179
- self.exchanges = exch_df
96
+ self.exchanges = ccxt.exchanges
180
97
 
181
98
  return self.exchanges
182
99
 
@@ -337,7 +254,7 @@ class CCXT(Library):
337
254
  Get CCXT metadata.
338
255
  """
339
256
  if self.exchanges is None:
340
- self.exchanges = self.get_exchanges_info(as_list=True)
257
+ self.exchanges = self.get_exchanges_info()
341
258
  if self.market_types is None:
342
259
  self.market_types = ["spot", "future", "perpetual_future", "option"]
343
260
  if self.assets is None:
@@ -371,9 +288,6 @@ class CCXT(Library):
371
288
  start_date: str
372
289
  Start date in 'YYYY-MM-DD' format.
373
290
 
374
- Other Parameters
375
- ----------------
376
-
377
291
 
378
292
  Returns
379
293
  -------
@@ -407,16 +321,15 @@ class CCXT(Library):
407
321
  limit=1000,
408
322
  )
409
323
 
410
- assert data_resp != []
324
+ return data_resp
411
325
 
412
326
  except Exception as e:
413
327
  logging.warning(f"Failed to get {data_type} data for {ticker}.")
414
328
  logging.warning(e)
415
329
 
416
- else:
417
- return data_resp
330
+ return None
418
331
 
419
- def get_all_ohlcv_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
332
+ def fetch_all_ohlcv_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
420
333
  """
421
334
  Submits get requests to API until entire OHLCV history has been collected. Only necessary when
422
335
  number of observations is larger than the maximum number of observations per call.
@@ -439,21 +352,19 @@ class CCXT(Library):
439
352
 
440
353
  # create empty df
441
354
  df = pd.DataFrame()
355
+
442
356
  # while loop condition
443
357
  missing_vals, attempts = True, 0
444
358
 
445
359
  # run a while loop until all data collected
446
360
  while missing_vals and attempts < cx_data_req['trials']:
447
361
 
448
- try:
449
- # data req
450
- data_resp = self.req_data(data_req=data_req,
451
- data_type='ohlcv',
452
- ticker=ticker,
453
- start_date=start_date)
362
+ data_resp = self.req_data(data_req=data_req,
363
+ data_type='ohlcv',
364
+ ticker=ticker,
365
+ start_date=start_date)
454
366
 
455
- except AssertionError as e:
456
- logging.warning(e)
367
+ if data_resp is None:
457
368
  attempts += 1
458
369
  sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
459
370
  logging.warning(
@@ -463,6 +374,7 @@ class CCXT(Library):
463
374
  logging.warning(
464
375
  f"Failed to get OHLCV data from {cx_data_req['exch']} for {ticker} after many attempts."
465
376
  )
377
+ return None
466
378
 
467
379
  else:
468
380
  # name cols and create df
@@ -488,7 +400,7 @@ class CCXT(Library):
488
400
 
489
401
  return df
490
402
 
491
- def get_all_funding_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
403
+ def fetch_all_funding_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
492
404
  """
493
405
  Submits get requests to API until entire funding rate history has been collected. Only necessary when
494
406
  number of observations is larger than the maximum number of observations per call.
@@ -517,15 +429,13 @@ class CCXT(Library):
517
429
  # run a while loop until all data collected
518
430
  while missing_vals and attempts < cx_data_req['trials']:
519
431
 
520
- try:
521
- # data req
522
- data_resp = self.req_data(data_req=data_req,
523
- data_type='funding_rates',
524
- ticker=ticker,
525
- start_date=start_date)
432
+ # data req
433
+ data_resp = self.req_data(data_req=data_req,
434
+ data_type='funding_rates',
435
+ ticker=ticker,
436
+ start_date=start_date)
526
437
 
527
- except AssertionError as e:
528
- logging.warning(e)
438
+ if data_resp is None:
529
439
  attempts += 1
530
440
  sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
531
441
  logging.warning(
@@ -533,8 +443,9 @@ class CCXT(Library):
533
443
  )
534
444
  if attempts == cx_data_req["trials"]:
535
445
  logging.warning(
536
- f"Failed to get funding rates from {cx_data_req['exch']} for {ticker} after many attempts."
446
+ f"Failed to get funding_rates from {cx_data_req['exch']} for {ticker} after many attempts."
537
447
  )
448
+ return None
538
449
 
539
450
  else:
540
451
  # add to df
@@ -580,7 +491,7 @@ class CCXT(Library):
580
491
 
581
492
  return WrangleData(data_req, data_resp).ccxt()
582
493
 
583
- def get_tidy_ohlcv(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
494
+ def fetch_tidy_ohlcv(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
584
495
  """
585
496
  Gets entire OHLCV history and wrangles the data response into tidy data format.
586
497
 
@@ -597,13 +508,15 @@ class CCXT(Library):
597
508
  Dataframe with entire data history retrieved and wrangled into tidy data format.
598
509
  """
599
510
  # get entire data history
600
- df = self.get_all_ohlcv_hist(data_req, ticker)
511
+ df = self.fetch_all_ohlcv_hist(data_req, ticker)
512
+
601
513
  # wrangle df
602
- df = self.wrangle_data_resp(data_req, df)
514
+ if df is not None:
515
+ df = self.wrangle_data_resp(data_req, df)
603
516
 
604
517
  return df
605
518
 
606
- def get_tidy_funding_rates(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
519
+ def fetch_tidy_funding_rates(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
607
520
  """
608
521
  Gets entire funding rates history and wrangles the data response into tidy data format.
609
522
 
@@ -620,9 +533,11 @@ class CCXT(Library):
620
533
  Dataframe with entire data history retrieved and wrangled into tidy data format.
621
534
  """
622
535
  # get entire data history
623
- df = self.get_all_funding_hist(data_req, ticker)
536
+ df = self.fetch_all_funding_hist(data_req, ticker)
537
+
624
538
  # wrangle df
625
- df = self.wrangle_data_resp(data_req, df)
539
+ if df is not None:
540
+ df = self.wrangle_data_resp(data_req, df)
626
541
 
627
542
  return df
628
543
 
@@ -680,7 +595,7 @@ class CCXT(Library):
680
595
  f" Market type must be perpetual futures."
681
596
  )
682
597
 
683
- def get_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
598
+ def fetch_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
684
599
  """
685
600
  Loops list of tickers, retrieves OHLCV data for each ticker in tidy format and stores it in a
686
601
  multiindex dataframe.
@@ -707,13 +622,9 @@ class CCXT(Library):
707
622
  # loop through tickers
708
623
  for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
709
624
 
710
- try:
711
- df0 = self.get_tidy_ohlcv(data_req, mkt)
712
-
713
- except AssertionError:
714
- logging.info(f"Failed to get OHLCV data for {ticker} after many attempts.")
625
+ df0 = self.fetch_tidy_ohlcv(data_req, mkt)
715
626
 
716
- else:
627
+ if df0 is not None:
717
628
  # add ticker to index
718
629
  df0['ticker'] = ticker.upper()
719
630
  df0.set_index(['ticker'], append=True, inplace=True)
@@ -722,7 +633,7 @@ class CCXT(Library):
722
633
 
723
634
  return df
724
635
 
725
- def get_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
636
+ def fetch_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
726
637
  """
727
638
  Loops list of tickers, retrieves funding rates data for each ticker in tidy format and stores it in a
728
639
  multiindex dataframe.
@@ -749,13 +660,9 @@ class CCXT(Library):
749
660
  # loop through tickers
750
661
  for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
751
662
 
752
- try:
753
- df0 = self.get_tidy_funding_rates(data_req, mkt)
754
-
755
- except AssertionError:
756
- logging.info(f"Failed to get funding rates for {ticker} after many attempts.")
663
+ df0 = self.fetch_tidy_funding_rates(data_req, mkt)
757
664
 
758
- else:
665
+ if df0 is not None:
759
666
  # add ticker to index
760
667
  df0['ticker'] = ticker.upper()
761
668
  df0.set_index(['ticker'], append=True, inplace=True)
@@ -783,12 +690,12 @@ class CCXT(Library):
783
690
  # get OHLCV data
784
691
  ohlcv_list = ["open", "high", "low", "close", "volume"]
785
692
  if any([field in ohlcv_list for field in data_req.fields]):
786
- df0 = self.get_ohlcv(data_req)
693
+ df0 = self.fetch_ohlcv(data_req)
787
694
  df = pd.concat([df, df0])
788
695
 
789
696
  # get funding rate data
790
697
  if any([field == "funding_rate" for field in data_req.fields]):
791
- df1 = self.get_funding_rates(data_req)
698
+ df1 = self.fetch_funding_rates(data_req)
792
699
  df = pd.concat([df, df1], axis=1)
793
700
 
794
701
  # check if df empty
@@ -131,7 +131,7 @@ class CleanData:
131
131
  ).values * 100
132
132
 
133
133
  # filtered df
134
- self.df = self.filtered_df
134
+ self.df = self.filtered_df.sort_index()
135
135
 
136
136
  return self
137
137
 
@@ -161,11 +161,12 @@ class CleanData:
161
161
 
162
162
  # repaired df
163
163
  if self.excluded_cols is not None:
164
- self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="outer", axis=1)
164
+ self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="inner", axis=1)
165
165
  else:
166
166
  self.df = self.repaired_df
167
+
167
168
  # reorder cols
168
- self.df = self.df[self.raw_df.columns]
169
+ self.df = self.df[self.raw_df.columns].sort_index()
169
170
 
170
171
  return self
171
172
 
@@ -196,7 +197,7 @@ class CleanData:
196
197
  ).values * 100
197
198
 
198
199
  # filtered df
199
- self.df = self.filtered_df
200
+ self.df = self.filtered_df.sort_index()
200
201
 
201
202
  return self
202
203
 
@@ -226,7 +227,7 @@ class CleanData:
226
227
  ).values * 100
227
228
 
228
229
  # filtered df
229
- self.df = self.filtered_df
230
+ self.df = self.filtered_df.sort_index()
230
231
 
231
232
  return self
232
233
 
@@ -260,7 +261,41 @@ class CleanData:
260
261
  self.summary.loc["n_tickers_below_min_obs", self.df.unstack().columns] = len(self.filtered_tickers)
261
262
 
262
263
  # filtered df
263
- self.df = self.filtered_df
264
+ self.df = self.filtered_df.sort_index()
265
+
266
+ return self
267
+
268
+ def filter_delisted_tickers(self, field: str = 'close', n_unch_vals: int = 30) -> CleanData:
269
+ """
270
+ Removes delisted tickers from dataframe.
271
+
272
+ Parameters
273
+ ----------
274
+ field: str, default 'close'
275
+ Field/column to use for detecting delisted tickers.
276
+ n_unch_vals: int, default 30
277
+ Number of consecutive unchanged values to consider a ticker as delisted.
278
+
279
+ Returns
280
+ -------
281
+ CleanData
282
+ CleanData object
283
+ """
284
+ # filter tickers
285
+ self.filtered_df = Filter(self.df).remove_delisted(field=field, n_unch_vals=n_unch_vals)
286
+
287
+ # tickers < min obs
288
+ self.filtered_tickers = list(
289
+ set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
290
+ set(self.df.index.droplevel(0).unique())
291
+ )
292
+ )
293
+
294
+ # add to summary
295
+ self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
296
+
297
+ # filtered df
298
+ self.df = self.filtered_df.sort_index()
264
299
 
265
300
  return self
266
301
 
@@ -283,6 +318,7 @@ class CleanData:
283
318
  self.filtered_df = Filter(self.df).tickers(tickers_list)
284
319
 
285
320
  # tickers < min obs
321
+
286
322
  self.filtered_tickers = list(
287
323
  set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
288
324
  set(self.df.index.droplevel(0).unique())
@@ -293,7 +329,7 @@ class CleanData:
293
329
  self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
294
330
 
295
331
  # filtered df
296
- self.df = self.filtered_df
332
+ self.df = self.filtered_df.sort_index()
297
333
 
298
334
  return self
299
335