cryptodatapy-0.2.8-py3-none-any.whl → cryptodatapy-0.2.10-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (29):
  1. cryptodatapy/extract/datarequest.py +0 -1
  2. cryptodatapy/extract/exchanges/__init__.py +2 -0
  3. cryptodatapy/extract/exchanges/dydx.py +137 -0
  4. cryptodatapy/extract/exchanges/exchange.py +439 -0
  5. cryptodatapy/extract/getdata.py +68 -0
  6. cryptodatapy/extract/libraries/ccxt_api.py +706 -150
  7. cryptodatapy/extract/libraries/library.py +1 -3
  8. cryptodatapy/extract/web/web.py +62 -0
  9. cryptodatapy/transform/convertparams.py +5 -6
  10. cryptodatapy/transform/filter.py +7 -8
  11. cryptodatapy/transform/wrangle.py +2 -1
  12. {cryptodatapy-0.2.8.dist-info → cryptodatapy-0.2.10.dist-info}/METADATA +1 -1
  13. {cryptodatapy-0.2.8.dist-info → cryptodatapy-0.2.10.dist-info}/RECORD +15 -26
  14. cryptodatapy/conf/fx_tickers.csv +0 -31
  15. cryptodatapy/extract/data_vendors/CoinMetrics.ipynb +0 -747
  16. cryptodatapy/extract/libraries/Untitled.ipynb +0 -199
  17. cryptodatapy/extract/libraries/ccxt.ipynb +0 -747
  18. cryptodatapy/extract/libraries/yfinance_api.py +0 -511
  19. cryptodatapy/transform/cc_onchain_data.csv +0 -118423
  20. cryptodatapy/transform/clean_onchain_data.ipynb +0 -4750
  21. cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +0 -2819
  22. cryptodatapy/transform/cmdty_data.ipynb +0 -402
  23. cryptodatapy/transform/credit_data.ipynb +0 -291
  24. cryptodatapy/transform/eqty_data.ipynb +0 -836
  25. cryptodatapy/transform/global_credit_data_daily.parquet +0 -0
  26. cryptodatapy/transform/rates_data.ipynb +0 -465
  27. cryptodatapy/transform/us_rates_daily.csv +0 -227752
  28. {cryptodatapy-0.2.8.dist-info → cryptodatapy-0.2.10.dist-info}/LICENSE +0 -0
  29. {cryptodatapy-0.2.8.dist-info → cryptodatapy-0.2.10.dist-info}/WHEEL +0 -0
@@ -1,11 +1,12 @@
1
1
  import logging
2
2
  from typing import Any, Dict, List, Optional, Union
3
-
4
3
  import pandas as pd
5
4
  import asyncio
5
+ import random
6
+ from time import sleep
6
7
  import ccxt
7
8
  import ccxt.async_support as ccxt_async
8
- from tqdm.asyncio import tqdm # Progress bar for async
9
+ from tqdm.asyncio import tqdm
9
10
 
10
11
  from cryptodatapy.extract.datarequest import DataRequest
11
12
  from cryptodatapy.extract.libraries.library import Library
@@ -21,6 +22,7 @@ class CCXT(Library):
21
22
  """
22
23
  Retrieves data from CCXT API.
23
24
  """
25
+
24
26
  def __init__(
25
27
  self,
26
28
  categories: Union[str, List[str]] = "crypto",
@@ -70,22 +72,10 @@ class CCXT(Library):
70
72
  rate_limit: Any, optional, Default None
71
73
  Number of API calls made and left, by time frequency.
72
74
  """
73
- Library.__init__(
74
- self,
75
- categories,
76
- exchanges,
77
- indexes,
78
- assets,
79
- markets,
80
- market_types,
81
- fields,
82
- frequencies,
83
- base_url,
84
- api_key,
85
- max_obs_per_call,
86
- rate_limit,
75
+ super().__init__(
76
+ categories, exchanges, indexes, assets, markets, market_types,
77
+ fields, frequencies, base_url, api_key, max_obs_per_call, rate_limit
87
78
  )
88
-
89
79
  self.exchange = None
90
80
  self.exchange_async = None
91
81
  self.data_req = None
@@ -322,14 +312,27 @@ class CCXT(Library):
322
312
  if self.rate_limit is None:
323
313
  self.rate_limit = self.exchange.rateLimit
324
314
 
325
- async def _fetch_ohlcv(self,
326
- ticker: str,
327
- freq: str,
328
- start_date: str,
329
- end_date: str,
330
- exch: str,
331
- trials: int = 3
332
- ) -> List:
315
+ @staticmethod
316
+ def exponential_backoff_with_jitter(base_delay: float, max_delay: int, attempts: int) -> None:
317
+ delay = min(max_delay, base_delay * (2 ** attempts))
318
+ delay_with_jitter = delay + random.uniform(0, delay * 0.5)
319
+ sleep(delay_with_jitter)
320
+
321
+ @staticmethod
322
+ async def exponential_backoff_with_jitter_async(base_delay: float, max_delay: int, attempts: int) -> None:
323
+ delay = min(max_delay, base_delay * (2 ** attempts))
324
+ delay_with_jitter = delay + random.uniform(0, delay * 0.5)
325
+ await asyncio.sleep(delay_with_jitter)
326
+
327
+ async def _fetch_ohlcv_async(self,
328
+ ticker: str,
329
+ freq: str,
330
+ start_date: str,
331
+ end_date: str,
332
+ exch: str,
333
+ trials: int = 3,
334
+ pause: int = 1
335
+ ) -> List:
333
336
  """
334
337
  Fetches OHLCV data for a specific ticker.
335
338
 
@@ -347,6 +350,8 @@ class CCXT(Library):
347
350
  Name of exchange.
348
351
  trials: int, default 3
349
352
  Number of attempts to fetch data.
353
+ pause: int, default 60
354
+ Pause in seconds to respect the rate limit.
350
355
 
351
356
  Returns
352
357
  -------
@@ -367,7 +372,7 @@ class CCXT(Library):
367
372
  while start_date < end_date and attempts < trials:
368
373
 
369
374
  try:
370
- data_resp = await getattr(self.exchange_async, 'fetchOHLCV')(
375
+ data_resp = await self.exchange_async.fetch_ohlcv(
371
376
  ticker,
372
377
  freq,
373
378
  since=start_date,
@@ -375,48 +380,139 @@ class CCXT(Library):
375
380
  params={'until': end_date}
376
381
  )
377
382
 
383
+ # add data to list
384
+ if data_resp:
385
+ start_date = data_resp[-1][0] + 1
386
+ data.extend(data_resp)
387
+ else:
388
+ if not data:
389
+ logging.warning(f"No OHLCV data available for {ticker}.")
390
+ break
391
+
378
392
  except Exception as e:
379
393
  logging.warning(
380
- f"Failed to get OHLCV data from {self.exchange_async.id} for {ticker} on attempt #{attempts+1}."
394
+ f"Failed to get OHLCV data from {self.exchange_async.id} for {ticker} "
395
+ f"on attempt #{attempts + 1}: {e}."
381
396
  )
382
- logging.warning(e)
383
397
  attempts += 1
384
- if attempts == trials:
398
+ if attempts >= trials:
385
399
  logging.warning(
386
400
  f"Failed to get OHLCV data from {self.exchange_async.id} "
387
401
  f"for {ticker} after {trials} attempts."
388
402
  )
389
- return data
403
+ break
390
404
 
391
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
392
- continue
405
+ finally:
406
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
407
+ pause,
408
+ attempts)
393
409
 
394
- else:
395
- # check if data resp is empty
396
- if len(data_resp):
397
- # next start date
410
+ await self.exchange_async.close()
411
+ return data
412
+
413
+ else:
414
+ logging.warning(f"OHLCV data is not available for {self.exchange_async.id}.")
415
+ return None
416
+
417
+ def _fetch_ohlcv(self,
418
+ ticker: str,
419
+ freq: str,
420
+ start_date: str,
421
+ end_date: str,
422
+ exch: str,
423
+ trials: int = 3,
424
+ pause: int = 1
425
+ ) -> List:
426
+ """
427
+ Fetches OHLCV data for a specific ticker.
428
+
429
+ Parameters
430
+ ----------
431
+ ticker: str
432
+ Ticker symbol.
433
+ freq: str
434
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
435
+ start_date: str
436
+ Start date in integers in milliseconds since Unix epoch.
437
+ end_date: str
438
+ End date in integers in milliseconds since Unix epoch.
439
+ exch: str
440
+ Name of exchange.
441
+ trials: int, default 3
442
+ Number of attempts to fetch data.
443
+ pause: int, default 60
444
+ Pause in seconds to respect the rate limit.
445
+
446
+ Returns
447
+ -------
448
+ data: list
449
+ List of timestamps with OHLCV data.
450
+ """
451
+ attempts = 0
452
+ data = []
453
+
454
+ # inst exch
455
+ if self.exchange is None:
456
+ self.exchange = getattr(ccxt, exch)()
457
+
458
+ # fetch data
459
+ if self.exchange.has['fetchOHLCV']:
460
+
461
+ # while loop to fetch all data
462
+ while start_date < end_date and attempts < trials:
463
+
464
+ try:
465
+ data_resp = self.exchange.fetch_ohlcv(
466
+ ticker,
467
+ freq,
468
+ since=start_date,
469
+ limit=self.max_obs_per_call,
470
+ params={
471
+ 'until': end_date,
472
+ 'paginate': True
473
+ }
474
+ )
475
+
476
+ # add data to list
477
+ if data_resp:
398
478
  start_date = data_resp[-1][0] + 1
399
479
  data.extend(data_resp)
400
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
401
-
402
480
  else:
481
+ if not data:
482
+ logging.warning(f"No OHLCV data available for {ticker}.")
403
483
  break
404
484
 
485
+ except Exception as e:
486
+ logging.warning(
487
+ f"Failed to get OHLCV data from {self.exchange.id} for {ticker} "
488
+ f"on attempt #{attempts + 1}: {e}."
489
+ )
490
+ attempts += 1
491
+ if attempts >= trials:
492
+ logging.warning(
493
+ f"Failed to get OHLCV data from {self.exchange.id} "
494
+ f"for {ticker} after {trials} attempts."
495
+ )
496
+ break
497
+
498
+ finally:
499
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
500
+
405
501
  return data
406
502
 
407
503
  else:
408
- logging.warning(f"OHLCV data is not available for {self.exchange_async.id}.")
504
+ logging.warning(f"OHLCV data is not available for {self.exchange.id}.")
409
505
  return None
410
506
 
411
- async def fetch_all_ohlcv(self,
412
- tickers,
413
- freq: str,
414
- start_date: str,
415
- end_date: str,
416
- exch: str,
417
- trials: int = 3,
418
- pause: int = 0.5
419
- ):
507
+ async def _fetch_all_ohlcv_async(self,
508
+ tickers,
509
+ freq: str,
510
+ start_date: str,
511
+ end_date: str,
512
+ exch: str,
513
+ trials: int = 3,
514
+ pause: int = 1
515
+ ):
420
516
  """
421
517
  Fetches OHLCV data for a list of tickers.
422
518
 
@@ -453,22 +549,70 @@ class CCXT(Library):
453
549
 
454
550
  # loop through tickers
455
551
  for ticker in tickers:
456
- data_resp = await self._fetch_ohlcv(ticker, freq, start_date, end_date, trials=trials, exch=exch)
552
+ data_resp = await self._fetch_ohlcv_async(ticker, freq, start_date, end_date, trials=trials, exch=exch)
553
+ await asyncio.sleep(pause)
457
554
  data.append(data_resp)
458
555
  pbar.update(1)
459
- await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
460
556
 
461
557
  await self.exchange_async.close()
462
558
 
463
559
  return data
464
560
 
465
- async def _fetch_funding_rates(self,
466
- ticker: str,
467
- start_date: str,
468
- end_date: str,
469
- exch: str,
470
- trials: int = 3
471
- ) -> List:
561
+ def _fetch_all_ohlcv(self,
562
+ tickers,
563
+ freq: str,
564
+ start_date: str,
565
+ end_date: str,
566
+ exch: str,
567
+ trials: int = 3,
568
+ pause: int = 1
569
+ ):
570
+ """
571
+ Fetches OHLCV data for a list of tickers.
572
+
573
+ Parameters
574
+ ----------
575
+ tickers: list
576
+ List of ticker symbols.
577
+ freq: str
578
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
579
+ start_date: str
580
+ Start date in integers in milliseconds since Unix epoch.
581
+ end_date: str
582
+ End date in integers in milliseconds since Unix epoch.
583
+ exch: str
584
+ Name of exchange.
585
+ trials: int, default 3
586
+ Number of attempts to fetch data.
587
+ pause: int, default 0.5
588
+ Pause in seconds to respect the rate limit.
589
+ """
590
+ # inst exch
591
+ if self.exchange is None:
592
+ self.exchange = getattr(ccxt, exch)()
593
+
594
+ data = []
595
+
596
+ # create progress bar
597
+ pbar = tqdm(total=len(tickers), desc="Fetching OHLCV data", unit="ticker")
598
+
599
+ # loop through tickers
600
+ for ticker in tickers:
601
+ data_resp = self._fetch_ohlcv(ticker, freq, start_date, end_date, trials=trials, exch=exch)
602
+ sleep(pause)
603
+ data.append(data_resp)
604
+ pbar.update(1)
605
+
606
+ return data
607
+
608
+ async def _fetch_funding_rates_async(self,
609
+ ticker: str,
610
+ start_date: str,
611
+ end_date: str,
612
+ exch: str,
613
+ trials: int = 3,
614
+ pause: int = 1
615
+ ) -> List:
472
616
  """
473
617
  Fetches funding rates data for a specific ticker.
474
618
 
@@ -482,6 +626,8 @@ class CCXT(Library):
482
626
  End date in integers in milliseconds since Unix epoch.
483
627
  trials: int, default 3
484
628
  Number of attempts to fetch data.
629
+ pause: int, default 1
630
+ Pause in seconds to respect the rate limit.
485
631
 
486
632
  Returns
487
633
  -------
@@ -502,54 +648,136 @@ class CCXT(Library):
502
648
  while start_date < end_date and attempts < trials:
503
649
 
504
650
  try:
505
- data_resp = await getattr(self.exchange_async, 'fetchFundingRateHistory')(
651
+ data_resp = await self.exchange_async.fetch_funding_rate_history(
506
652
  ticker,
507
653
  since=start_date,
508
654
  limit=self.max_obs_per_call,
509
655
  params={'until': end_date}
510
656
  )
511
657
 
658
+ # add data to list
659
+ if data_resp:
660
+ start_date = data_resp[-1]['timestamp'] + 1
661
+ data.extend(data_resp)
662
+ else:
663
+ if not data:
664
+ logging.warning(f"No funding rates data available for {ticker}.")
665
+ break
666
+
512
667
  except Exception as e:
513
668
  logging.warning(
514
- f"Failed to get funding rates from {self.exchange_async.id} "
515
- f"for {ticker} on attempt #{attempts+1}."
669
+ f"Failed to get funding rates from {self.exchange_async.id} for {ticker} "
670
+ f"on attempt #{attempts + 1}: {e}."
516
671
  )
517
- logging.warning(e)
518
672
  attempts += 1
519
- if attempts == trials:
673
+ if attempts >= trials:
520
674
  logging.warning(
521
675
  f"Failed to get funding rates from {self.exchange_async.id} "
522
676
  f"for {ticker} after {trials} attempts."
523
677
  )
524
- return data
678
+ break
525
679
 
526
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
527
- continue
680
+ finally:
681
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
682
+ pause,
683
+ attempts)
528
684
 
529
- else:
530
- # check if data resp is empty
531
- if len(data_resp):
532
- # next start date
685
+ await self.exchange_async.close()
686
+ return data
687
+
688
+ else:
689
+ logging.warning(f"Funding rates are not available for {self.exchange_async.id}.")
690
+ return None
691
+
692
+ def _fetch_funding_rates(self,
693
+ ticker: str,
694
+ start_date: str,
695
+ end_date: str,
696
+ exch: str,
697
+ trials: int = 3,
698
+ pause: int = 1
699
+ ) -> List:
700
+ """
701
+ Fetches funding rates data for a specific ticker.
702
+
703
+ Parameters
704
+ ----------
705
+ ticker: str
706
+ Ticker symbol.
707
+ start_date: str
708
+ Start date in integers in milliseconds since Unix epoch.
709
+ end_date: str
710
+ End date in integers in milliseconds since Unix epoch.
711
+ trials: int, default 3
712
+ Number of attempts to fetch data.
713
+ pause: int, default 1
714
+ Pause in seconds to respect the rate limit.
715
+
716
+ Returns
717
+ -------
718
+ data: list
719
+ List of dictionaries with timestamps and funding rates data.
720
+ """
721
+ attempts = 0
722
+ data = []
723
+
724
+ # inst exch
725
+ if self.exchange is None:
726
+ self.exchange = getattr(ccxt, exch)()
727
+
728
+ # fetch data
729
+ if self.exchange.has['fetchFundingRateHistory']:
730
+
731
+ # while loop to get all data
732
+ while start_date < end_date and attempts < trials:
733
+
734
+ try:
735
+ data_resp = self.exchange.fetch_funding_rate_history(
736
+ ticker,
737
+ since=start_date,
738
+ limit=self.max_obs_per_call,
739
+ params={'until': end_date}
740
+ )
741
+
742
+ # add data to list
743
+ if data_resp:
533
744
  start_date = data_resp[-1]['timestamp'] + 1
534
745
  data.extend(data_resp)
535
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
536
746
  else:
747
+ if not data:
748
+ logging.warning(f"No funding rates data available for {ticker}.")
537
749
  break
538
750
 
751
+ except Exception as e:
752
+ logging.warning(
753
+ f"Failed to get funding rates from {self.exchange.id} for {ticker} "
754
+ f"on attempt #{attempts + 1}: {e}."
755
+ )
756
+ attempts += 1
757
+ if attempts >= trials:
758
+ logging.warning(
759
+ f"Failed to get funding rates from {self.exchange.id} "
760
+ f"for {ticker} after {trials} attempts."
761
+ )
762
+ break
763
+
764
+ finally:
765
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
766
+
539
767
  return data
540
768
 
541
769
  else:
542
- logging.warning(f"Funding rates are not available for {self.exchange_async.id}.")
770
+ logging.warning(f"Funding rates are not available for {self.exchange.id}.")
543
771
  return None
544
772
 
545
- async def fetch_all_funding_rates(self,
546
- tickers,
547
- start_date: str,
548
- end_date: str,
549
- exch: str,
550
- trials: int = 3,
551
- pause: int = 0.5
552
- ):
773
+ async def _fetch_all_funding_rates_async(self,
774
+ tickers,
775
+ start_date: str,
776
+ end_date: str,
777
+ exch: str,
778
+ trials: int = 3,
779
+ pause: int = 1
780
+ ):
553
781
  """
554
782
  Fetches funding rates data for a list of tickers.
555
783
 
@@ -584,23 +812,74 @@ class CCXT(Library):
584
812
 
585
813
  # loop through tickers
586
814
  for ticker in tickers:
587
- data_resp = await self._fetch_funding_rates(ticker, start_date, end_date, trials=trials, exch=exch)
815
+ data_resp = await self._fetch_funding_rates_async(ticker, start_date, end_date, trials=trials, exch=exch)
588
816
  data.append(data_resp)
589
817
  pbar.update(1)
590
- await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
818
+ await asyncio.sleep(pause)
591
819
 
592
820
  await self.exchange_async.close()
593
821
 
594
822
  return data
595
823
 
596
- async def _fetch_open_interest(self,
597
- ticker: str,
598
- freq: str,
599
- start_date: str,
600
- end_date: str,
601
- exch: str,
602
- trials: int = 3
603
- ) -> List:
824
+ def _fetch_all_funding_rates(self,
825
+ tickers,
826
+ start_date: str,
827
+ end_date: str,
828
+ exch: str,
829
+ trials: int = 3,
830
+ pause: int = 1
831
+ ):
832
+ """
833
+ Fetches funding rates data for a list of tickers.
834
+
835
+ Parameters
836
+ ----------
837
+ tickers: list
838
+ List of ticker symbols.
839
+ start_date: str
840
+ Start date in integers in milliseconds since Unix epoch.
841
+ end_date: str
842
+ End date in integers in milliseconds since Unix epoch.
843
+ exch: str
844
+ Name of exchange.
845
+ trials: int, default 3
846
+ Number of attempts to fetch data.
847
+ pause: int, default 0.5
848
+ Pause in seconds to respect the rate limit.
849
+
850
+ Returns
851
+ -------
852
+ data: list
853
+ List of lists of dictionaries with timestamps and funding rates data for each ticker.
854
+ """
855
+
856
+ # inst exch
857
+ if self.exchange is None:
858
+ self.exchange = getattr(ccxt, exch)()
859
+
860
+ data = []
861
+
862
+ # create progress bar
863
+ pbar = tqdm(total=len(tickers), desc="Fetching funding rates", unit="ticker")
864
+
865
+ # loop through tickers
866
+ for ticker in tickers:
867
+ data_resp = self._fetch_funding_rates(ticker, start_date, end_date, trials=trials, exch=exch)
868
+ data.append(data_resp)
869
+ pbar.update(1)
870
+ sleep(pause)
871
+
872
+ return data
873
+
874
+ async def _fetch_open_interest_async(self,
875
+ ticker: str,
876
+ freq: str,
877
+ start_date: str,
878
+ end_date: str,
879
+ exch: str,
880
+ trials: int = 3,
881
+ pause: int = 1
882
+ ) -> List:
604
883
  """
605
884
  Fetches open interest data for a specific ticker.
606
885
 
@@ -618,6 +897,8 @@ class CCXT(Library):
618
897
  Name of exchange.
619
898
  trials: int, default 3
620
899
  Number of attempts to fetch data.
900
+ pause: int, default 1
901
+ Pause in seconds to respect the rate limit.
621
902
 
622
903
  Returns
623
904
  -------
@@ -639,7 +920,7 @@ class CCXT(Library):
639
920
  while start_date < end_date and attempts < trials:
640
921
 
641
922
  try:
642
- data_resp = await getattr(self.exchange_async, 'fetchOpenInterestHistory')(
923
+ data_resp = await self.exchange_async.fetch_open_interest_history(
643
924
  ticker,
644
925
  freq,
645
926
  since=start_date,
@@ -647,49 +928,138 @@ class CCXT(Library):
647
928
  params={'until': end_date}
648
929
  )
649
930
 
931
+ # add data to list
932
+ if data_resp:
933
+ start_date = data_resp[-1]['timestamp'] + 1
934
+ data.extend(data_resp)
935
+ else:
936
+ if not data:
937
+ logging.warning(f"No open interest data available for {ticker}.")
938
+ break
939
+
650
940
  except Exception as e:
651
941
  logging.warning(
652
- f"Failed to get open interest from {self.exchange_async.id} "
653
- f"for {ticker} on attempt #{attempts + 1}."
942
+ f"Failed to get open interest from {self.exchange_async.id} for {ticker} "
943
+ f"on attempt #{attempts + 1}: {e}."
654
944
  )
655
- logging.warning(e)
656
945
  attempts += 1
657
- if attempts == trials:
946
+ if attempts >= trials:
658
947
  logging.warning(
659
948
  f"Failed to get open interest from {self.exchange_async.id} "
660
949
  f"for {ticker} after {trials} attempts."
661
950
  )
662
- return data
951
+ break
663
952
 
664
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
665
- continue
953
+ finally:
954
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
955
+ pause,
956
+ attempts)
957
+ # await asyncio.sleep(self.exchange_async.rateLimit / 1000)
666
958
 
667
- else:
668
- # check if data resp is empty
669
- if len(data_resp):
670
- # next start date
959
+ await self.exchange_async.close()
960
+ return data
961
+
962
+ else:
963
+ logging.warning(f"Open interest is not available for {self.exchange_async.id}.")
964
+ return None
965
+
966
+ def _fetch_open_interest(self,
967
+ ticker: str,
968
+ freq: str,
969
+ start_date: str,
970
+ end_date: str,
971
+ exch: str,
972
+ trials: int = 3,
973
+ pause: int = 1
974
+ ) -> List:
975
+ """
976
+ Fetches open interest data for a specific ticker.
977
+
978
+ Parameters
979
+ ----------
980
+ ticker: str
981
+ Ticker symbol.
982
+ freq: str
983
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
984
+ start_date: str
985
+ Start date in integers in milliseconds since Unix epoch.
986
+ end_date: str
987
+ End date in integers in milliseconds since Unix epoch.
988
+ exch: str
989
+ Name of exchange.
990
+ trials: int, default 3
991
+ Number of attempts to fetch data.
992
+ pause: int, default 1
993
+ Pause in seconds to respect the rate limit.
994
+
995
+ Returns
996
+ -------
997
+ data: list
998
+ List of dictionaries with timestamps and open interest data.
999
+ """
1000
+ # number of attempts
1001
+ attempts = 0
1002
+ data = []
1003
+
1004
+ # inst exch
1005
+ self.exchange = getattr(ccxt, exch)()
1006
+
1007
+ # fetch data
1008
+ if self.exchange.has['fetchOpenInterestHistory']:
1009
+
1010
+ # while loop to get all data
1011
+ while start_date < end_date and attempts < trials:
1012
+
1013
+ try:
1014
+ data_resp = self.exchange.fetch_open_interest_history(
1015
+ ticker,
1016
+ freq,
1017
+ since=start_date,
1018
+ limit=500,
1019
+ params={'until': end_date}
1020
+ )
1021
+
1022
+ # add data to list
1023
+ if data_resp:
671
1024
  start_date = data_resp[-1]['timestamp'] + 1
672
1025
  data.extend(data_resp)
673
- await asyncio.sleep(self.exchange_async.rateLimit / 1000)
674
1026
  else:
1027
+ if not data:
1028
+ logging.warning(f"No open interest data available for {ticker}.")
675
1029
  break
676
1030
 
1031
+ except Exception as e:
1032
+ logging.warning(
1033
+ f"Failed to get open interest from {self.exchange.id} for {ticker} "
1034
+ f"on attempt #{attempts + 1}: {e}."
1035
+ )
1036
+ attempts += 1
1037
+ if attempts >= trials:
1038
+ logging.warning(
1039
+ f"Failed to get open interest from {self.exchange.id} "
1040
+ f"for {ticker} after {trials} attempts."
1041
+ )
1042
+ break
1043
+
1044
+ finally:
1045
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
1046
+ # sleep(self.exchange.rateLimit / 1000)
1047
+
677
1048
  return data
678
1049
 
679
1050
  else:
680
- logging.warning(f"Open interest is not available for {self.exchange_async.id}.")
1051
+ logging.warning(f"Open interest is not available for {self.exchange.id}.")
681
1052
  return None
682
1053
 
683
- async def fetch_all_open_interest(self,
684
- tickers,
685
- freq: str,
686
- start_date: str,
687
- end_date: str,
688
- exch: str,
689
- trials: int = 3,
690
- pause: int = 0.5
691
- ):
692
-
1054
+ async def _fetch_all_open_interest_async(self,
1055
+ tickers,
1056
+ freq: str,
1057
+ start_date: str,
1058
+ end_date: str,
1059
+ exch: str,
1060
+ trials: int = 3,
1061
+ pause: int = 1
1062
+ ):
693
1063
  """
694
1064
  Fetches open interest data for a list of tickers.
695
1065
 
@@ -726,15 +1096,68 @@ class CCXT(Library):
726
1096
 
727
1097
  # loop through tickers
728
1098
  for ticker in tickers:
729
- data_resp = await self._fetch_open_interest(ticker, freq, start_date, end_date, trials=trials, exch=exch)
1099
+ data_resp = await self._fetch_open_interest_async(ticker, freq, start_date, end_date, trials=trials,
1100
+ exch=exch)
730
1101
  data.append(data_resp)
731
1102
  pbar.update(1)
732
- await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
1103
+ await asyncio.sleep(pause)
733
1104
 
734
1105
  await self.exchange_async.close()
735
1106
 
736
1107
  return data
737
1108
 
1109
+ def _fetch_all_open_interest(self,
1110
+ tickers,
1111
+ freq: str,
1112
+ start_date: str,
1113
+ end_date: str,
1114
+ exch: str,
1115
+ trials: int = 3,
1116
+ pause: int = 1
1117
+ ):
1118
+ """
1119
+ Fetches open interest data for a list of tickers.
1120
+
1121
+ Parameters
1122
+ ----------
1123
+ tickers: list
1124
+ List of ticker symbols.
1125
+ freq: str
1126
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
1127
+ start_date: str
1128
+ Start date in integers in milliseconds since Unix epoch.
1129
+ end_date: str
1130
+ End date in integers in milliseconds since Unix epoch.
1131
+ exch: str
1132
+ Name of exchange.
1133
+ trials: int, default 3
1134
+ Number of attempts to fetch data.
1135
+ pause: int, default 0.5
1136
+ Pause in seconds to respect the rate limit.
1137
+
1138
+ Returns
1139
+ -------
1140
+ data: list
1141
+ List of lists of dictionaries with timestamps and open interest data for each ticker.
1142
+ """
1143
+ # inst exch
1144
+ if self.exchange is None:
1145
+ self.exchange = getattr(ccxt, exch)()
1146
+
1147
+ data = []
1148
+
1149
+ # create progress bar
1150
+ pbar = tqdm(total=len(tickers), desc="Fetching open interest", unit="ticker")
1151
+
1152
+ # loop through tickers
1153
+ for ticker in tickers:
1154
+ data_resp = self._fetch_open_interest(ticker, freq, start_date, end_date, trials=trials, exch=exch)
1155
+ data.append(data_resp)
1156
+ pbar.update(1)
1157
+ sleep(pause)
1158
+
1159
+ return data
1160
+
738
1161
  def convert_params(self, data_req: DataRequest) -> DataRequest:
739
1162
  """
740
1163
  Converts data request parameters to CCXT format.
@@ -767,14 +1190,6 @@ class CCXT(Library):
767
1190
  f"Use the '.frequencies' attribute to check available frequencies."
768
1191
  )
769
1192
 
770
- # check quote ccy
771
- if self.data_req.quote_ccy is not None:
772
- if self.data_req.quote_ccy not in self.assets:
773
- raise ValueError(
774
- f"{self.data_req.quote_ccy} is not supported. "
775
- f"Use the '.assets' attribute to check supported currencies."
776
- )
777
-
778
1193
  # check mkt type
779
1194
  if self.data_req.mkt_type not in self.market_types:
780
1195
  raise ValueError(
@@ -853,7 +1268,41 @@ class CCXT(Library):
853
1268
 
854
1269
  return WrangleData(self.data_req, data_resp).ccxt(data_type=data_type)
855
1270
 
856
- async def fetch_tidy_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
1271
+ async def fetch_tidy_ohlcv_async(self, data_req: DataRequest) -> pd.DataFrame:
1272
+ """
1273
+ Gets entire OHLCV history and wrangles the data response into tidy data format.
1274
+
1275
+ Parameters
1276
+ ----------
1277
+ data_req: DataRequest
1278
+ Parameters of data request in CryptoDataPy format.
1279
+
1280
+ Returns
1281
+ -------
1282
+ df: pd.DataFrame
1283
+ Dataframe with entire OHLCV data history retrieved and wrangled into tidy data format.
1284
+ """
1285
+ # convert data request parameters to CCXT format
1286
+ if self.data_req is None:
1287
+ self.convert_params(data_req)
1288
+
1289
+ # get entire data history
1290
+ data_resp = await self._fetch_all_ohlcv_async(self.data_req.source_markets,
1291
+ self.data_req.source_freq,
1292
+ self.data_req.source_start_date,
1293
+ self.data_req.source_end_date,
1294
+ self.data_req.exch,
1295
+ trials=self.data_req.trials,
1296
+ pause=self.data_req.pause)
1297
+
1298
+ # wrangle df
1299
+ if any(data_resp):
1300
+ df = self.wrangle_data_resp(data_resp, data_type='ohlcv')
1301
+ return df
1302
+ else:
1303
+ logging.warning("Failed to get requested OHLCV data.")
1304
+
1305
+ def fetch_tidy_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
857
1306
  """
858
1307
  Gets entire OHLCV history and wrangles the data response into tidy data format.
859
1308
 
@@ -872,13 +1321,13 @@ class CCXT(Library):
872
1321
  self.convert_params(data_req)
873
1322
 
874
1323
  # get entire data history
875
- data_resp = await self.fetch_all_ohlcv(self.data_req.source_markets,
876
- self.data_req.source_freq,
877
- self.data_req.source_start_date,
878
- self.data_req.source_end_date,
879
- self.data_req.exch,
880
- trials=self.data_req.trials,
881
- pause=self.data_req.pause)
1324
+ data_resp = self._fetch_all_ohlcv(self.data_req.source_markets,
1325
+ self.data_req.source_freq,
1326
+ self.data_req.source_start_date,
1327
+ self.data_req.source_end_date,
1328
+ self.data_req.exch,
1329
+ trials=self.data_req.trials,
1330
+ pause=self.data_req.pause)
882
1331
 
883
1332
  # wrangle df
884
1333
  if any(data_resp):
@@ -887,7 +1336,40 @@ class CCXT(Library):
887
1336
  else:
888
1337
  logging.warning("Failed to get requested OHLCV data.")
889
1338
 
890
- async def fetch_tidy_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
1339
+ async def fetch_tidy_funding_rates_async(self, data_req: DataRequest) -> pd.DataFrame:
1340
+ """
1341
+ Gets entire funding rates history and wrangles the data response into tidy data format.
1342
+
1343
+ Parameters
1344
+ ----------
1345
+ data_req: DataRequest
1346
+ Parameters of data request in CryptoDataPy format.
1347
+
1348
+ Returns
1349
+ -------
1350
+ df: pd.DataFrame
1351
+ Dataframe with entire data history retrieved and wrangled into tidy data format.
1352
+ """
1353
+ # convert data request parameters to CCXT format
1354
+ if self.data_req is None:
1355
+ self.convert_params(data_req)
1356
+
1357
+ # get entire data history
1358
+ data_resp = await self._fetch_all_funding_rates_async(self.data_req.source_markets,
1359
+ self.data_req.source_start_date,
1360
+ self.data_req.source_end_date,
1361
+ self.data_req.exch,
1362
+ trials=self.data_req.trials,
1363
+ pause=self.data_req.pause)
1364
+
1365
+ # wrangle df
1366
+ if any(data_resp):
1367
+ df = self.wrangle_data_resp(data_resp, data_type='funding_rates')
1368
+ return df
1369
+ else:
1370
+ logging.warning("Failed to get requested funding rates.")
1371
+
1372
+ def fetch_tidy_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
891
1373
  """
892
1374
  Gets entire funding rates history and wrangles the data response into tidy data format.
893
1375
 
@@ -906,12 +1388,12 @@ class CCXT(Library):
906
1388
  self.convert_params(data_req)
907
1389
 
908
1390
  # get entire data history
909
- data_resp = await self.fetch_all_funding_rates(self.data_req.source_markets,
910
- self.data_req.source_start_date,
911
- self.data_req.source_end_date,
912
- self.data_req.exch,
913
- trials=self.data_req.trials,
914
- pause=self.data_req.pause)
1391
+ data_resp = self._fetch_all_funding_rates(self.data_req.source_markets,
1392
+ self.data_req.source_start_date,
1393
+ self.data_req.source_end_date,
1394
+ self.data_req.exch,
1395
+ trials=self.data_req.trials,
1396
+ pause=self.data_req.pause)
915
1397
 
916
1398
  # wrangle df
917
1399
  if any(data_resp):
@@ -920,7 +1402,7 @@ class CCXT(Library):
920
1402
  else:
921
1403
  logging.warning("Failed to get requested funding rates.")
922
1404
 
923
- async def fetch_tidy_open_interest(self, data_req: DataRequest) -> pd.DataFrame:
1405
+ async def fetch_tidy_open_interest_async(self, data_req: DataRequest) -> pd.DataFrame:
924
1406
  """
925
1407
  Gets entire open interest history and wrangles the data response into tidy data format.
926
1408
 
@@ -939,13 +1421,13 @@ class CCXT(Library):
939
1421
  self.convert_params(data_req)
940
1422
 
941
1423
  # get entire data history
942
- data_resp = await self.fetch_all_open_interest(self.data_req.source_markets,
943
- self.data_req.source_freq,
944
- self.data_req.source_start_date,
945
- self.data_req.source_end_date,
946
- self.data_req.exch,
947
- trials=self.data_req.trials,
948
- pause=self.data_req.pause)
1424
+ data_resp = await self._fetch_all_open_interest_async(self.data_req.source_markets,
1425
+ self.data_req.source_freq,
1426
+ self.data_req.source_start_date,
1427
+ self.data_req.source_end_date,
1428
+ self.data_req.exch,
1429
+ trials=self.data_req.trials,
1430
+ pause=self.data_req.pause)
949
1431
 
950
1432
  # wrangle df
951
1433
  if any(data_resp):
@@ -954,7 +1436,81 @@ class CCXT(Library):
954
1436
  else:
955
1437
  logging.warning("Failed to get requested open interest.")
956
1438
 
957
- async def get_data(self, data_req: DataRequest) -> pd.DataFrame:
1439
def fetch_tidy_open_interest(self, data_req: DataRequest) -> pd.DataFrame:
    """
    Fetch the open interest history for a data request and return it as tidy data.

    Parameters
    ----------
    data_req: DataRequest
        Parameters of data request in CryptoDataPy format.

    Returns
    -------
    df: pd.DataFrame
        Result of wrangling the raw open interest response with data_type='open_interest'.
        When the raw response contains no truthy element, a warning is logged and
        None is returned instead.
    """
    # convert the request to CCXT format only if self.data_req has not been set yet
    if self.data_req is None:
        self.convert_params(data_req)

    # request the entire open interest history using the stored (converted) request
    raw_resp = self._fetch_all_open_interest(
        self.data_req.source_markets,
        self.data_req.source_freq,
        self.data_req.source_start_date,
        self.data_req.source_end_date,
        self.data_req.exch,
        trials=self.data_req.trials,
        pause=self.data_req.pause,
    )

    # guard clause: nothing usable in the response
    if not any(raw_resp):
        logging.warning("Failed to get requested open interest.")
        return None

    # wrangle the raw response into a tidy dataframe
    return self.wrangle_data_resp(raw_resp, data_type='open_interest')
1472
+
1473
async def get_data_async(self, data_req: DataRequest) -> pd.DataFrame:
    """
    Asynchronously get the data specified by the data request.

    Parameters
    ----------
    data_req: DataRequest
        Parameters of data request in CryptoDataPy format.

    Returns
    -------
    df: pd.DataFrame - MultiIndex
        DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for selected fields (cols).

    Raises
    ------
    Exception
        If self.data is empty after all requested data types have been fetched.
    """
    # OHLCV: the result is concatenated row-wise onto whatever self.data already holds
    if any(name in ["open", "high", "low", "close", "volume"] for name in data_req.fields):
        ohlcv = await self.fetch_tidy_ohlcv_async(data_req)
        self.data = pd.concat([self.data, ohlcv])

    # funding rates: added column-wise next to the existing data
    if any(name == "funding_rate" for name in data_req.fields):
        funding = await self.fetch_tidy_funding_rates_async(data_req)
        self.data = pd.concat([self.data, funding], axis=1)

    # open interest: added column-wise next to the existing data
    if any(name == "oi" for name in data_req.fields):
        open_interest = await self.fetch_tidy_open_interest_async(data_req)
        self.data = pd.concat([self.data, open_interest], axis=1)

    # nothing was collected for this request
    if self.data.empty:
        raise Exception(
            "No data returned. Check data request parameters and try again."
        )

    # keep only the requested fields that are present, in the order they were requested
    available = self.data.columns
    selected = [name for name in data_req.fields if name in available]
    self.data = self.data.loc[:, selected]

    return self.data.sort_index()
1512
+
1513
+ def get_data(self, data_req: DataRequest) -> pd.DataFrame:
958
1514
  """
959
1515
  Get data specified by data request.
960
1516
 
@@ -969,17 +1525,17 @@ class CCXT(Library):
969
1525
  """
970
1526
  # get OHLCV
971
1527
  if any([field in ["open", "high", "low", "close", "volume"] for field in data_req.fields]):
972
- df = await self.fetch_tidy_ohlcv(data_req)
1528
+ df = self.fetch_tidy_ohlcv(data_req)
973
1529
  self.data = pd.concat([self.data, df])
974
1530
 
975
1531
  # get funding rates
976
1532
  if any([field == "funding_rate" for field in data_req.fields]):
977
- df = await self.fetch_tidy_funding_rates(data_req)
1533
+ df = self.fetch_tidy_funding_rates(data_req)
978
1534
  self.data = pd.concat([self.data, df], axis=1)
979
1535
 
980
1536
  # get open interest
981
1537
  if any([field == "oi" for field in data_req.fields]):
982
- df = await self.fetch_tidy_open_interest(data_req)
1538
+ df = self.fetch_tidy_open_interest(data_req)
983
1539
  self.data = pd.concat([self.data, df], axis=1)
984
1540
 
985
1541
  # check df