cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fields.csv +1 -1
- cryptodatapy/extract/datarequest.py +169 -28
- cryptodatapy/extract/libraries/Untitled.ipynb +199 -0
- cryptodatapy/extract/libraries/ccxt.ipynb +747 -0
- cryptodatapy/extract/libraries/ccxt_api.py +631 -358
- cryptodatapy/extract/libraries/pandasdr_api.py +153 -138
- cryptodatapy/extract/libraries/yfinance_api.py +511 -0
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +226 -30
- cryptodatapy/transform/cmdty_data.ipynb +402 -0
- cryptodatapy/transform/convertparams.py +160 -303
- cryptodatapy/transform/eqty_data.ipynb +126 -99
- cryptodatapy/transform/wrangle.py +152 -43
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/METADATA +9 -6
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/RECORD +16 -12
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/WHEEL +1 -1
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/LICENSE +0 -0
@@ -13,10 +13,7 @@ class ConvertParams:
|
|
13
13
|
Converts data request parameters from CryptoDataPy to data source format.
|
14
14
|
"""
|
15
15
|
|
16
|
-
def __init__(
|
17
|
-
self,
|
18
|
-
data_req: DataRequest = None,
|
19
|
-
):
|
16
|
+
def __init__(self, data_req: DataRequest):
|
20
17
|
"""
|
21
18
|
Constructor
|
22
19
|
|
@@ -24,7 +21,6 @@ class ConvertParams:
|
|
24
21
|
----------
|
25
22
|
data_req: DataRequest
|
26
23
|
Parameters of data request in CryptoDataPy format.
|
27
|
-
|
28
24
|
"""
|
29
25
|
self.data_req = data_req
|
30
26
|
|
@@ -69,7 +65,7 @@ class ConvertParams:
|
|
69
65
|
start_date = round(pd.Timestamp(self.data_req.start_date).timestamp())
|
70
66
|
# convert end date
|
71
67
|
if self.data_req.end_date is None:
|
72
|
-
end_date = round(pd.Timestamp
|
68
|
+
end_date = round(pd.Timestamp.utcnow()).timestamp()
|
73
69
|
else:
|
74
70
|
end_date = round(pd.Timestamp(self.data_req.end_date).timestamp())
|
75
71
|
# fields
|
@@ -401,7 +397,7 @@ class ConvertParams:
|
|
401
397
|
start_date = self.data_req.start_date
|
402
398
|
# convert end date
|
403
399
|
if self.data_req.end_date is None:
|
404
|
-
end_date =
|
400
|
+
end_date = pd.Timestamp.utcnow()
|
405
401
|
else:
|
406
402
|
end_date = self.data_req.end_date
|
407
403
|
# convert fields
|
@@ -437,149 +433,109 @@ class ConvertParams:
|
|
437
433
|
"source_fields": self.data_req.source_fields,
|
438
434
|
}
|
439
435
|
|
440
|
-
def to_ccxt(self) ->
|
436
|
+
def to_ccxt(self) -> DataRequest:
|
441
437
|
"""
|
442
438
|
Convert tickers from CryptoDataPy to CCXT format.
|
443
439
|
"""
|
444
|
-
#
|
445
|
-
if self.data_req.source_tickers is
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
# convert freq
|
451
|
-
if self.data_req.source_freq is not None:
|
452
|
-
freq = self.data_req.source_freq
|
453
|
-
self.data_req.freq = self.data_req.source_freq
|
454
|
-
else:
|
440
|
+
# tickers
|
441
|
+
if self.data_req.source_tickers is None:
|
442
|
+
self.data_req.source_tickers = [ticker.upper() for ticker in self.data_req.tickers]
|
443
|
+
|
444
|
+
# freq
|
445
|
+
if self.data_req.source_freq is None:
|
455
446
|
if self.data_req.freq is None:
|
456
|
-
|
447
|
+
self.data_req.source_freq = "1d"
|
457
448
|
elif self.data_req.freq == "tick":
|
458
|
-
|
449
|
+
self.data_req.source_freq = "tick"
|
459
450
|
elif self.data_req.freq[-3:] == "min":
|
460
|
-
|
451
|
+
self.data_req.source_freq = self.data_req.freq.replace("min", "m")
|
452
|
+
elif self.data_req.freq[-1] == "h":
|
453
|
+
self.data_req.source_freq = self.data_req.freq
|
461
454
|
elif self.data_req.freq == "w":
|
462
|
-
|
455
|
+
self.data_req.source_freq = "1w"
|
463
456
|
elif self.data_req.freq == "m":
|
464
|
-
|
457
|
+
self.data_req.source_freq = "1M"
|
465
458
|
elif self.data_req.freq[-1] == "m":
|
466
|
-
|
459
|
+
self.data_req.source_freq = self.data_req.freq.replace("m", "M")
|
467
460
|
elif self.data_req.freq == "q":
|
468
|
-
|
461
|
+
self.data_req.source_freq = "1q"
|
469
462
|
elif self.data_req.freq == "y":
|
470
|
-
|
463
|
+
self.data_req.source_freq = "1y"
|
471
464
|
else:
|
472
|
-
|
473
|
-
|
465
|
+
self.data_req.source_freq = "1d"
|
466
|
+
|
467
|
+
# quote ccy
|
474
468
|
if self.data_req.quote_ccy is None:
|
475
|
-
quote_ccy = "USDT"
|
469
|
+
self.data_req.quote_ccy = "USDT"
|
476
470
|
else:
|
477
|
-
quote_ccy = self.data_req.quote_ccy.upper()
|
478
|
-
|
471
|
+
self.data_req.quote_ccy = self.data_req.quote_ccy.upper()
|
472
|
+
|
473
|
+
# exch
|
479
474
|
if self.data_req.mkt_type == "perpetual_future" and (
|
480
475
|
self.data_req.exch is None or self.data_req.exch == "binance"
|
481
476
|
):
|
482
|
-
exch = "binanceusdm"
|
477
|
+
self.data_req.exch = "binanceusdm"
|
483
478
|
elif self.data_req.exch is None:
|
484
|
-
exch = "binance"
|
479
|
+
self.data_req.exch = "binance"
|
485
480
|
elif (
|
486
481
|
self.data_req.exch == "kucoin"
|
487
482
|
and self.data_req.mkt_type == "perpetual_future"
|
488
483
|
):
|
489
|
-
exch = "kucoinfutures"
|
484
|
+
self.data_req.exch = "kucoinfutures"
|
490
485
|
elif (
|
491
486
|
self.data_req.exch == "huobi"
|
492
487
|
and self.data_req.mkt_type == "perpetual_future"
|
493
488
|
):
|
494
|
-
exch = "huobipro"
|
489
|
+
self.data_req.exch = "huobipro"
|
495
490
|
elif (
|
496
491
|
self.data_req.exch == "bitfinex"
|
497
492
|
and self.data_req.mkt_type == "perpetual_future"
|
498
493
|
):
|
499
|
-
exch = "bitfinex2"
|
494
|
+
self.data_req.exch = "bitfinex2"
|
500
495
|
elif (
|
501
496
|
self.data_req.exch == "mexc"
|
502
497
|
and self.data_req.mkt_type == "perpetual_future"
|
503
498
|
):
|
504
|
-
exch = "mexc3"
|
499
|
+
self.data_req.exch = "mexc3"
|
505
500
|
else:
|
506
|
-
exch = self.data_req.exch.lower()
|
507
|
-
|
508
|
-
|
509
|
-
if self.data_req.
|
510
|
-
|
501
|
+
self.data_req.exch = self.data_req.exch.lower()
|
502
|
+
|
503
|
+
# markets
|
504
|
+
if self.data_req.source_markets is None:
|
505
|
+
if self.data_req.mkt_type == "spot":
|
506
|
+
self.data_req.source_markets = [ticker + "/" + self.data_req.quote_ccy
|
507
|
+
for ticker in self.data_req.source_tickers]
|
508
|
+
elif self.data_req.mkt_type == "perpetual_future":
|
509
|
+
self.data_req.source_markets = [ticker + "/" + self.data_req.quote_ccy + ":" + self.data_req.quote_ccy
|
510
|
+
for ticker in self.data_req.source_tickers]
|
511
511
|
else:
|
512
|
-
for
|
513
|
-
|
514
|
-
|
515
|
-
elif self.data_req.mkt_type == "perpetual_future":
|
516
|
-
if exch == "binanceusdm":
|
517
|
-
mkts_list.append(ticker.upper() + "/" + quote_ccy.upper() + ':' + quote_ccy.upper())
|
518
|
-
elif (
|
519
|
-
exch == "ftx"
|
520
|
-
or exch == "okx"
|
521
|
-
or exch == "kucoinfutures"
|
522
|
-
or exch == "huobipro"
|
523
|
-
or exch == "cryptocom"
|
524
|
-
or exch == "bitfinex2"
|
525
|
-
or exch == "bybit"
|
526
|
-
or exch == "mexc3"
|
527
|
-
or exch == "aax"
|
528
|
-
or exch == "bitmex"
|
529
|
-
):
|
530
|
-
mkts_list.append(
|
531
|
-
ticker.upper()
|
532
|
-
+ "/"
|
533
|
-
+ quote_ccy.upper()
|
534
|
-
+ ":"
|
535
|
-
+ quote_ccy.upper()
|
536
|
-
)
|
537
|
-
# convert start date
|
512
|
+
self.data_req.source_tickers = [market.split("/")[0] for market in self.data_req.source_markets]
|
513
|
+
|
514
|
+
# start date
|
538
515
|
if self.data_req.start_date is None:
|
539
|
-
|
516
|
+
self.data_req.source_start_date = round(
|
540
517
|
pd.Timestamp("2010-01-01 00:00:00").timestamp() * 1e3
|
541
518
|
)
|
542
519
|
else:
|
543
|
-
|
520
|
+
self.data_req.source_start_date = round(
|
544
521
|
pd.Timestamp(self.data_req.start_date).timestamp() * 1e3
|
545
522
|
)
|
546
|
-
|
523
|
+
|
524
|
+
# end date
|
547
525
|
if self.data_req.end_date is None:
|
548
|
-
|
526
|
+
self.data_req.source_end_date = round(pd.Timestamp.utcnow().timestamp() * 1e3)
|
549
527
|
else:
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
self.data_req.
|
555
|
-
|
556
|
-
fields = self.convert_fields(data_source='ccxt')
|
528
|
+
self.data_req.source_end_date = round(pd.Timestamp(self.data_req.end_date).timestamp() * 1e3)
|
529
|
+
|
530
|
+
# fields
|
531
|
+
if self.data_req.source_fields is None:
|
532
|
+
self.data_req.source_fields = self.convert_fields(data_source='ccxt')
|
533
|
+
|
557
534
|
# tz
|
558
535
|
if self.data_req.tz is None:
|
559
|
-
tz = "UTC"
|
560
|
-
else:
|
561
|
-
tz = self.data_req.tz
|
536
|
+
self.data_req.tz = "UTC"
|
562
537
|
|
563
|
-
return
|
564
|
-
"tickers": tickers,
|
565
|
-
"freq": freq,
|
566
|
-
"quote_ccy": quote_ccy,
|
567
|
-
"exch": exch,
|
568
|
-
"ctys": None,
|
569
|
-
"mkt_type": self.data_req.mkt_type,
|
570
|
-
"mkts": mkts_list,
|
571
|
-
"start_date": start_date,
|
572
|
-
"end_date": end_date,
|
573
|
-
"fields": fields,
|
574
|
-
"tz": tz,
|
575
|
-
"inst": None,
|
576
|
-
"cat": 'crypto',
|
577
|
-
"trials": self.data_req.trials,
|
578
|
-
"pause": self.data_req.pause,
|
579
|
-
"source_tickers": self.data_req.source_tickers,
|
580
|
-
"source_freq": self.data_req.source_freq,
|
581
|
-
"source_fields": self.data_req.source_fields,
|
582
|
-
}
|
538
|
+
return self.data_req
|
583
539
|
|
584
540
|
def to_dbnomics(self) -> Dict[str, Union[list, str, int, float, None]]:
|
585
541
|
"""
|
@@ -694,7 +650,7 @@ class ConvertParams:
|
|
694
650
|
start_date = pd.Timestamp(self.data_req.start_date).strftime("%d/%m/%Y")
|
695
651
|
# convert end date
|
696
652
|
if self.data_req.end_date is None:
|
697
|
-
end_date =
|
653
|
+
end_date = pd.Timestamp.utcnow().strftime("%d/%m/%Y")
|
698
654
|
else:
|
699
655
|
end_date = pd.Timestamp(self.data_req.end_date).strftime("%d/%m/%Y")
|
700
656
|
# convert fields
|
@@ -732,108 +688,78 @@ class ConvertParams:
|
|
732
688
|
# convert tickers
|
733
689
|
with resources.path("cryptodatapy.conf", "tickers.csv") as f:
|
734
690
|
tickers_path = f
|
735
|
-
tickers_df
|
691
|
+
tickers_df = pd.read_csv(tickers_path, index_col=0, encoding="latin1")
|
736
692
|
|
737
|
-
if self.data_req.source_tickers is
|
738
|
-
|
739
|
-
self.data_req.tickers = self.data_req.source_tickers
|
740
|
-
else:
|
693
|
+
if self.data_req.source_tickers is None:
|
694
|
+
self.data_req.source_tickers = []
|
741
695
|
for ticker in self.data_req.tickers:
|
742
696
|
try:
|
743
|
-
|
697
|
+
self.data_req.source_tickers.append(tickers_df.loc[ticker, "fred_id"])
|
744
698
|
except KeyError:
|
745
699
|
logging.warning(
|
746
|
-
f"{ticker} not found for Fred
|
700
|
+
f"{ticker} not found for Fred source. Check tickers in"
|
747
701
|
f" data catalog and try again."
|
748
702
|
)
|
749
|
-
|
750
|
-
#
|
751
|
-
if self.data_req.source_freq is
|
752
|
-
|
753
|
-
|
754
|
-
else:
|
755
|
-
freq = self.data_req.freq
|
756
|
-
# convert quote ccy
|
757
|
-
quote_ccy = self.data_req.quote_ccy
|
703
|
+
|
704
|
+
# freq
|
705
|
+
if self.data_req.source_freq is None:
|
706
|
+
self.data_req.source_freq = self.data_req.freq
|
707
|
+
|
758
708
|
# start date
|
759
|
-
if self.data_req.
|
760
|
-
|
709
|
+
if self.data_req.source_start_date is None:
|
710
|
+
self.data_req.source_start_date = pd.Timestamp('1920-01-01')
|
761
711
|
else:
|
762
|
-
|
712
|
+
self.data_req.source_start_date = self.data_req.start_date
|
713
|
+
|
763
714
|
# end date
|
764
715
|
if self.data_req.end_date is None:
|
765
|
-
|
716
|
+
self.data_req.source_end_date = pd.Timestamp.utcnow().tz_localize(None)
|
766
717
|
else:
|
767
|
-
|
718
|
+
self.data_req.source_end_date = self.data_req.end_date
|
719
|
+
|
768
720
|
# fields
|
769
|
-
if self.data_req.source_fields is
|
770
|
-
|
771
|
-
|
772
|
-
else:
|
773
|
-
fields = self.convert_fields(data_source='fred')
|
721
|
+
if self.data_req.source_fields is None:
|
722
|
+
self.data_req.source_fields = self.convert_fields(data_source='fred')
|
723
|
+
|
774
724
|
# tz
|
775
725
|
if self.data_req.tz is None:
|
776
|
-
tz = "America/New_York"
|
777
|
-
else:
|
778
|
-
tz = self.data_req.tz
|
726
|
+
self.data_req.tz = "America/New_York"
|
779
727
|
|
780
|
-
return
|
781
|
-
"tickers": tickers,
|
782
|
-
"freq": freq,
|
783
|
-
"quote_ccy": quote_ccy,
|
784
|
-
"exch": self.data_req.exch,
|
785
|
-
"ctys": None,
|
786
|
-
"mkt_type": self.data_req.mkt_type,
|
787
|
-
"mkts": None,
|
788
|
-
"start_date": start_date,
|
789
|
-
"end_date": end_date,
|
790
|
-
"fields": fields,
|
791
|
-
"tz": tz,
|
792
|
-
"inst": None,
|
793
|
-
"cat": self.data_req.cat,
|
794
|
-
"trials": self.data_req.trials,
|
795
|
-
"pause": self.data_req.pause,
|
796
|
-
"source_tickers": self.data_req.source_tickers,
|
797
|
-
"source_freq": self.data_req.source_freq,
|
798
|
-
"source_fields": self.data_req.source_fields,
|
799
|
-
}
|
728
|
+
return self.data_req
|
800
729
|
|
801
730
|
def to_wb(self) -> Dict[str, Union[list, str, int, float, datetime, None]]:
|
802
731
|
"""
|
803
732
|
Convert tickers from CryptoDataPy to Yahoo Finance format.
|
804
733
|
"""
|
805
|
-
#
|
734
|
+
# tickers
|
806
735
|
with resources.path("cryptodatapy.conf", "tickers.csv") as f:
|
807
736
|
tickers_path = f
|
808
|
-
tickers_df
|
737
|
+
tickers_df = pd.read_csv(tickers_path, index_col=0, encoding="latin1")
|
809
738
|
|
810
|
-
if self.data_req.source_tickers is
|
811
|
-
|
812
|
-
self.data_req.tickers = self.data_req.source_tickers
|
813
|
-
else:
|
739
|
+
if self.data_req.source_tickers is None:
|
740
|
+
self.data_req.source_tickers = []
|
814
741
|
for ticker in self.data_req.tickers:
|
815
742
|
try:
|
816
|
-
|
743
|
+
self.data_req.source_tickers.append(tickers_df.loc[ticker, "wb_id"])
|
817
744
|
except KeyError:
|
818
745
|
logging.warning(
|
819
|
-
f"{ticker} not found for World Bank
|
746
|
+
f"{ticker} not found for World Bank source. Check tickers in"
|
820
747
|
f" data catalog and try again."
|
821
748
|
)
|
822
|
-
self.data_req.tickers.remove(ticker)
|
823
749
|
# drop dupes
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
self.data_req.
|
829
|
-
|
830
|
-
freq = self.data_req.freq
|
750
|
+
self.data_req.source_tickers = list(set(self.data_req.source_tickers))
|
751
|
+
|
752
|
+
# freq
|
753
|
+
if self.data_req.source_freq is None:
|
754
|
+
self.data_req.source_freq = self.data_req.freq
|
755
|
+
|
831
756
|
# convert quote ccy
|
832
757
|
if self.data_req.quote_ccy is None:
|
833
|
-
quote_ccy = "USD"
|
758
|
+
self.data_req.quote_ccy = "USD"
|
834
759
|
else:
|
835
|
-
quote_ccy = self.data_req.quote_ccy.upper()
|
836
|
-
|
760
|
+
self.data_req.quote_ccy = self.data_req.quote_ccy.upper()
|
761
|
+
|
762
|
+
# ctys
|
837
763
|
ctys_list = []
|
838
764
|
if self.data_req.cat == "macro":
|
839
765
|
for ticker in self.data_req.tickers:
|
@@ -844,184 +770,115 @@ class ConvertParams:
|
|
844
770
|
f"{ticker} not found for {self.data_req.source} source. Check tickers in "
|
845
771
|
f"data catalog and try again."
|
846
772
|
)
|
847
|
-
|
773
|
+
self.data_req.ctys = list(set(ctys_list))
|
774
|
+
|
848
775
|
# start date
|
849
776
|
if self.data_req.start_date is None:
|
850
|
-
|
777
|
+
self.data_req.source_start_date = 1920
|
851
778
|
else:
|
852
|
-
|
779
|
+
self.data_req.source_start_date = int(self.data_req.start_date.year)
|
780
|
+
|
853
781
|
# end date
|
854
782
|
if self.data_req.end_date is None:
|
855
|
-
|
783
|
+
self.data_req.source_end_date = pd.Timestamp.utcnow().year
|
856
784
|
else:
|
857
|
-
|
785
|
+
self.data_req.source_end_date = int(self.data_req.end_date.year)
|
786
|
+
|
858
787
|
# fields
|
859
|
-
if self.data_req.source_fields is
|
860
|
-
|
861
|
-
self.data_req.fields = self.data_req.source_fields
|
862
|
-
else:
|
863
|
-
fields = self.convert_fields(data_source='wb')
|
788
|
+
if self.data_req.source_fields is None:
|
789
|
+
self.data_req.source_fields = self.convert_fields(data_source='wb')
|
864
790
|
|
865
|
-
return
|
866
|
-
"tickers": tickers,
|
867
|
-
"freq": freq,
|
868
|
-
"quote_ccy": quote_ccy,
|
869
|
-
"exch": self.data_req.exch,
|
870
|
-
"ctys": ctys_list,
|
871
|
-
"mkt_type": None,
|
872
|
-
"mkts": None,
|
873
|
-
"start_date": start_date,
|
874
|
-
"end_date": end_date,
|
875
|
-
"fields": fields,
|
876
|
-
"tz": self.data_req.tz,
|
877
|
-
"inst": None,
|
878
|
-
"cat": self.data_req.cat,
|
879
|
-
"trials": self.data_req.trials,
|
880
|
-
"pause": self.data_req.pause,
|
881
|
-
"source_tickers": self.data_req.source_tickers,
|
882
|
-
"source_freq": self.data_req.source_freq,
|
883
|
-
"source_fields": self.data_req.source_fields,
|
884
|
-
}
|
791
|
+
return self.data_req
|
885
792
|
|
886
|
-
def to_yahoo(self) ->
|
793
|
+
def to_yahoo(self) -> DataRequest:
|
887
794
|
"""
|
888
795
|
Convert tickers from CryptoDataPy to Yahoo Finance format.
|
889
|
-
|
890
796
|
"""
|
891
|
-
#
|
797
|
+
# tickers
|
892
798
|
with resources.path("cryptodatapy.conf", "tickers.csv") as f:
|
893
799
|
tickers_path = f
|
894
|
-
tickers_df
|
800
|
+
tickers_df = pd.read_csv(tickers_path, index_col=0, encoding="latin1")
|
895
801
|
|
896
|
-
if self.data_req.source_tickers is
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
802
|
+
if self.data_req.source_tickers is None:
|
803
|
+
if self.data_req.cat == 'eqty':
|
804
|
+
self.data_req.source_tickers = [ticker.upper() for ticker in self.data_req.tickers]
|
805
|
+
self.data_req.tickers = self.data_req.source_tickers
|
806
|
+
else:
|
807
|
+
self.data_req.source_tickers = []
|
901
808
|
if self.data_req.cat == 'fx':
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
self.data_req.freq = self.data_req.source_freq
|
917
|
-
else:
|
918
|
-
freq = self.data_req.freq
|
919
|
-
# convert quote ccy
|
920
|
-
quote_ccy = self.data_req.quote_ccy
|
809
|
+
self.data_req.tickers = [ticker.upper() for ticker in self.data_req.tickers]
|
810
|
+
for ticker in self.data_req.tickers:
|
811
|
+
try:
|
812
|
+
self.data_req.source_tickers.append(tickers_df.loc[ticker, "yahoo_id"])
|
813
|
+
except KeyError:
|
814
|
+
logging.warning(
|
815
|
+
f"{ticker} not found for Yahoo Finance data source. Check tickers in"
|
816
|
+
f" data catalog and try again."
|
817
|
+
)
|
818
|
+
|
819
|
+
# freq
|
820
|
+
if self.data_req.source_freq is None:
|
821
|
+
self.data_req.source_freq = self.data_req.freq
|
822
|
+
|
921
823
|
# start date
|
922
824
|
if self.data_req.start_date is None:
|
923
|
-
|
825
|
+
self.data_req.source_start_date = '1920-01-01'
|
924
826
|
else:
|
925
|
-
|
827
|
+
self.data_req.source_start_date = self.data_req.start_date
|
828
|
+
|
926
829
|
# end date
|
927
830
|
if self.data_req.end_date is None:
|
928
|
-
|
831
|
+
self.data_req.source_end_date = pd.Timestamp.utcnow().strftime('%Y-%m-%d')
|
929
832
|
else:
|
930
|
-
|
833
|
+
self.data_req.source_end_date = self.data_req.end_date
|
834
|
+
|
931
835
|
# fields
|
932
|
-
if self.data_req.source_fields is
|
933
|
-
|
934
|
-
|
935
|
-
else:
|
936
|
-
fields = self.convert_fields(data_source='yahoo')
|
836
|
+
if self.data_req.source_fields is None:
|
837
|
+
self.data_req.source_fields = self.convert_fields(data_source='yahoo')
|
838
|
+
|
937
839
|
# tz
|
938
840
|
if self.data_req.tz is None:
|
939
|
-
tz = "America/New_York"
|
940
|
-
else:
|
941
|
-
tz = self.data_req.tz
|
841
|
+
self.data_req.tz = "America/New_York"
|
942
842
|
|
943
|
-
return
|
944
|
-
"tickers": tickers,
|
945
|
-
"freq": freq,
|
946
|
-
"quote_ccy": quote_ccy,
|
947
|
-
"exch": self.data_req.exch,
|
948
|
-
"ctys": None,
|
949
|
-
"mkt_type": self.data_req.mkt_type,
|
950
|
-
"mkts": None,
|
951
|
-
"start_date": start_date,
|
952
|
-
"end_date": end_date,
|
953
|
-
"fields": fields,
|
954
|
-
"tz": tz,
|
955
|
-
"inst": None,
|
956
|
-
"cat": self.data_req.cat,
|
957
|
-
"trials": self.data_req.trials,
|
958
|
-
"pause": self.data_req.pause,
|
959
|
-
"source_tickers": self.data_req.source_tickers,
|
960
|
-
"source_freq": self.data_req.source_freq,
|
961
|
-
"source_fields": self.data_req.source_fields,
|
962
|
-
}
|
843
|
+
return self.data_req
|
963
844
|
|
964
|
-
def to_famafrench(self) ->
|
845
|
+
def to_famafrench(self) -> DataRequest:
|
965
846
|
"""
|
966
847
|
Convert tickers from CryptoDataPy to Fama-French format.
|
967
848
|
"""
|
968
|
-
#
|
849
|
+
# tickers
|
969
850
|
with resources.path("cryptodatapy.conf", "tickers.csv") as f:
|
970
851
|
tickers_path = f
|
971
|
-
tickers_df
|
852
|
+
tickers_df = pd.read_csv(tickers_path, index_col=0, encoding="latin1")
|
972
853
|
|
973
|
-
if self.data_req.source_tickers is
|
974
|
-
|
975
|
-
self.data_req.tickers = self.data_req.source_tickers
|
976
|
-
else:
|
854
|
+
if self.data_req.source_tickers is None:
|
855
|
+
self.data_req.source_tickers = []
|
977
856
|
for ticker in self.data_req.tickers:
|
978
857
|
try:
|
979
|
-
|
858
|
+
self.data_req.source_tickers.append(tickers_df.loc[ticker, "famafrench_id"])
|
980
859
|
except KeyError:
|
981
860
|
logging.warning(
|
982
861
|
f"{ticker} not found for Fama-French source. Check tickers in"
|
983
862
|
f" data catalog and try again."
|
984
863
|
)
|
985
|
-
|
986
|
-
#
|
987
|
-
if self.data_req.source_freq is
|
988
|
-
|
989
|
-
|
990
|
-
else:
|
991
|
-
freq = self.data_req.freq
|
992
|
-
# convert quote ccy
|
993
|
-
quote_ccy = self.data_req.quote_ccy
|
864
|
+
|
865
|
+
# freq
|
866
|
+
if self.data_req.source_freq is None:
|
867
|
+
self.data_req.source_freq = self.data_req.freq
|
868
|
+
|
994
869
|
# start date
|
995
870
|
if self.data_req.start_date is None:
|
996
|
-
|
871
|
+
self.data_req.source_start_date = datetime(1920, 1, 1)
|
997
872
|
else:
|
998
|
-
|
873
|
+
self.data_req.source_start_date = self.data_req.start_date
|
874
|
+
|
999
875
|
# end date
|
1000
876
|
if self.data_req.end_date is None:
|
1001
|
-
|
877
|
+
self.data_req.source_end_date = datetime.now()
|
1002
878
|
else:
|
1003
|
-
|
879
|
+
self.data_req.source_end_date = self.data_req.end_date
|
1004
880
|
|
1005
|
-
return
|
1006
|
-
"tickers": tickers,
|
1007
|
-
"freq": freq,
|
1008
|
-
"quote_ccy": quote_ccy,
|
1009
|
-
"exch": self.data_req.exch,
|
1010
|
-
"ctys": None,
|
1011
|
-
"mkt_type": self.data_req.mkt_type,
|
1012
|
-
"mkts": None,
|
1013
|
-
"start_date": start_date,
|
1014
|
-
"end_date": end_date,
|
1015
|
-
"fields": self.data_req.fields,
|
1016
|
-
"tz": self.data_req.tz,
|
1017
|
-
"inst": None,
|
1018
|
-
"cat": self.data_req.cat,
|
1019
|
-
"trials": self.data_req.trials,
|
1020
|
-
"pause": self.data_req.pause,
|
1021
|
-
"source_tickers": self.data_req.source_tickers,
|
1022
|
-
"source_freq": self.data_req.source_freq,
|
1023
|
-
"source_fields": self.data_req.source_fields,
|
1024
|
-
}
|
881
|
+
return self.data_req
|
1025
882
|
|
1026
883
|
def to_aqr(self) -> Dict[str, Union[list, str, int, dict, float, datetime, None]]:
|
1027
884
|
"""
|