cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,19 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "code",
5
- "execution_count": 44,
5
+ "execution_count": 1,
6
6
  "id": "9fea9fae",
7
7
  "metadata": {},
8
- "outputs": [],
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "fatal: bad revision 'HEAD'\n",
14
+ "Importing plotly failed. Interactive plots will not work.\n"
15
+ ]
16
+ }
17
+ ],
9
18
  "source": [
10
19
  "import pandas as pd\n",
11
20
  "import numpy as np\n",
@@ -30,7 +39,7 @@
30
39
  },
31
40
  {
32
41
  "cell_type": "code",
33
- "execution_count": 45,
42
+ "execution_count": 2,
34
43
  "id": "2ad72bc7-5fdd-4ae5-8d9e-e90118efcc26",
35
44
  "metadata": {},
36
45
  "outputs": [],
@@ -40,7 +49,7 @@
40
49
  },
41
50
  {
42
51
  "cell_type": "code",
43
- "execution_count": 46,
52
+ "execution_count": 3,
44
53
  "id": "0342bab7-a792-4be3-8d4d-44c4343d0e6a",
45
54
  "metadata": {},
46
55
  "outputs": [
@@ -76,7 +85,7 @@
76
85
  "Name: yahoo_id, dtype: object"
77
86
  ]
78
87
  },
79
- "execution_count": 46,
88
+ "execution_count": 3,
80
89
  "metadata": {},
81
90
  "output_type": "execute_result"
82
91
  }
@@ -95,7 +104,7 @@
95
104
  },
96
105
  {
97
106
  "cell_type": "code",
98
- "execution_count": 47,
107
+ "execution_count": 4,
99
108
  "id": "d875cd96-a29c-4e22-9806-a1b1c2513564",
100
109
  "metadata": {},
101
110
  "outputs": [],
@@ -105,7 +114,7 @@
105
114
  },
106
115
  {
107
116
  "cell_type": "code",
108
- "execution_count": 48,
117
+ "execution_count": 5,
109
118
  "id": "1bf0af0d-7ed7-4e07-9da1-5625b1f32bce",
110
119
  "metadata": {},
111
120
  "outputs": [
@@ -168,7 +177,7 @@
168
177
  " 'REET']"
169
178
  ]
170
179
  },
171
- "execution_count": 48,
180
+ "execution_count": 5,
172
181
  "metadata": {},
173
182
  "output_type": "execute_result"
174
183
  }
@@ -179,7 +188,7 @@
179
188
  },
180
189
  {
181
190
  "cell_type": "code",
182
- "execution_count": 49,
191
+ "execution_count": 6,
183
192
  "id": "0b1bc395-40dd-44c0-bac1-dd1e00f8a5c4",
184
193
  "metadata": {},
185
194
  "outputs": [],
@@ -189,7 +198,7 @@
189
198
  },
190
199
  {
191
200
  "cell_type": "code",
192
- "execution_count": 50,
201
+ "execution_count": 7,
193
202
  "id": "2674260a-56c7-40a4-9708-0eb335fa075d",
194
203
  "metadata": {},
195
204
  "outputs": [
@@ -207,7 +216,7 @@
207
216
  },
208
217
  {
209
218
  "cell_type": "code",
210
- "execution_count": 56,
219
+ "execution_count": 8,
211
220
  "id": "ee0523a8-c6ee-42b9-9410-36b26906f2de",
212
221
  "metadata": {},
213
222
  "outputs": [
@@ -425,78 +434,6 @@
425
434
  " <td>...</td>\n",
426
435
  " </tr>\n",
427
436
  " <tr>\n",
428
- " <th>2024-09-11</th>\n",
429
- " <td>64.93</td>\n",
430
- " <td>66.199997</td>\n",
431
- " <td>23.629999</td>\n",
432
- " <td>92.019997</td>\n",
433
- " <td>31.809999</td>\n",
434
- " <td>8.06</td>\n",
435
- " <td>69.559998</td>\n",
436
- " <td>50.709999</td>\n",
437
- " <td>25.52</td>\n",
438
- " <td>36.630001</td>\n",
439
- " <td>...</td>\n",
440
- " <td>61.66</td>\n",
441
- " <td>&lt;NA&gt;</td>\n",
442
- " <td>57.130001</td>\n",
443
- " <td>49.330002</td>\n",
444
- " <td>114.629997</td>\n",
445
- " <td>17.67</td>\n",
446
- " <td>26.59</td>\n",
447
- " <td>25.309999</td>\n",
448
- " <td>39.07</td>\n",
449
- " <td>15.0</td>\n",
450
- " </tr>\n",
451
- " <tr>\n",
452
- " <th>2024-09-12</th>\n",
453
- " <td>65.75</td>\n",
454
- " <td>67.93</td>\n",
455
- " <td>23.870001</td>\n",
456
- " <td>92.519997</td>\n",
457
- " <td>32.130001</td>\n",
458
- " <td>8.06</td>\n",
459
- " <td>70.25</td>\n",
460
- " <td>51.07</td>\n",
461
- " <td>25.450001</td>\n",
462
- " <td>36.790001</td>\n",
463
- " <td>...</td>\n",
464
- " <td>62.509998</td>\n",
465
- " <td>&lt;NA&gt;</td>\n",
466
- " <td>57.759998</td>\n",
467
- " <td>48.939999</td>\n",
468
- " <td>115.629997</td>\n",
469
- " <td>18.1</td>\n",
470
- " <td>26.780001</td>\n",
471
- " <td>25.85</td>\n",
472
- " <td>39.950001</td>\n",
473
- " <td>14.96</td>\n",
474
- " </tr>\n",
475
- " <tr>\n",
476
- " <th>2024-09-13</th>\n",
477
- " <td>66.139999</td>\n",
478
- " <td>68.529999</td>\n",
479
- " <td>24.120001</td>\n",
480
- " <td>93.5</td>\n",
481
- " <td>32.400002</td>\n",
482
- " <td>8.06</td>\n",
483
- " <td>69.870003</td>\n",
484
- " <td>51.389999</td>\n",
485
- " <td>25.93</td>\n",
486
- " <td>36.900002</td>\n",
487
- " <td>...</td>\n",
488
- " <td>63.299999</td>\n",
489
- " <td>&lt;NA&gt;</td>\n",
490
- " <td>57.849998</td>\n",
491
- " <td>48.790001</td>\n",
492
- " <td>116.209999</td>\n",
493
- " <td>18.139999</td>\n",
494
- " <td>27.01</td>\n",
495
- " <td>25.950001</td>\n",
496
- " <td>40.549999</td>\n",
497
- " <td>15.11</td>\n",
498
- " </tr>\n",
499
- " <tr>\n",
500
437
  " <th>2024-09-16</th>\n",
501
438
  " <td>66.760002</td>\n",
502
439
  " <td>68.5</td>\n",
@@ -544,9 +481,81 @@
544
481
  " <td>40.439999</td>\n",
545
482
  " <td>15.19</td>\n",
546
483
  " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>2024-09-18</th>\n",
486
+ " <td>66.400002</td>\n",
487
+ " <td>68.769997</td>\n",
488
+ " <td>24.27</td>\n",
489
+ " <td>94.199997</td>\n",
490
+ " <td>32.580002</td>\n",
491
+ " <td>8.06</td>\n",
492
+ " <td>69.389999</td>\n",
493
+ " <td>51.16</td>\n",
494
+ " <td>26.42</td>\n",
495
+ " <td>36.990002</td>\n",
496
+ " <td>...</td>\n",
497
+ " <td>63.259998</td>\n",
498
+ " <td>&lt;NA&gt;</td>\n",
499
+ " <td>57.66</td>\n",
500
+ " <td>49.880001</td>\n",
501
+ " <td>116.160004</td>\n",
502
+ " <td>18.049999</td>\n",
503
+ " <td>26.889999</td>\n",
504
+ " <td>25.66</td>\n",
505
+ " <td>40.900002</td>\n",
506
+ " <td>15.07</td>\n",
507
+ " </tr>\n",
508
+ " <tr>\n",
509
+ " <th>2024-09-19</th>\n",
510
+ " <td>67.709999</td>\n",
511
+ " <td>70.150002</td>\n",
512
+ " <td>24.690001</td>\n",
513
+ " <td>95.419998</td>\n",
514
+ " <td>33.09</td>\n",
515
+ " <td>8.06</td>\n",
516
+ " <td>71.230003</td>\n",
517
+ " <td>51.509998</td>\n",
518
+ " <td>26.74</td>\n",
519
+ " <td>37.490002</td>\n",
520
+ " <td>...</td>\n",
521
+ " <td>63.91</td>\n",
522
+ " <td>&lt;NA&gt;</td>\n",
523
+ " <td>58.110001</td>\n",
524
+ " <td>47.720001</td>\n",
525
+ " <td>118.230003</td>\n",
526
+ " <td>18.219999</td>\n",
527
+ " <td>26.98</td>\n",
528
+ " <td>25.799999</td>\n",
529
+ " <td>41.400002</td>\n",
530
+ " <td>15.41</td>\n",
531
+ " </tr>\n",
532
+ " <tr>\n",
533
+ " <th>2024-09-20</th>\n",
534
+ " <td>68.089996</td>\n",
535
+ " <td>69.389999</td>\n",
536
+ " <td>24.549999</td>\n",
537
+ " <td>94.970001</td>\n",
538
+ " <td>32.689999</td>\n",
539
+ " <td>8.06</td>\n",
540
+ " <td>71.269997</td>\n",
541
+ " <td>50.66</td>\n",
542
+ " <td>26.940001</td>\n",
543
+ " <td>37.040001</td>\n",
544
+ " <td>...</td>\n",
545
+ " <td>63.32</td>\n",
546
+ " <td>&lt;NA&gt;</td>\n",
547
+ " <td>58.740002</td>\n",
548
+ " <td>47.43</td>\n",
549
+ " <td>117.760002</td>\n",
550
+ " <td>18.15</td>\n",
551
+ " <td>26.83</td>\n",
552
+ " <td>25.799999</td>\n",
553
+ " <td>41.349998</td>\n",
554
+ " <td>15.4</td>\n",
555
+ " </tr>\n",
547
556
  " </tbody>\n",
548
557
  "</table>\n",
549
- "<p>7965 rows × 54 columns</p>\n",
558
+ "<p>7968 rows × 54 columns</p>\n",
550
559
  "</div>"
551
560
  ],
552
561
  "text/plain": [
@@ -558,11 +567,11 @@
558
567
  "1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
559
568
  "1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
560
569
  "... ... ... ... ... ... ... \n",
561
- "2024-09-11 64.93 66.199997 23.629999 92.019997 31.809999 8.06 \n",
562
- "2024-09-12 65.75 67.93 23.870001 92.519997 32.130001 8.06 \n",
563
- "2024-09-13 66.139999 68.529999 24.120001 93.5 32.400002 8.06 \n",
564
570
  "2024-09-16 66.760002 68.5 24.48 94.160004 32.599998 8.06 \n",
565
571
  "2024-09-17 66.43 68.260002 24.15 94.330002 32.549999 8.06 \n",
572
+ "2024-09-18 66.400002 68.769997 24.27 94.199997 32.580002 8.06 \n",
573
+ "2024-09-19 67.709999 70.150002 24.690001 95.419998 33.09 8.06 \n",
574
+ "2024-09-20 68.089996 69.389999 24.549999 94.970001 32.689999 8.06 \n",
566
575
  "\n",
567
576
  "ticker EIDO EIRL EIS ENOR ... PAK PGAL \\\n",
568
577
  "date ... \n",
@@ -572,11 +581,11 @@
572
581
  "1993-02-03 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
573
582
  "1993-02-04 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
574
583
  "... ... ... ... ... ... ... ... \n",
575
- "2024-09-11 69.559998 50.709999 25.52 36.630001 ... 61.66 <NA> \n",
576
- "2024-09-12 70.25 51.07 25.450001 36.790001 ... 62.509998 <NA> \n",
577
- "2024-09-13 69.870003 51.389999 25.93 36.900002 ... 63.299999 <NA> \n",
578
584
  "2024-09-16 70.540001 51.580002 26.049999 37.299999 ... 63.52 <NA> \n",
579
585
  "2024-09-17 69.709999 51.400002 26.34 37.029999 ... 63.220001 <NA> \n",
586
+ "2024-09-18 69.389999 51.16 26.42 36.990002 ... 63.259998 <NA> \n",
587
+ "2024-09-19 71.230003 51.509998 26.74 37.490002 ... 63.91 <NA> \n",
588
+ "2024-09-20 71.269997 50.66 26.940001 37.040001 ... 63.32 <NA> \n",
580
589
  "\n",
581
590
  "ticker QAT REET SPY THD TUR UAE \\\n",
582
591
  "date \n",
@@ -586,11 +595,11 @@
586
595
  "1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
587
596
  "1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
588
597
  "... ... ... ... ... ... ... \n",
589
- "2024-09-11 57.130001 49.330002 114.629997 17.67 26.59 25.309999 \n",
590
- "2024-09-12 57.759998 48.939999 115.629997 18.1 26.780001 25.85 \n",
591
- "2024-09-13 57.849998 48.790001 116.209999 18.139999 27.01 25.950001 \n",
592
598
  "2024-09-16 58.130001 49.240002 116.610001 18.200001 27.17 26.07 \n",
593
599
  "2024-09-17 57.959999 50.299999 116.489998 18.049999 26.969999 25.82 \n",
600
+ "2024-09-18 57.66 49.880001 116.160004 18.049999 26.889999 25.66 \n",
601
+ "2024-09-19 58.110001 47.720001 118.230003 18.219999 26.98 25.799999 \n",
602
+ "2024-09-20 58.740002 47.43 117.760002 18.15 26.83 25.799999 \n",
594
603
  "\n",
595
604
  "ticker URTH VXX \n",
596
605
  "date \n",
@@ -600,16 +609,16 @@
600
609
  "1993-02-03 <NA> <NA> \n",
601
610
  "1993-02-04 <NA> <NA> \n",
602
611
  "... ... ... \n",
603
- "2024-09-11 39.07 15.0 \n",
604
- "2024-09-12 39.950001 14.96 \n",
605
- "2024-09-13 40.549999 15.11 \n",
606
612
  "2024-09-16 40.66 15.09 \n",
607
613
  "2024-09-17 40.439999 15.19 \n",
614
+ "2024-09-18 40.900002 15.07 \n",
615
+ "2024-09-19 41.400002 15.41 \n",
616
+ "2024-09-20 41.349998 15.4 \n",
608
617
  "\n",
609
- "[7965 rows x 54 columns]"
618
+ "[7968 rows x 54 columns]"
610
619
  ]
611
620
  },
612
- "execution_count": 56,
621
+ "execution_count": 8,
613
622
  "metadata": {},
614
623
  "output_type": "execute_result"
615
624
  }
@@ -620,10 +629,28 @@
620
629
  },
621
630
  {
622
631
  "cell_type": "code",
623
- "execution_count": null,
632
+ "execution_count": 10,
624
633
  "id": "83f30ee7-b686-4ee8-8c5f-9333d2be31c8",
625
634
  "metadata": {},
626
635
  "outputs": [],
636
+ "source": [
637
+ "msci_etf_df.to_parquet('s3://factorlab-data/global_msci_eqty_etf_data_daily.parquet')"
638
+ ]
639
+ },
640
+ {
641
+ "cell_type": "code",
642
+ "execution_count": null,
643
+ "id": "fa7fb5fc-e6f3-4a1e-bda5-365d09e2b3e0",
644
+ "metadata": {},
645
+ "outputs": [],
646
+ "source": []
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": null,
651
+ "id": "0ad35f12-5943-4640-96fb-07b7ea738abd",
652
+ "metadata": {},
653
+ "outputs": [],
627
654
  "source": []
628
655
  },
629
656
  {
@@ -469,7 +469,6 @@ class WrangleData:
469
469
  """
470
470
  Wrangles time series data responses from various APIs into tidy data format.
471
471
  """
472
-
473
472
  def __init__(self, data_req: DataRequest, data_resp: Union[Dict[str, pd.DataFrame], pd.DataFrame]):
474
473
  """
475
474
  Constructor
@@ -484,6 +483,7 @@ class WrangleData:
484
483
  """
485
484
  self.data_req = data_req
486
485
  self.data_resp = data_resp
486
+ self.tidy_data = pd.DataFrame()
487
487
 
488
488
  def cryptocompare(self) -> pd.DataFrame:
489
489
  """
@@ -717,16 +717,22 @@ class WrangleData:
717
717
  """
718
718
  # convert fields to lib
719
719
  self.convert_fields_to_lib(data_source='dbnomics')
720
+
720
721
  # convert to datetime
721
722
  self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
723
+
722
724
  # set index
723
725
  self.data_resp = self.data_resp.set_index('date').sort_index()
726
+
724
727
  # resample
725
728
  self.data_resp = self.data_resp.resample(self.data_req.freq).last().ffill()
729
+
726
730
  # filter dates
727
731
  self.filter_dates()
732
+
728
733
  # type conversion
729
734
  self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
735
+
730
736
  # remove bad data
731
737
  self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
732
738
  self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
@@ -734,36 +740,125 @@ class WrangleData:
734
740
 
735
741
  return self.data_resp
736
742
 
737
- def ccxt(self) -> pd.DataFrame:
743
+ def ccxt_ohlcv(self) -> pd.DataFrame:
738
744
  """
739
- Wrangles CCXT data response to dataframe with tidy data format.
745
+ Wrangles CCXT OHLCV data response to dataframe with tidy data format.
740
746
 
741
747
  Returns
742
748
  -------
743
749
  pd.DataFrame
744
- Wrangled dataframe into tidy data format.
750
+ Dataframe with tidy data format.
751
+ """
752
+ # field cols
753
+ cols = ["date", "open", "high", "low", "close", "volume"]
754
+
755
+ # add tickers
756
+ for i in range(len(self.data_req.source_markets)):
757
+ df = pd.DataFrame(self.data_resp[i], columns=cols)
758
+ df['ticker'] = self.data_req.source_markets[i]
759
+ self.tidy_data = pd.concat([self.tidy_data, df])
745
760
 
761
+ # convert to datetime
762
+ self.tidy_data['date'] = pd.to_datetime(self.tidy_data['date'], unit='ms')
763
+
764
+ # set index
765
+ self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
766
+
767
+ return self.tidy_data
768
+
769
+ def ccxt_funding_rates(self) -> pd.DataFrame:
746
770
  """
747
- # convert fields to lib
771
+ Wrangles CCXT funding rates data response to dataframe with tidy data format.
772
+
773
+ Returns
774
+ -------
775
+ pd.DataFrame
776
+ Dataframe with tidy data format.
777
+ """
778
+ # add tickers
779
+ for i in range(len(self.data_req.source_markets)):
780
+ df = pd.DataFrame(self.data_resp[i])
781
+ self.tidy_data = pd.concat([self.tidy_data, df])
782
+ self.tidy_data = self.tidy_data[['symbol', 'fundingRate', 'datetime']]
783
+ self.data_resp = self.tidy_data
784
+
785
+ # convert to lib fields
748
786
  self.convert_fields_to_lib(data_source='ccxt')
787
+ self.tidy_data = self.data_resp
788
+
749
789
  # convert to datetime
750
- if 'close' in self.data_resp.columns:
751
- self.data_resp['date'] = pd.to_datetime(self.data_resp.date, unit='ms')
752
- elif 'funding_rate' in self.data_resp.columns:
753
- self.data_resp['date'] = pd.to_datetime(self.data_resp.set_index('date').index).floor('s').tz_localize(None)
790
+ self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
791
+
754
792
  # set index
755
- self.data_resp = self.data_resp.set_index('date').sort_index()
793
+ self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
794
+
756
795
  # resample
757
- if 'funding_rate' in self.data_resp.columns and self.data_req.freq in ['d', 'w', 'm', 'q', 'y']:
758
- self.data_resp = ((self.data_resp.funding_rate + 1).resample(self.data_req.freq).prod() - 1).to_frame()
796
+ if self.data_req.freq in ['d', 'w', 'm', 'q', 'y']:
797
+ self.tidy_data = (
798
+ (1 + self.tidy_data.funding_rate)
799
+ .groupby('ticker')
800
+ .resample('d', level='date')
801
+ .prod() - 1
802
+ ).to_frame().swaplevel('ticker', 'date').sort_index()
803
+
804
+ return self.tidy_data
805
+
806
+ def ccxt_open_interest(self) -> pd.DataFrame:
807
+ """
808
+ Wrangles CCXT open interest data response to dataframe with tidy data format.
809
+
810
+ Returns
811
+ -------
812
+ pd.DataFrame
813
+ Dataframe with tidy data format.
814
+ """
815
+ # add tickers
816
+ for i in range(len(self.data_req.source_markets)):
817
+ df = pd.DataFrame(self.data_resp[i])
818
+ self.tidy_data = pd.concat([self.tidy_data, df])
819
+ self.tidy_data = self.tidy_data[['symbol', 'openInterestAmount', 'datetime']]
820
+ self.data_resp = self.tidy_data
821
+
822
+ # convert to lib fields
823
+ self.convert_fields_to_lib(data_source='ccxt')
824
+ self.tidy_data = self.data_resp
825
+
826
+ # convert to datetime
827
+ self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
828
+
829
+ # set index
830
+ self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
831
+
832
+ return self.tidy_data
833
+
834
+ def ccxt(self, data_type: str) -> pd.DataFrame:
835
+ """
836
+ Wrangles CCXT data response to dataframe with tidy data format.
837
+
838
+ Returns
839
+ -------
840
+ pd.DataFrame
841
+ Wrangled dataframe into tidy data format.
842
+
843
+ """
844
+ if data_type == 'ohlcv':
845
+ self.tidy_data = self.ccxt_ohlcv()
846
+ elif data_type == 'funding_rates':
847
+ self.tidy_data = self.ccxt_funding_rates()
848
+ elif data_type == 'open_interest':
849
+ self.tidy_data = self.ccxt_open_interest()
850
+ else:
851
+ raise ValueError(f"Data type {data_type} not supported.")
852
+
759
853
  # type conversion
760
- self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
854
+ self.tidy_data = self.tidy_data.apply(pd.to_numeric, errors='coerce').convert_dtypes()
855
+
761
856
  # remove bad data
762
- self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
763
- self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
764
- self.data_resp = self.data_resp.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
857
+ self.tidy_data = self.tidy_data[self.tidy_data != 0] # 0 values
858
+ self.tidy_data = self.tidy_data[~self.tidy_data.index.duplicated()] # duplicate rows
859
+ self.tidy_data = self.tidy_data.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
765
860
 
766
- return self.data_resp
861
+ return self.tidy_data
767
862
 
768
863
  def fred(self) -> pd.DataFrame:
769
864
  """
@@ -773,24 +868,29 @@ class WrangleData:
773
868
  -------
774
869
  pd.DataFrame
775
870
  Wrangled dataframe into tidy data format.
776
-
777
871
  """
778
- # convert tickers to cryptodatapy format
872
+ # tickers
779
873
  self.data_resp.columns = self.data_req.tickers # convert tickers to cryptodatapy format
874
+
780
875
  # resample to match end of reporting period, not beginning
781
876
  self.data_resp = self.data_resp.resample('d').last().ffill().resample(self.data_req.freq).last().stack(). \
782
877
  to_frame().reset_index()
878
+
783
879
  # convert cols
784
880
  if self.data_req.cat == 'macro':
785
881
  self.data_resp.columns = ['DATE', 'symbol', 'actual']
786
882
  else:
787
883
  self.data_resp.columns = ['DATE', 'symbol', 'close']
788
- # convert fields to lib
884
+
885
+ # fields
789
886
  self.convert_fields_to_lib(data_source='fred')
790
- # set index
887
+
888
+ # index
791
889
  self.data_resp.set_index(['date', 'ticker'], inplace=True)
890
+
792
891
  # type conversion
793
892
  self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
893
+
794
894
  # remove bad data
795
895
  self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
796
896
  self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
@@ -807,37 +907,41 @@ class WrangleData:
807
907
  pd.DataFrame
808
908
  Wrangled dataframe into tidy data format.
809
909
  """
810
- # convert tickers
811
- if len(self.data_req.tickers) == 1: # add ticker
812
- if self.data_req.cat == 'eqty' or self.data_req.cat == 'fx':
813
- self.data_resp['Ticker'] = self.data_req.tickers[0].upper()
814
- else:
815
- self.data_resp['Ticker'] = self.data_req.tickers[0]
816
- else: # convert tickers to cryptodatapy format
817
- self.data_resp = self.data_resp.stack() # stack to multi-index
910
+ # tickers
911
+ tickers_dict = {source_ticker: ticker for source_ticker, ticker in zip(self.data_req.source_tickers,
912
+ self.data_req.tickers)}
913
+ if len(self.data_req.tickers) == 1:
914
+ self.data_resp['Ticker'] = self.data_req.tickers[0]
915
+ else:
916
+ self.data_resp = self.data_resp.stack()
818
917
  self.data_resp.index.names = ['Date', 'Ticker']
819
- if self.data_req.cat == 'eqty' or self.data_req.cat == 'fx':
820
- self.data_resp.index = self.data_resp.index.set_levels([ticker.upper() for ticker in
821
- self.data_req.tickers], level=1)
822
- else:
823
- self.data_resp.index = self.data_resp.index.set_levels([ticker for ticker in self.data_req.tickers],
824
- level=1)
918
+ self.data_resp.index = self.data_resp.index.set_levels(self.data_resp.index.levels[1].map(tickers_dict),
919
+ level=1)
825
920
  self.data_resp.reset_index(inplace=True)
826
- # convert fields
921
+
922
+ # fields
827
923
  self.convert_fields_to_lib(data_source='yahoo')
828
- # convert to datetime
924
+
925
+ # index
829
926
  self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
927
+ self.data_resp.set_index(['date', 'ticker'], inplace=True)
928
+
830
929
  # resample
831
- self.data_resp = self.data_resp.set_index('date').groupby('ticker').resample(self.data_req.freq).last().\
832
- droplevel(0).reset_index().set_index(['date', 'ticker'])
930
+ self.data_resp = self.data_resp.groupby('ticker').\
931
+ resample(self.data_req.freq, level='date').\
932
+ last().swaplevel('ticker', 'date').sort_index()
933
+
833
934
  # re-order cols
834
935
  self.data_resp = self.data_resp.loc[:, ['open', 'high', 'low', 'close', 'close_adj', 'volume']]
936
+
835
937
  # type conversion
836
938
  self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
939
+
837
940
  # remove bad data
838
941
  self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
839
942
  self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
840
943
  self.data_resp = self.data_resp.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
944
+
841
945
  # keep only requested fields and sort index
842
946
  self.data_resp = self.data_resp[self.data_req.fields].sort_index()
843
947
 
@@ -853,7 +957,7 @@ class WrangleData:
853
957
  Wrangled dataframe into tidy data format.
854
958
 
855
959
  """
856
- # convert tickers to cryptodatapy format
960
+ # ticker
857
961
  ff_tickers_dict = {'RF': 'US_Rates_1M_RF',
858
962
  'Mkt-RF': 'US_Eqty_CSRP_ER',
859
963
  'HML': 'US_Eqty_Val',
@@ -862,6 +966,7 @@ class WrangleData:
862
966
  'CMA': 'US_Eqty_Inv',
863
967
  'Mom': 'US_Eqty_Mom',
864
968
  'ST_Rev': 'US_Eqty_STRev'}
969
+
865
970
  # remove white space from cols str
866
971
  self.data_resp.columns = [col.strip() for col in self.data_resp.columns]
867
972
  # keep cols in data req tickers
@@ -870,14 +975,18 @@ class WrangleData:
870
975
  drop_cols = [col for col in self.data_resp.columns if col not in self.data_req.tickers]
871
976
  self.data_resp.drop(columns=drop_cols, inplace=True)
872
977
  self.data_resp = self.data_resp.loc[:, ~self.data_resp.columns.duplicated()] # drop dup cols
978
+
873
979
  # resample freq
874
980
  self.data_resp = self.data_resp.resample(self.data_req.freq).sum()
981
+
875
982
  # format index
876
983
  self.data_resp.index.name = 'date' # rename
877
984
  self.data_resp = self.data_resp.stack().to_frame('er')
878
985
  self.data_resp.index.names = ['date', 'ticker']
986
+
879
987
  # type and conversion to decimals
880
988
  self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes() / 100
989
+
881
990
  # remove bad data
882
991
  self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
883
992
  self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
@@ -1016,9 +1125,9 @@ class WrangleData:
1016
1125
 
1017
1126
  # loop through data resp cols
1018
1127
  for col in self.data_resp.columns:
1019
- if self.data_req.source_fields is not None and col in self.data_req.source_fields:
1020
- pass
1021
- elif col in fields_list or col.title() in fields_list or col.lower() in fields_list:
1128
+ # if self.data_req.source_fields is not None and col in self.data_req.source_fields:
1129
+ # pass
1130
+ if col in fields_list or col.title() in fields_list or col.lower() in fields_list:
1022
1131
  self.data_resp.rename(columns={col: fields_df[(fields_df[str(data_source) + '_id']
1023
1132
  == col.title()) |
1024
1133
  (fields_df[str(data_source) + '_id'] == col.lower()) |