cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fields.csv +1 -1
- cryptodatapy/extract/datarequest.py +169 -28
- cryptodatapy/extract/libraries/Untitled.ipynb +199 -0
- cryptodatapy/extract/libraries/ccxt.ipynb +747 -0
- cryptodatapy/extract/libraries/ccxt_api.py +631 -358
- cryptodatapy/extract/libraries/pandasdr_api.py +153 -138
- cryptodatapy/extract/libraries/yfinance_api.py +511 -0
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +226 -30
- cryptodatapy/transform/cmdty_data.ipynb +402 -0
- cryptodatapy/transform/convertparams.py +160 -303
- cryptodatapy/transform/eqty_data.ipynb +126 -99
- cryptodatapy/transform/wrangle.py +152 -43
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/METADATA +9 -6
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/RECORD +16 -12
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/WHEEL +1 -1
- {cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/LICENSE +0 -0
@@ -2,10 +2,19 @@
|
|
2
2
|
"cells": [
|
3
3
|
{
|
4
4
|
"cell_type": "code",
|
5
|
-
"execution_count":
|
5
|
+
"execution_count": 1,
|
6
6
|
"id": "9fea9fae",
|
7
7
|
"metadata": {},
|
8
|
-
"outputs": [
|
8
|
+
"outputs": [
|
9
|
+
{
|
10
|
+
"name": "stderr",
|
11
|
+
"output_type": "stream",
|
12
|
+
"text": [
|
13
|
+
"fatal: bad revision 'HEAD'\n",
|
14
|
+
"Importing plotly failed. Interactive plots will not work.\n"
|
15
|
+
]
|
16
|
+
}
|
17
|
+
],
|
9
18
|
"source": [
|
10
19
|
"import pandas as pd\n",
|
11
20
|
"import numpy as np\n",
|
@@ -30,7 +39,7 @@
|
|
30
39
|
},
|
31
40
|
{
|
32
41
|
"cell_type": "code",
|
33
|
-
"execution_count":
|
42
|
+
"execution_count": 2,
|
34
43
|
"id": "2ad72bc7-5fdd-4ae5-8d9e-e90118efcc26",
|
35
44
|
"metadata": {},
|
36
45
|
"outputs": [],
|
@@ -40,7 +49,7 @@
|
|
40
49
|
},
|
41
50
|
{
|
42
51
|
"cell_type": "code",
|
43
|
-
"execution_count":
|
52
|
+
"execution_count": 3,
|
44
53
|
"id": "0342bab7-a792-4be3-8d4d-44c4343d0e6a",
|
45
54
|
"metadata": {},
|
46
55
|
"outputs": [
|
@@ -76,7 +85,7 @@
|
|
76
85
|
"Name: yahoo_id, dtype: object"
|
77
86
|
]
|
78
87
|
},
|
79
|
-
"execution_count":
|
88
|
+
"execution_count": 3,
|
80
89
|
"metadata": {},
|
81
90
|
"output_type": "execute_result"
|
82
91
|
}
|
@@ -95,7 +104,7 @@
|
|
95
104
|
},
|
96
105
|
{
|
97
106
|
"cell_type": "code",
|
98
|
-
"execution_count":
|
107
|
+
"execution_count": 4,
|
99
108
|
"id": "d875cd96-a29c-4e22-9806-a1b1c2513564",
|
100
109
|
"metadata": {},
|
101
110
|
"outputs": [],
|
@@ -105,7 +114,7 @@
|
|
105
114
|
},
|
106
115
|
{
|
107
116
|
"cell_type": "code",
|
108
|
-
"execution_count":
|
117
|
+
"execution_count": 5,
|
109
118
|
"id": "1bf0af0d-7ed7-4e07-9da1-5625b1f32bce",
|
110
119
|
"metadata": {},
|
111
120
|
"outputs": [
|
@@ -168,7 +177,7 @@
|
|
168
177
|
" 'REET']"
|
169
178
|
]
|
170
179
|
},
|
171
|
-
"execution_count":
|
180
|
+
"execution_count": 5,
|
172
181
|
"metadata": {},
|
173
182
|
"output_type": "execute_result"
|
174
183
|
}
|
@@ -179,7 +188,7 @@
|
|
179
188
|
},
|
180
189
|
{
|
181
190
|
"cell_type": "code",
|
182
|
-
"execution_count":
|
191
|
+
"execution_count": 6,
|
183
192
|
"id": "0b1bc395-40dd-44c0-bac1-dd1e00f8a5c4",
|
184
193
|
"metadata": {},
|
185
194
|
"outputs": [],
|
@@ -189,7 +198,7 @@
|
|
189
198
|
},
|
190
199
|
{
|
191
200
|
"cell_type": "code",
|
192
|
-
"execution_count":
|
201
|
+
"execution_count": 7,
|
193
202
|
"id": "2674260a-56c7-40a4-9708-0eb335fa075d",
|
194
203
|
"metadata": {},
|
195
204
|
"outputs": [
|
@@ -207,7 +216,7 @@
|
|
207
216
|
},
|
208
217
|
{
|
209
218
|
"cell_type": "code",
|
210
|
-
"execution_count":
|
219
|
+
"execution_count": 8,
|
211
220
|
"id": "ee0523a8-c6ee-42b9-9410-36b26906f2de",
|
212
221
|
"metadata": {},
|
213
222
|
"outputs": [
|
@@ -425,78 +434,6 @@
|
|
425
434
|
" <td>...</td>\n",
|
426
435
|
" </tr>\n",
|
427
436
|
" <tr>\n",
|
428
|
-
" <th>2024-09-11</th>\n",
|
429
|
-
" <td>64.93</td>\n",
|
430
|
-
" <td>66.199997</td>\n",
|
431
|
-
" <td>23.629999</td>\n",
|
432
|
-
" <td>92.019997</td>\n",
|
433
|
-
" <td>31.809999</td>\n",
|
434
|
-
" <td>8.06</td>\n",
|
435
|
-
" <td>69.559998</td>\n",
|
436
|
-
" <td>50.709999</td>\n",
|
437
|
-
" <td>25.52</td>\n",
|
438
|
-
" <td>36.630001</td>\n",
|
439
|
-
" <td>...</td>\n",
|
440
|
-
" <td>61.66</td>\n",
|
441
|
-
" <td><NA></td>\n",
|
442
|
-
" <td>57.130001</td>\n",
|
443
|
-
" <td>49.330002</td>\n",
|
444
|
-
" <td>114.629997</td>\n",
|
445
|
-
" <td>17.67</td>\n",
|
446
|
-
" <td>26.59</td>\n",
|
447
|
-
" <td>25.309999</td>\n",
|
448
|
-
" <td>39.07</td>\n",
|
449
|
-
" <td>15.0</td>\n",
|
450
|
-
" </tr>\n",
|
451
|
-
" <tr>\n",
|
452
|
-
" <th>2024-09-12</th>\n",
|
453
|
-
" <td>65.75</td>\n",
|
454
|
-
" <td>67.93</td>\n",
|
455
|
-
" <td>23.870001</td>\n",
|
456
|
-
" <td>92.519997</td>\n",
|
457
|
-
" <td>32.130001</td>\n",
|
458
|
-
" <td>8.06</td>\n",
|
459
|
-
" <td>70.25</td>\n",
|
460
|
-
" <td>51.07</td>\n",
|
461
|
-
" <td>25.450001</td>\n",
|
462
|
-
" <td>36.790001</td>\n",
|
463
|
-
" <td>...</td>\n",
|
464
|
-
" <td>62.509998</td>\n",
|
465
|
-
" <td><NA></td>\n",
|
466
|
-
" <td>57.759998</td>\n",
|
467
|
-
" <td>48.939999</td>\n",
|
468
|
-
" <td>115.629997</td>\n",
|
469
|
-
" <td>18.1</td>\n",
|
470
|
-
" <td>26.780001</td>\n",
|
471
|
-
" <td>25.85</td>\n",
|
472
|
-
" <td>39.950001</td>\n",
|
473
|
-
" <td>14.96</td>\n",
|
474
|
-
" </tr>\n",
|
475
|
-
" <tr>\n",
|
476
|
-
" <th>2024-09-13</th>\n",
|
477
|
-
" <td>66.139999</td>\n",
|
478
|
-
" <td>68.529999</td>\n",
|
479
|
-
" <td>24.120001</td>\n",
|
480
|
-
" <td>93.5</td>\n",
|
481
|
-
" <td>32.400002</td>\n",
|
482
|
-
" <td>8.06</td>\n",
|
483
|
-
" <td>69.870003</td>\n",
|
484
|
-
" <td>51.389999</td>\n",
|
485
|
-
" <td>25.93</td>\n",
|
486
|
-
" <td>36.900002</td>\n",
|
487
|
-
" <td>...</td>\n",
|
488
|
-
" <td>63.299999</td>\n",
|
489
|
-
" <td><NA></td>\n",
|
490
|
-
" <td>57.849998</td>\n",
|
491
|
-
" <td>48.790001</td>\n",
|
492
|
-
" <td>116.209999</td>\n",
|
493
|
-
" <td>18.139999</td>\n",
|
494
|
-
" <td>27.01</td>\n",
|
495
|
-
" <td>25.950001</td>\n",
|
496
|
-
" <td>40.549999</td>\n",
|
497
|
-
" <td>15.11</td>\n",
|
498
|
-
" </tr>\n",
|
499
|
-
" <tr>\n",
|
500
437
|
" <th>2024-09-16</th>\n",
|
501
438
|
" <td>66.760002</td>\n",
|
502
439
|
" <td>68.5</td>\n",
|
@@ -544,9 +481,81 @@
|
|
544
481
|
" <td>40.439999</td>\n",
|
545
482
|
" <td>15.19</td>\n",
|
546
483
|
" </tr>\n",
|
484
|
+
" <tr>\n",
|
485
|
+
" <th>2024-09-18</th>\n",
|
486
|
+
" <td>66.400002</td>\n",
|
487
|
+
" <td>68.769997</td>\n",
|
488
|
+
" <td>24.27</td>\n",
|
489
|
+
" <td>94.199997</td>\n",
|
490
|
+
" <td>32.580002</td>\n",
|
491
|
+
" <td>8.06</td>\n",
|
492
|
+
" <td>69.389999</td>\n",
|
493
|
+
" <td>51.16</td>\n",
|
494
|
+
" <td>26.42</td>\n",
|
495
|
+
" <td>36.990002</td>\n",
|
496
|
+
" <td>...</td>\n",
|
497
|
+
" <td>63.259998</td>\n",
|
498
|
+
" <td><NA></td>\n",
|
499
|
+
" <td>57.66</td>\n",
|
500
|
+
" <td>49.880001</td>\n",
|
501
|
+
" <td>116.160004</td>\n",
|
502
|
+
" <td>18.049999</td>\n",
|
503
|
+
" <td>26.889999</td>\n",
|
504
|
+
" <td>25.66</td>\n",
|
505
|
+
" <td>40.900002</td>\n",
|
506
|
+
" <td>15.07</td>\n",
|
507
|
+
" </tr>\n",
|
508
|
+
" <tr>\n",
|
509
|
+
" <th>2024-09-19</th>\n",
|
510
|
+
" <td>67.709999</td>\n",
|
511
|
+
" <td>70.150002</td>\n",
|
512
|
+
" <td>24.690001</td>\n",
|
513
|
+
" <td>95.419998</td>\n",
|
514
|
+
" <td>33.09</td>\n",
|
515
|
+
" <td>8.06</td>\n",
|
516
|
+
" <td>71.230003</td>\n",
|
517
|
+
" <td>51.509998</td>\n",
|
518
|
+
" <td>26.74</td>\n",
|
519
|
+
" <td>37.490002</td>\n",
|
520
|
+
" <td>...</td>\n",
|
521
|
+
" <td>63.91</td>\n",
|
522
|
+
" <td><NA></td>\n",
|
523
|
+
" <td>58.110001</td>\n",
|
524
|
+
" <td>47.720001</td>\n",
|
525
|
+
" <td>118.230003</td>\n",
|
526
|
+
" <td>18.219999</td>\n",
|
527
|
+
" <td>26.98</td>\n",
|
528
|
+
" <td>25.799999</td>\n",
|
529
|
+
" <td>41.400002</td>\n",
|
530
|
+
" <td>15.41</td>\n",
|
531
|
+
" </tr>\n",
|
532
|
+
" <tr>\n",
|
533
|
+
" <th>2024-09-20</th>\n",
|
534
|
+
" <td>68.089996</td>\n",
|
535
|
+
" <td>69.389999</td>\n",
|
536
|
+
" <td>24.549999</td>\n",
|
537
|
+
" <td>94.970001</td>\n",
|
538
|
+
" <td>32.689999</td>\n",
|
539
|
+
" <td>8.06</td>\n",
|
540
|
+
" <td>71.269997</td>\n",
|
541
|
+
" <td>50.66</td>\n",
|
542
|
+
" <td>26.940001</td>\n",
|
543
|
+
" <td>37.040001</td>\n",
|
544
|
+
" <td>...</td>\n",
|
545
|
+
" <td>63.32</td>\n",
|
546
|
+
" <td><NA></td>\n",
|
547
|
+
" <td>58.740002</td>\n",
|
548
|
+
" <td>47.43</td>\n",
|
549
|
+
" <td>117.760002</td>\n",
|
550
|
+
" <td>18.15</td>\n",
|
551
|
+
" <td>26.83</td>\n",
|
552
|
+
" <td>25.799999</td>\n",
|
553
|
+
" <td>41.349998</td>\n",
|
554
|
+
" <td>15.4</td>\n",
|
555
|
+
" </tr>\n",
|
547
556
|
" </tbody>\n",
|
548
557
|
"</table>\n",
|
549
|
-
"<p>
|
558
|
+
"<p>7968 rows × 54 columns</p>\n",
|
550
559
|
"</div>"
|
551
560
|
],
|
552
561
|
"text/plain": [
|
@@ -558,11 +567,11 @@
|
|
558
567
|
"1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
559
568
|
"1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
560
569
|
"... ... ... ... ... ... ... \n",
|
561
|
-
"2024-09-11 64.93 66.199997 23.629999 92.019997 31.809999 8.06 \n",
|
562
|
-
"2024-09-12 65.75 67.93 23.870001 92.519997 32.130001 8.06 \n",
|
563
|
-
"2024-09-13 66.139999 68.529999 24.120001 93.5 32.400002 8.06 \n",
|
564
570
|
"2024-09-16 66.760002 68.5 24.48 94.160004 32.599998 8.06 \n",
|
565
571
|
"2024-09-17 66.43 68.260002 24.15 94.330002 32.549999 8.06 \n",
|
572
|
+
"2024-09-18 66.400002 68.769997 24.27 94.199997 32.580002 8.06 \n",
|
573
|
+
"2024-09-19 67.709999 70.150002 24.690001 95.419998 33.09 8.06 \n",
|
574
|
+
"2024-09-20 68.089996 69.389999 24.549999 94.970001 32.689999 8.06 \n",
|
566
575
|
"\n",
|
567
576
|
"ticker EIDO EIRL EIS ENOR ... PAK PGAL \\\n",
|
568
577
|
"date ... \n",
|
@@ -572,11 +581,11 @@
|
|
572
581
|
"1993-02-03 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
|
573
582
|
"1993-02-04 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
|
574
583
|
"... ... ... ... ... ... ... ... \n",
|
575
|
-
"2024-09-11 69.559998 50.709999 25.52 36.630001 ... 61.66 <NA> \n",
|
576
|
-
"2024-09-12 70.25 51.07 25.450001 36.790001 ... 62.509998 <NA> \n",
|
577
|
-
"2024-09-13 69.870003 51.389999 25.93 36.900002 ... 63.299999 <NA> \n",
|
578
584
|
"2024-09-16 70.540001 51.580002 26.049999 37.299999 ... 63.52 <NA> \n",
|
579
585
|
"2024-09-17 69.709999 51.400002 26.34 37.029999 ... 63.220001 <NA> \n",
|
586
|
+
"2024-09-18 69.389999 51.16 26.42 36.990002 ... 63.259998 <NA> \n",
|
587
|
+
"2024-09-19 71.230003 51.509998 26.74 37.490002 ... 63.91 <NA> \n",
|
588
|
+
"2024-09-20 71.269997 50.66 26.940001 37.040001 ... 63.32 <NA> \n",
|
580
589
|
"\n",
|
581
590
|
"ticker QAT REET SPY THD TUR UAE \\\n",
|
582
591
|
"date \n",
|
@@ -586,11 +595,11 @@
|
|
586
595
|
"1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
587
596
|
"1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
588
597
|
"... ... ... ... ... ... ... \n",
|
589
|
-
"2024-09-11 57.130001 49.330002 114.629997 17.67 26.59 25.309999 \n",
|
590
|
-
"2024-09-12 57.759998 48.939999 115.629997 18.1 26.780001 25.85 \n",
|
591
|
-
"2024-09-13 57.849998 48.790001 116.209999 18.139999 27.01 25.950001 \n",
|
592
598
|
"2024-09-16 58.130001 49.240002 116.610001 18.200001 27.17 26.07 \n",
|
593
599
|
"2024-09-17 57.959999 50.299999 116.489998 18.049999 26.969999 25.82 \n",
|
600
|
+
"2024-09-18 57.66 49.880001 116.160004 18.049999 26.889999 25.66 \n",
|
601
|
+
"2024-09-19 58.110001 47.720001 118.230003 18.219999 26.98 25.799999 \n",
|
602
|
+
"2024-09-20 58.740002 47.43 117.760002 18.15 26.83 25.799999 \n",
|
594
603
|
"\n",
|
595
604
|
"ticker URTH VXX \n",
|
596
605
|
"date \n",
|
@@ -600,16 +609,16 @@
|
|
600
609
|
"1993-02-03 <NA> <NA> \n",
|
601
610
|
"1993-02-04 <NA> <NA> \n",
|
602
611
|
"... ... ... \n",
|
603
|
-
"2024-09-11 39.07 15.0 \n",
|
604
|
-
"2024-09-12 39.950001 14.96 \n",
|
605
|
-
"2024-09-13 40.549999 15.11 \n",
|
606
612
|
"2024-09-16 40.66 15.09 \n",
|
607
613
|
"2024-09-17 40.439999 15.19 \n",
|
614
|
+
"2024-09-18 40.900002 15.07 \n",
|
615
|
+
"2024-09-19 41.400002 15.41 \n",
|
616
|
+
"2024-09-20 41.349998 15.4 \n",
|
608
617
|
"\n",
|
609
|
-
"[
|
618
|
+
"[7968 rows x 54 columns]"
|
610
619
|
]
|
611
620
|
},
|
612
|
-
"execution_count":
|
621
|
+
"execution_count": 8,
|
613
622
|
"metadata": {},
|
614
623
|
"output_type": "execute_result"
|
615
624
|
}
|
@@ -620,10 +629,28 @@
|
|
620
629
|
},
|
621
630
|
{
|
622
631
|
"cell_type": "code",
|
623
|
-
"execution_count":
|
632
|
+
"execution_count": 10,
|
624
633
|
"id": "83f30ee7-b686-4ee8-8c5f-9333d2be31c8",
|
625
634
|
"metadata": {},
|
626
635
|
"outputs": [],
|
636
|
+
"source": [
|
637
|
+
"msci_etf_df.to_parquet('s3://factorlab-data/global_msci_eqty_etf_data_daily.parquet')"
|
638
|
+
]
|
639
|
+
},
|
640
|
+
{
|
641
|
+
"cell_type": "code",
|
642
|
+
"execution_count": null,
|
643
|
+
"id": "fa7fb5fc-e6f3-4a1e-bda5-365d09e2b3e0",
|
644
|
+
"metadata": {},
|
645
|
+
"outputs": [],
|
646
|
+
"source": []
|
647
|
+
},
|
648
|
+
{
|
649
|
+
"cell_type": "code",
|
650
|
+
"execution_count": null,
|
651
|
+
"id": "0ad35f12-5943-4640-96fb-07b7ea738abd",
|
652
|
+
"metadata": {},
|
653
|
+
"outputs": [],
|
627
654
|
"source": []
|
628
655
|
},
|
629
656
|
{
|
@@ -469,7 +469,6 @@ class WrangleData:
|
|
469
469
|
"""
|
470
470
|
Wrangles time series data responses from various APIs into tidy data format.
|
471
471
|
"""
|
472
|
-
|
473
472
|
def __init__(self, data_req: DataRequest, data_resp: Union[Dict[str, pd.DataFrame], pd.DataFrame]):
|
474
473
|
"""
|
475
474
|
Constructor
|
@@ -484,6 +483,7 @@ class WrangleData:
|
|
484
483
|
"""
|
485
484
|
self.data_req = data_req
|
486
485
|
self.data_resp = data_resp
|
486
|
+
self.tidy_data = pd.DataFrame()
|
487
487
|
|
488
488
|
def cryptocompare(self) -> pd.DataFrame:
|
489
489
|
"""
|
@@ -717,16 +717,22 @@ class WrangleData:
|
|
717
717
|
"""
|
718
718
|
# convert fields to lib
|
719
719
|
self.convert_fields_to_lib(data_source='dbnomics')
|
720
|
+
|
720
721
|
# convert to datetime
|
721
722
|
self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
|
723
|
+
|
722
724
|
# set index
|
723
725
|
self.data_resp = self.data_resp.set_index('date').sort_index()
|
726
|
+
|
724
727
|
# resample
|
725
728
|
self.data_resp = self.data_resp.resample(self.data_req.freq).last().ffill()
|
729
|
+
|
726
730
|
# filter dates
|
727
731
|
self.filter_dates()
|
732
|
+
|
728
733
|
# type conversion
|
729
734
|
self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
|
735
|
+
|
730
736
|
# remove bad data
|
731
737
|
self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
|
732
738
|
self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
|
@@ -734,36 +740,125 @@ class WrangleData:
|
|
734
740
|
|
735
741
|
return self.data_resp
|
736
742
|
|
737
|
-
def
|
743
|
+
def ccxt_ohlcv(self) -> pd.DataFrame:
|
738
744
|
"""
|
739
|
-
Wrangles CCXT data response to dataframe with tidy data format.
|
745
|
+
Wrangles CCXT OHLCV data response to dataframe with tidy data format.
|
740
746
|
|
741
747
|
Returns
|
742
748
|
-------
|
743
749
|
pd.DataFrame
|
744
|
-
|
750
|
+
Dataframe with tidy data format.
|
751
|
+
"""
|
752
|
+
# field cols
|
753
|
+
cols = ["date", "open", "high", "low", "close", "volume"]
|
754
|
+
|
755
|
+
# add tickers
|
756
|
+
for i in range(len(self.data_req.source_markets)):
|
757
|
+
df = pd.DataFrame(self.data_resp[i], columns=cols)
|
758
|
+
df['ticker'] = self.data_req.source_markets[i]
|
759
|
+
self.tidy_data = pd.concat([self.tidy_data, df])
|
745
760
|
|
761
|
+
# convert to datetime
|
762
|
+
self.tidy_data['date'] = pd.to_datetime(self.tidy_data['date'], unit='ms')
|
763
|
+
|
764
|
+
# set index
|
765
|
+
self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
|
766
|
+
|
767
|
+
return self.tidy_data
|
768
|
+
|
769
|
+
def ccxt_funding_rates(self) -> pd.DataFrame:
|
746
770
|
"""
|
747
|
-
|
771
|
+
Wrangles CCXT funding rates data response to dataframe with tidy data format.
|
772
|
+
|
773
|
+
Returns
|
774
|
+
-------
|
775
|
+
pd.DataFrame
|
776
|
+
Dataframe with tidy data format.
|
777
|
+
"""
|
778
|
+
# add tickers
|
779
|
+
for i in range(len(self.data_req.source_markets)):
|
780
|
+
df = pd.DataFrame(self.data_resp[i])
|
781
|
+
self.tidy_data = pd.concat([self.tidy_data, df])
|
782
|
+
self.tidy_data = self.tidy_data[['symbol', 'fundingRate', 'datetime']]
|
783
|
+
self.data_resp = self.tidy_data
|
784
|
+
|
785
|
+
# convert to lib fields
|
748
786
|
self.convert_fields_to_lib(data_source='ccxt')
|
787
|
+
self.tidy_data = self.data_resp
|
788
|
+
|
749
789
|
# convert to datetime
|
750
|
-
|
751
|
-
|
752
|
-
elif 'funding_rate' in self.data_resp.columns:
|
753
|
-
self.data_resp['date'] = pd.to_datetime(self.data_resp.set_index('date').index).floor('s').tz_localize(None)
|
790
|
+
self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
|
791
|
+
|
754
792
|
# set index
|
755
|
-
self.
|
793
|
+
self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
|
794
|
+
|
756
795
|
# resample
|
757
|
-
if
|
758
|
-
self.
|
796
|
+
if self.data_req.freq in ['d', 'w', 'm', 'q', 'y']:
|
797
|
+
self.tidy_data = (
|
798
|
+
(1 + self.tidy_data.funding_rate)
|
799
|
+
.groupby('ticker')
|
800
|
+
.resample('d', level='date')
|
801
|
+
.prod() - 1
|
802
|
+
).to_frame().swaplevel('ticker', 'date').sort_index()
|
803
|
+
|
804
|
+
return self.tidy_data
|
805
|
+
|
806
|
+
def ccxt_open_interest(self) -> pd.DataFrame:
|
807
|
+
"""
|
808
|
+
Wrangles CCXT open interest data response to dataframe with tidy data format.
|
809
|
+
|
810
|
+
Returns
|
811
|
+
-------
|
812
|
+
pd.DataFrame
|
813
|
+
Dataframe with tidy data format.
|
814
|
+
"""
|
815
|
+
# add tickers
|
816
|
+
for i in range(len(self.data_req.source_markets)):
|
817
|
+
df = pd.DataFrame(self.data_resp[i])
|
818
|
+
self.tidy_data = pd.concat([self.tidy_data, df])
|
819
|
+
self.tidy_data = self.tidy_data[['symbol', 'openInterestAmount', 'datetime']]
|
820
|
+
self.data_resp = self.tidy_data
|
821
|
+
|
822
|
+
# convert to lib fields
|
823
|
+
self.convert_fields_to_lib(data_source='ccxt')
|
824
|
+
self.tidy_data = self.data_resp
|
825
|
+
|
826
|
+
# convert to datetime
|
827
|
+
self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
|
828
|
+
|
829
|
+
# set index
|
830
|
+
self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
|
831
|
+
|
832
|
+
return self.tidy_data
|
833
|
+
|
834
|
+
def ccxt(self, data_type: str) -> pd.DataFrame:
|
835
|
+
"""
|
836
|
+
Wrangles CCXT data response to dataframe with tidy data format.
|
837
|
+
|
838
|
+
Returns
|
839
|
+
-------
|
840
|
+
pd.DataFrame
|
841
|
+
Wrangled dataframe into tidy data format.
|
842
|
+
|
843
|
+
"""
|
844
|
+
if data_type == 'ohlcv':
|
845
|
+
self.tidy_data = self.ccxt_ohlcv()
|
846
|
+
elif data_type == 'funding_rates':
|
847
|
+
self.tidy_data = self.ccxt_funding_rates()
|
848
|
+
elif data_type == 'open_interest':
|
849
|
+
self.tidy_data = self.ccxt_open_interest()
|
850
|
+
else:
|
851
|
+
raise ValueError(f"Data type {data_type} not supported.")
|
852
|
+
|
759
853
|
# type conversion
|
760
|
-
self.
|
854
|
+
self.tidy_data = self.tidy_data.apply(pd.to_numeric, errors='coerce').convert_dtypes()
|
855
|
+
|
761
856
|
# remove bad data
|
762
|
-
self.
|
763
|
-
self.
|
764
|
-
self.
|
857
|
+
self.tidy_data = self.tidy_data[self.tidy_data != 0] # 0 values
|
858
|
+
self.tidy_data = self.tidy_data[~self.tidy_data.index.duplicated()] # duplicate rows
|
859
|
+
self.tidy_data = self.tidy_data.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
|
765
860
|
|
766
|
-
return self.
|
861
|
+
return self.tidy_data
|
767
862
|
|
768
863
|
def fred(self) -> pd.DataFrame:
|
769
864
|
"""
|
@@ -773,24 +868,29 @@ class WrangleData:
|
|
773
868
|
-------
|
774
869
|
pd.DataFrame
|
775
870
|
Wrangled dataframe into tidy data format.
|
776
|
-
|
777
871
|
"""
|
778
|
-
#
|
872
|
+
# tickers
|
779
873
|
self.data_resp.columns = self.data_req.tickers # convert tickers to cryptodatapy format
|
874
|
+
|
780
875
|
# resample to match end of reporting period, not beginning
|
781
876
|
self.data_resp = self.data_resp.resample('d').last().ffill().resample(self.data_req.freq).last().stack(). \
|
782
877
|
to_frame().reset_index()
|
878
|
+
|
783
879
|
# convert cols
|
784
880
|
if self.data_req.cat == 'macro':
|
785
881
|
self.data_resp.columns = ['DATE', 'symbol', 'actual']
|
786
882
|
else:
|
787
883
|
self.data_resp.columns = ['DATE', 'symbol', 'close']
|
788
|
-
|
884
|
+
|
885
|
+
# fields
|
789
886
|
self.convert_fields_to_lib(data_source='fred')
|
790
|
-
|
887
|
+
|
888
|
+
# index
|
791
889
|
self.data_resp.set_index(['date', 'ticker'], inplace=True)
|
890
|
+
|
792
891
|
# type conversion
|
793
892
|
self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
|
893
|
+
|
794
894
|
# remove bad data
|
795
895
|
self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
|
796
896
|
self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
|
@@ -807,37 +907,41 @@ class WrangleData:
|
|
807
907
|
pd.DataFrame
|
808
908
|
Wrangled dataframe into tidy data format.
|
809
909
|
"""
|
810
|
-
#
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
self.data_resp = self.data_resp.stack() # stack to multi-index
|
910
|
+
# tickers
|
911
|
+
tickers_dict = {source_ticker: ticker for source_ticker, ticker in zip(self.data_req.source_tickers,
|
912
|
+
self.data_req.tickers)}
|
913
|
+
if len(self.data_req.tickers) == 1:
|
914
|
+
self.data_resp['Ticker'] = self.data_req.tickers[0]
|
915
|
+
else:
|
916
|
+
self.data_resp = self.data_resp.stack()
|
818
917
|
self.data_resp.index.names = ['Date', 'Ticker']
|
819
|
-
|
820
|
-
|
821
|
-
self.data_req.tickers], level=1)
|
822
|
-
else:
|
823
|
-
self.data_resp.index = self.data_resp.index.set_levels([ticker for ticker in self.data_req.tickers],
|
824
|
-
level=1)
|
918
|
+
self.data_resp.index = self.data_resp.index.set_levels(self.data_resp.index.levels[1].map(tickers_dict),
|
919
|
+
level=1)
|
825
920
|
self.data_resp.reset_index(inplace=True)
|
826
|
-
|
921
|
+
|
922
|
+
# fields
|
827
923
|
self.convert_fields_to_lib(data_source='yahoo')
|
828
|
-
|
924
|
+
|
925
|
+
# index
|
829
926
|
self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
|
927
|
+
self.data_resp.set_index(['date', 'ticker'], inplace=True)
|
928
|
+
|
830
929
|
# resample
|
831
|
-
self.data_resp = self.data_resp.
|
832
|
-
|
930
|
+
self.data_resp = self.data_resp.groupby('ticker').\
|
931
|
+
resample(self.data_req.freq, level='date').\
|
932
|
+
last().swaplevel('ticker', 'date').sort_index()
|
933
|
+
|
833
934
|
# re-order cols
|
834
935
|
self.data_resp = self.data_resp.loc[:, ['open', 'high', 'low', 'close', 'close_adj', 'volume']]
|
936
|
+
|
835
937
|
# type conversion
|
836
938
|
self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
|
939
|
+
|
837
940
|
# remove bad data
|
838
941
|
self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
|
839
942
|
self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
|
840
943
|
self.data_resp = self.data_resp.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
|
944
|
+
|
841
945
|
# keep only requested fields and sort index
|
842
946
|
self.data_resp = self.data_resp[self.data_req.fields].sort_index()
|
843
947
|
|
@@ -853,7 +957,7 @@ class WrangleData:
|
|
853
957
|
Wrangled dataframe into tidy data format.
|
854
958
|
|
855
959
|
"""
|
856
|
-
#
|
960
|
+
# ticker
|
857
961
|
ff_tickers_dict = {'RF': 'US_Rates_1M_RF',
|
858
962
|
'Mkt-RF': 'US_Eqty_CSRP_ER',
|
859
963
|
'HML': 'US_Eqty_Val',
|
@@ -862,6 +966,7 @@ class WrangleData:
|
|
862
966
|
'CMA': 'US_Eqty_Inv',
|
863
967
|
'Mom': 'US_Eqty_Mom',
|
864
968
|
'ST_Rev': 'US_Eqty_STRev'}
|
969
|
+
|
865
970
|
# remove white space from cols str
|
866
971
|
self.data_resp.columns = [col.strip() for col in self.data_resp.columns]
|
867
972
|
# keep cols in data req tickers
|
@@ -870,14 +975,18 @@ class WrangleData:
|
|
870
975
|
drop_cols = [col for col in self.data_resp.columns if col not in self.data_req.tickers]
|
871
976
|
self.data_resp.drop(columns=drop_cols, inplace=True)
|
872
977
|
self.data_resp = self.data_resp.loc[:, ~self.data_resp.columns.duplicated()] # drop dup cols
|
978
|
+
|
873
979
|
# resample freq
|
874
980
|
self.data_resp = self.data_resp.resample(self.data_req.freq).sum()
|
981
|
+
|
875
982
|
# format index
|
876
983
|
self.data_resp.index.name = 'date' # rename
|
877
984
|
self.data_resp = self.data_resp.stack().to_frame('er')
|
878
985
|
self.data_resp.index.names = ['date', 'ticker']
|
986
|
+
|
879
987
|
# type and conversion to decimals
|
880
988
|
self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes() / 100
|
989
|
+
|
881
990
|
# remove bad data
|
882
991
|
self.data_resp = self.data_resp[self.data_resp != 0] # 0 values
|
883
992
|
self.data_resp = self.data_resp[~self.data_resp.index.duplicated()] # duplicate rows
|
@@ -1016,9 +1125,9 @@ class WrangleData:
|
|
1016
1125
|
|
1017
1126
|
# loop through data resp cols
|
1018
1127
|
for col in self.data_resp.columns:
|
1019
|
-
if self.data_req.source_fields is not None and col in self.data_req.source_fields:
|
1020
|
-
|
1021
|
-
|
1128
|
+
# if self.data_req.source_fields is not None and col in self.data_req.source_fields:
|
1129
|
+
# pass
|
1130
|
+
if col in fields_list or col.title() in fields_list or col.lower() in fields_list:
|
1022
1131
|
self.data_resp.rename(columns={col: fields_df[(fields_df[str(data_source) + '_id']
|
1023
1132
|
== col.title()) |
|
1024
1133
|
(fields_df[str(data_source) + '_id'] == col.lower()) |
|