cryptodatapy 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,6 +64,27 @@
64
64
  {
65
65
  "cell_type": "code",
66
66
  "execution_count": 4,
67
+ "id": "5e796745",
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "data": {
72
+ "text/plain": [
73
+ "True"
74
+ ]
75
+ },
76
+ "execution_count": 4,
77
+ "metadata": {},
78
+ "output_type": "execute_result"
79
+ }
80
+ ],
81
+ "source": [
82
+ "'BTCST/USDT:USDT' in perp_tickers"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 5,
67
88
  "id": "fcb74458",
68
89
  "metadata": {},
69
90
  "outputs": [],
@@ -75,7 +96,28 @@
75
96
  },
76
97
  {
77
98
  "cell_type": "code",
78
- "execution_count": 5,
99
+ "execution_count": 6,
100
+ "id": "4b86fa0d",
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "data": {
105
+ "text/plain": [
106
+ "True"
107
+ ]
108
+ },
109
+ "execution_count": 6,
110
+ "metadata": {},
111
+ "output_type": "execute_result"
112
+ }
113
+ ],
114
+ "source": [
115
+ "'BTCST/USDT:USDT' in spot_tickers"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 7,
79
121
  "id": "7962f7e5",
80
122
  "metadata": {},
81
123
  "outputs": [],
@@ -86,7 +128,7 @@
86
128
  },
87
129
  {
88
130
  "cell_type": "code",
89
- "execution_count": 6,
131
+ "execution_count": 8,
90
132
  "id": "877811c1",
91
133
  "metadata": {},
92
134
  "outputs": [
@@ -96,7 +138,7 @@
96
138
  "314"
97
139
  ]
98
140
  },
99
- "execution_count": 6,
141
+ "execution_count": 8,
100
142
  "metadata": {},
101
143
  "output_type": "execute_result"
102
144
  }
@@ -108,7 +150,28 @@
108
150
  },
109
151
  {
110
152
  "cell_type": "code",
111
- "execution_count": 7,
153
+ "execution_count": 9,
154
+ "id": "4ef7a85b",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "True"
161
+ ]
162
+ },
163
+ "execution_count": 9,
164
+ "metadata": {},
165
+ "output_type": "execute_result"
166
+ }
167
+ ],
168
+ "source": [
169
+ "'BTCST/USDT:USDT' in binance_tickers"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": 10,
112
175
  "id": "fe425163",
113
176
  "metadata": {},
114
177
  "outputs": [],
@@ -120,7 +183,28 @@
120
183
  },
121
184
  {
122
185
  "cell_type": "code",
123
- "execution_count": 8,
186
+ "execution_count": 11,
187
+ "id": "9c63cd43",
188
+ "metadata": {},
189
+ "outputs": [
190
+ {
191
+ "data": {
192
+ "text/plain": [
193
+ "True"
194
+ ]
195
+ },
196
+ "execution_count": 11,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "'BTCST' in cc_tickers"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 12,
124
208
  "id": "165053db",
125
209
  "metadata": {},
126
210
  "outputs": [],
@@ -128,24 +212,66 @@
128
212
  "# keep only USDT ticker\n",
129
213
  "bin_tickers = []\n",
130
214
  "for ticker in binance_tickers:\n",
131
- " if '/' in ticker and ticker.split('/')[1] == 'USDT':\n",
215
+ " if '/' in ticker and ticker.split('/')[1] == 'USDT:USDT':\n",
132
216
  " bin_tickers.append(ticker.split('/')[0])"
133
217
  ]
134
218
  },
135
219
  {
136
220
  "cell_type": "code",
137
- "execution_count": 9,
221
+ "execution_count": 13,
222
+ "id": "c93e487a",
223
+ "metadata": {},
224
+ "outputs": [
225
+ {
226
+ "data": {
227
+ "text/plain": [
228
+ "True"
229
+ ]
230
+ },
231
+ "execution_count": 13,
232
+ "metadata": {},
233
+ "output_type": "execute_result"
234
+ }
235
+ ],
236
+ "source": [
237
+ "'BTCST' in bin_tickers"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 14,
138
243
  "id": "d6cf8a4c",
139
244
  "metadata": {},
140
245
  "outputs": [],
141
246
  "source": [
142
247
  "# usdt tickers\n",
143
- "usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT']"
248
+ "usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT:USDT']"
144
249
  ]
145
250
  },
146
251
  {
147
252
  "cell_type": "code",
148
- "execution_count": 10,
253
+ "execution_count": 15,
254
+ "id": "11ec0e6d",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "data": {
259
+ "text/plain": [
260
+ "282"
261
+ ]
262
+ },
263
+ "execution_count": 15,
264
+ "metadata": {},
265
+ "output_type": "execute_result"
266
+ }
267
+ ],
268
+ "source": [
269
+ "len(usdt_tickers)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 16,
149
275
  "id": "633f7a3e",
150
276
  "metadata": {},
151
277
  "outputs": [],
@@ -156,17 +282,17 @@
156
282
  },
157
283
  {
158
284
  "cell_type": "code",
159
- "execution_count": 11,
285
+ "execution_count": 17,
160
286
  "id": "30337a71",
161
287
  "metadata": {},
162
288
  "outputs": [
163
289
  {
164
290
  "data": {
165
291
  "text/plain": [
166
- "0"
292
+ "270"
167
293
  ]
168
294
  },
169
- "execution_count": 11,
295
+ "execution_count": 17,
170
296
  "metadata": {},
171
297
  "output_type": "execute_result"
172
298
  }
@@ -175,6 +301,27 @@
175
301
  "len(tickers)"
176
302
  ]
177
303
  },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 18,
307
+ "id": "7bb3b25f",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "data": {
312
+ "text/plain": [
313
+ "True"
314
+ ]
315
+ },
316
+ "execution_count": 18,
317
+ "metadata": {},
318
+ "output_type": "execute_result"
319
+ }
320
+ ],
321
+ "source": [
322
+ "'BTCST' in tickers"
323
+ ]
324
+ },
178
325
  {
179
326
  "cell_type": "markdown",
180
327
  "id": "f80eb97d",
@@ -185,7 +332,7 @@
185
332
  },
186
333
  {
187
334
  "cell_type": "code",
188
- "execution_count": 12,
335
+ "execution_count": 20,
189
336
  "id": "49b09508",
190
337
  "metadata": {},
191
338
  "outputs": [],
@@ -200,29 +347,35 @@
200
347
  },
201
348
  {
202
349
  "cell_type": "code",
203
- "execution_count": 13,
350
+ "execution_count": 21,
204
351
  "id": "6ac9365b",
205
352
  "metadata": {},
206
- "outputs": [],
207
- "source": [
208
- "# df1 = GetData(data_req).get_series()"
209
- ]
210
- },
211
- {
212
- "cell_type": "code",
213
- "execution_count": 14,
214
- "id": "98a425b2",
215
- "metadata": {},
216
- "outputs": [],
353
+ "outputs": [
354
+ {
355
+ "name": "stderr",
356
+ "output_type": "stream",
357
+ "text": [
358
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
359
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
360
+ "WARNING:root:Failed to pull data on attempt #1.\n",
361
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
362
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
363
+ "WARNING:root:Failed to pull data on attempt #2.\n",
364
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
365
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
366
+ "WARNING:root:Failed to pull data on attempt #3.\n",
367
+ "WARNING:root:Failed to get OHLCV data from binanceusdm for BTCST/USDT after many attempts.\n"
368
+ ]
369
+ }
370
+ ],
217
371
  "source": [
218
- "# df1.to_csv('binance_perp_futures.csv')\n",
219
- "df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
372
+ "df1 = GetData(data_req).get_series()"
220
373
  ]
221
374
  },
222
375
  {
223
376
  "cell_type": "code",
224
- "execution_count": 15,
225
- "id": "0e04da4a",
377
+ "execution_count": 22,
378
+ "id": "364bb46e",
226
379
  "metadata": {},
227
380
  "outputs": [
228
381
  {
@@ -269,12 +422,12 @@
269
422
  " <tr>\n",
270
423
  " <th>2019-09-08</th>\n",
271
424
  " <th>BTC</th>\n",
272
- " <td>10000.00</td>\n",
425
+ " <td>10000.0</td>\n",
273
426
  " <td>10412.65</td>\n",
274
- " <td>10000.00</td>\n",
427
+ " <td>10000.0</td>\n",
275
428
  " <td>10391.63</td>\n",
276
429
  " <td>3096.291</td>\n",
277
- " <td>NaN</td>\n",
430
+ " <td>&lt;NA&gt;</td>\n",
278
431
  " </tr>\n",
279
432
  " <tr>\n",
280
433
  " <th>2019-09-09</th>\n",
@@ -282,14 +435,14 @@
282
435
  " <td>10316.62</td>\n",
283
436
  " <td>10475.54</td>\n",
284
437
  " <td>10077.22</td>\n",
285
- " <td>10307.00</td>\n",
438
+ " <td>10307.0</td>\n",
286
439
  " <td>14824.373</td>\n",
287
- " <td>NaN</td>\n",
440
+ " <td>&lt;NA&gt;</td>\n",
288
441
  " </tr>\n",
289
442
  " <tr>\n",
290
443
  " <th>2019-09-10</th>\n",
291
444
  " <th>BTC</th>\n",
292
- " <td>10307.00</td>\n",
445
+ " <td>10307.0</td>\n",
293
446
  " <td>10382.97</td>\n",
294
447
  " <td>9940.87</td>\n",
295
448
  " <td>10102.02</td>\n",
@@ -316,84 +469,123 @@
316
469
  " <td>15609.634</td>\n",
317
470
  " <td>0.0003</td>\n",
318
471
  " </tr>\n",
472
+ " <tr>\n",
473
+ " <th>...</th>\n",
474
+ " <th>...</th>\n",
475
+ " <td>...</td>\n",
476
+ " <td>...</td>\n",
477
+ " <td>...</td>\n",
478
+ " <td>...</td>\n",
479
+ " <td>...</td>\n",
480
+ " <td>...</td>\n",
481
+ " </tr>\n",
482
+ " <tr>\n",
483
+ " <th rowspan=\"5\" valign=\"top\">2024-08-13</th>\n",
484
+ " <th>ZETA</th>\n",
485
+ " <td>0.6558</td>\n",
486
+ " <td>0.7099</td>\n",
487
+ " <td>0.6143</td>\n",
488
+ " <td>0.6556</td>\n",
489
+ " <td>290931468.0</td>\n",
490
+ " <td>-0.001465</td>\n",
491
+ " </tr>\n",
492
+ " <tr>\n",
493
+ " <th>ZIL</th>\n",
494
+ " <td>0.01394</td>\n",
495
+ " <td>0.01397</td>\n",
496
+ " <td>0.01348</td>\n",
497
+ " <td>0.01372</td>\n",
498
+ " <td>211016383.0</td>\n",
499
+ " <td>0.0002</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <th>ZK</th>\n",
503
+ " <td>0.11683</td>\n",
504
+ " <td>0.11895</td>\n",
505
+ " <td>0.11223</td>\n",
506
+ " <td>0.11713</td>\n",
507
+ " <td>180118593.0</td>\n",
508
+ " <td>0.0002</td>\n",
509
+ " </tr>\n",
510
+ " <tr>\n",
511
+ " <th>ZRO</th>\n",
512
+ " <td>3.509</td>\n",
513
+ " <td>3.533</td>\n",
514
+ " <td>3.349</td>\n",
515
+ " <td>3.459</td>\n",
516
+ " <td>10802271.5</td>\n",
517
+ " <td>0.000173</td>\n",
518
+ " </tr>\n",
519
+ " <tr>\n",
520
+ " <th>ZRX</th>\n",
521
+ " <td>0.3102</td>\n",
522
+ " <td>0.3126</td>\n",
523
+ " <td>0.3</td>\n",
524
+ " <td>0.3083</td>\n",
525
+ " <td>18072404.9</td>\n",
526
+ " <td>-0.000044</td>\n",
527
+ " </tr>\n",
319
528
  " </tbody>\n",
320
529
  "</table>\n",
530
+ "<p>222221 rows × 6 columns</p>\n",
321
531
  "</div>"
322
532
  ],
323
533
  "text/plain": [
324
- " open high low close volume \\\n",
325
- "date ticker \n",
326
- "2019-09-08 BTC 10000.00 10412.65 10000.00 10391.63 3096.291 \n",
327
- "2019-09-09 BTC 10316.62 10475.54 10077.22 10307.00 14824.373 \n",
328
- "2019-09-10 BTC 10307.00 10382.97 9940.87 10102.02 9068.955 \n",
329
- "2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
330
- "2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
534
+ " open high low close volume \\\n",
535
+ "date ticker \n",
536
+ "2019-09-08 BTC 10000.0 10412.65 10000.0 10391.63 3096.291 \n",
537
+ "2019-09-09 BTC 10316.62 10475.54 10077.22 10307.0 14824.373 \n",
538
+ "2019-09-10 BTC 10307.0 10382.97 9940.87 10102.02 9068.955 \n",
539
+ "2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
540
+ "2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
541
+ "... ... ... ... ... ... \n",
542
+ "2024-08-13 ZETA 0.6558 0.7099 0.6143 0.6556 290931468.0 \n",
543
+ " ZIL 0.01394 0.01397 0.01348 0.01372 211016383.0 \n",
544
+ " ZK 0.11683 0.11895 0.11223 0.11713 180118593.0 \n",
545
+ " ZRO 3.509 3.533 3.349 3.459 10802271.5 \n",
546
+ " ZRX 0.3102 0.3126 0.3 0.3083 18072404.9 \n",
331
547
  "\n",
332
548
  " funding_rate \n",
333
549
  "date ticker \n",
334
- "2019-09-08 BTC NaN \n",
335
- "2019-09-09 BTC NaN \n",
550
+ "2019-09-08 BTC <NA> \n",
551
+ "2019-09-09 BTC <NA> \n",
336
552
  "2019-09-10 BTC 0.0002 \n",
337
553
  "2019-09-11 BTC 0.0003 \n",
338
- "2019-09-12 BTC 0.0003 "
554
+ "2019-09-12 BTC 0.0003 \n",
555
+ "... ... \n",
556
+ "2024-08-13 ZETA -0.001465 \n",
557
+ " ZIL 0.0002 \n",
558
+ " ZK 0.0002 \n",
559
+ " ZRO 0.000173 \n",
560
+ " ZRX -0.000044 \n",
561
+ "\n",
562
+ "[222221 rows x 6 columns]"
339
563
  ]
340
564
  },
341
- "execution_count": 15,
565
+ "execution_count": 22,
342
566
  "metadata": {},
343
567
  "output_type": "execute_result"
344
568
  }
345
569
  ],
346
570
  "source": [
347
- "df1.head()"
348
- ]
349
- },
350
- {
351
- "cell_type": "markdown",
352
- "id": "32f15191",
353
- "metadata": {},
354
- "source": [
355
- "### Binance Spot"
356
- ]
357
- },
358
- {
359
- "cell_type": "code",
360
- "execution_count": 16,
361
- "id": "83e9e466",
362
- "metadata": {},
363
- "outputs": [],
364
- "source": [
365
- "# pull OHLC from Binance\n",
366
- "data_req = DataRequest(source='ccxt',\n",
367
- " tickers=tickers, \n",
368
- " fields=['open', 'high', 'low', 'close', 'volume'], \n",
369
- " freq='d')"
370
- ]
371
- },
372
- {
373
- "cell_type": "code",
374
- "execution_count": 17,
375
- "id": "82d4bbc7",
376
- "metadata": {},
377
- "outputs": [],
378
- "source": [
379
- "# df2 = GetData(data_req).get_series()"
571
+ "df1"
380
572
  ]
381
573
  },
382
574
  {
383
575
  "cell_type": "code",
384
- "execution_count": 18,
385
- "id": "4f63eb21",
576
+ "execution_count": 23,
577
+ "id": "98a425b2",
386
578
  "metadata": {},
387
579
  "outputs": [],
388
580
  "source": [
389
- "# df2.to_csv('binance_spot.csv')\n",
390
- "df2 = pd.read_csv('../../../../factorlab/notebooks/binance_spot.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
581
+ "# df1.to_csv('binance_perp_futures.csv')\n",
582
+ "df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
391
583
  ]
392
584
  },
393
585
  {
394
586
  "cell_type": "code",
395
- "execution_count": 19,
396
- "id": "ce8929c1",
587
+ "execution_count": 24,
588
+ "id": "0e04da4a",
397
589
  "metadata": {},
398
590
  "outputs": [
399
591
  {
@@ -423,6 +615,7 @@
423
615
  " <th>low</th>\n",
424
616
  " <th>close</th>\n",
425
617
  " <th>volume</th>\n",
618
+ " <th>funding_rate</th>\n",
426
619
  " </tr>\n",
427
620
  " <tr>\n",
428
621
  " <th>date</th>\n",
@@ -432,16 +625,202 @@
432
625
  " <th></th>\n",
433
626
  " <th></th>\n",
434
627
  " <th></th>\n",
628
+ " <th></th>\n",
435
629
  " </tr>\n",
436
630
  " </thead>\n",
437
631
  " <tbody>\n",
438
632
  " <tr>\n",
439
- " <th rowspan=\"2\" valign=\"top\">2017-08-17</th>\n",
633
+ " <th>2019-09-08</th>\n",
440
634
  " <th>BTC</th>\n",
441
- " <td>4261.48</td>\n",
442
- " <td>4485.39</td>\n",
443
- " <td>4200.74</td>\n",
444
- " <td>4285.08</td>\n",
635
+ " <td>10000.00</td>\n",
636
+ " <td>10412.65</td>\n",
637
+ " <td>10000.00</td>\n",
638
+ " <td>10391.63</td>\n",
639
+ " <td>3096.291</td>\n",
640
+ " <td>NaN</td>\n",
641
+ " </tr>\n",
642
+ " <tr>\n",
643
+ " <th>2019-09-09</th>\n",
644
+ " <th>BTC</th>\n",
645
+ " <td>10316.62</td>\n",
646
+ " <td>10475.54</td>\n",
647
+ " <td>10077.22</td>\n",
648
+ " <td>10307.00</td>\n",
649
+ " <td>14824.373</td>\n",
650
+ " <td>NaN</td>\n",
651
+ " </tr>\n",
652
+ " <tr>\n",
653
+ " <th>2019-09-10</th>\n",
654
+ " <th>BTC</th>\n",
655
+ " <td>10307.00</td>\n",
656
+ " <td>10382.97</td>\n",
657
+ " <td>9940.87</td>\n",
658
+ " <td>10102.02</td>\n",
659
+ " <td>9068.955</td>\n",
660
+ " <td>0.0002</td>\n",
661
+ " </tr>\n",
662
+ " <tr>\n",
663
+ " <th>2019-09-11</th>\n",
664
+ " <th>BTC</th>\n",
665
+ " <td>10094.27</td>\n",
666
+ " <td>10293.11</td>\n",
667
+ " <td>9884.31</td>\n",
668
+ " <td>10159.55</td>\n",
669
+ " <td>10897.922</td>\n",
670
+ " <td>0.0003</td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>2019-09-12</th>\n",
674
+ " <th>BTC</th>\n",
675
+ " <td>10163.06</td>\n",
676
+ " <td>10450.13</td>\n",
677
+ " <td>10042.12</td>\n",
678
+ " <td>10415.13</td>\n",
679
+ " <td>15609.634</td>\n",
680
+ " <td>0.0003</td>\n",
681
+ " </tr>\n",
682
+ " </tbody>\n",
683
+ "</table>\n",
684
+ "</div>"
685
+ ],
686
+ "text/plain": [
687
+ " open high low close volume \\\n",
688
+ "date ticker \n",
689
+ "2019-09-08 BTC 10000.00 10412.65 10000.00 10391.63 3096.291 \n",
690
+ "2019-09-09 BTC 10316.62 10475.54 10077.22 10307.00 14824.373 \n",
691
+ "2019-09-10 BTC 10307.00 10382.97 9940.87 10102.02 9068.955 \n",
692
+ "2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
693
+ "2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
694
+ "\n",
695
+ " funding_rate \n",
696
+ "date ticker \n",
697
+ "2019-09-08 BTC NaN \n",
698
+ "2019-09-09 BTC NaN \n",
699
+ "2019-09-10 BTC 0.0002 \n",
700
+ "2019-09-11 BTC 0.0003 \n",
701
+ "2019-09-12 BTC 0.0003 "
702
+ ]
703
+ },
704
+ "execution_count": 24,
705
+ "metadata": {},
706
+ "output_type": "execute_result"
707
+ }
708
+ ],
709
+ "source": [
710
+ "df1.head()"
711
+ ]
712
+ },
713
+ {
714
+ "cell_type": "markdown",
715
+ "id": "32f15191",
716
+ "metadata": {},
717
+ "source": [
718
+ "### Binance Spot"
719
+ ]
720
+ },
721
+ {
722
+ "cell_type": "code",
723
+ "execution_count": 25,
724
+ "id": "83e9e466",
725
+ "metadata": {},
726
+ "outputs": [],
727
+ "source": [
728
+ "# pull OHLC from Binance\n",
729
+ "data_req = DataRequest(source='ccxt',\n",
730
+ " tickers=tickers, \n",
731
+ " fields=['open', 'high', 'low', 'close', 'volume'], \n",
732
+ " freq='d')"
733
+ ]
734
+ },
735
+ {
736
+ "cell_type": "code",
737
+ "execution_count": 26,
738
+ "id": "82d4bbc7",
739
+ "metadata": {},
740
+ "outputs": [
741
+ {
742
+ "name": "stderr",
743
+ "output_type": "stream",
744
+ "text": [
745
+ "WARNING:root:Missing recent OHLCV data for XMR/USDT.\n",
746
+ "WARNING:root:Missing recent OHLCV data for OMG/USDT.\n",
747
+ "WARNING:root:Missing recent OHLCV data for WAVES/USDT.\n",
748
+ "WARNING:root:Missing recent OHLCV data for OCEAN/USDT.\n",
749
+ "WARNING:root:Missing recent OHLCV data for XEM/USDT.\n",
750
+ "WARNING:root:Missing recent OHLCV data for BTCST/USDT.\n",
751
+ "WARNING:root:Missing recent OHLCV data for AGIX/USDT.\n",
752
+ "WARNING:root:Missing recent OHLCV data for BOND/USDT.\n",
753
+ "WARNING:root:Missing recent OHLCV data for BSV/USDT.\n"
754
+ ]
755
+ }
756
+ ],
757
+ "source": [
758
+ "df2 = GetData(data_req).get_series()"
759
+ ]
760
+ },
761
+ {
762
+ "cell_type": "code",
763
+ "execution_count": 27,
764
+ "id": "4f63eb21",
765
+ "metadata": {},
766
+ "outputs": [],
767
+ "source": [
768
+ "# df2.to_csv('binance_spot.csv')\n",
769
+ "df2 = pd.read_csv('../../../../factorlab/notebooks/binance_spot.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
770
+ ]
771
+ },
772
+ {
773
+ "cell_type": "code",
774
+ "execution_count": 28,
775
+ "id": "ce8929c1",
776
+ "metadata": {},
777
+ "outputs": [
778
+ {
779
+ "data": {
780
+ "text/html": [
781
+ "<div>\n",
782
+ "<style scoped>\n",
783
+ " .dataframe tbody tr th:only-of-type {\n",
784
+ " vertical-align: middle;\n",
785
+ " }\n",
786
+ "\n",
787
+ " .dataframe tbody tr th {\n",
788
+ " vertical-align: top;\n",
789
+ " }\n",
790
+ "\n",
791
+ " .dataframe thead th {\n",
792
+ " text-align: right;\n",
793
+ " }\n",
794
+ "</style>\n",
795
+ "<table border=\"1\" class=\"dataframe\">\n",
796
+ " <thead>\n",
797
+ " <tr style=\"text-align: right;\">\n",
798
+ " <th></th>\n",
799
+ " <th></th>\n",
800
+ " <th>open</th>\n",
801
+ " <th>high</th>\n",
802
+ " <th>low</th>\n",
803
+ " <th>close</th>\n",
804
+ " <th>volume</th>\n",
805
+ " </tr>\n",
806
+ " <tr>\n",
807
+ " <th>date</th>\n",
808
+ " <th>ticker</th>\n",
809
+ " <th></th>\n",
810
+ " <th></th>\n",
811
+ " <th></th>\n",
812
+ " <th></th>\n",
813
+ " <th></th>\n",
814
+ " </tr>\n",
815
+ " </thead>\n",
816
+ " <tbody>\n",
817
+ " <tr>\n",
818
+ " <th rowspan=\"2\" valign=\"top\">2017-08-17</th>\n",
819
+ " <th>BTC</th>\n",
820
+ " <td>4261.48</td>\n",
821
+ " <td>4485.39</td>\n",
822
+ " <td>4200.74</td>\n",
823
+ " <td>4285.08</td>\n",
445
824
  " <td>795.150377</td>\n",
446
825
  " </tr>\n",
447
826
  " <tr>\n",
@@ -492,7 +871,7 @@
492
871
  "2017-08-19 BTC 4108.37 4184.69 3850.00 4139.98 381.309763"
493
872
  ]
494
873
  },
495
- "execution_count": 19,
874
+ "execution_count": 28,
496
875
  "metadata": {},
497
876
  "output_type": "execute_result"
498
877
  }
@@ -511,7 +890,7 @@
511
890
  },
512
891
  {
513
892
  "cell_type": "code",
514
- "execution_count": 20,
893
+ "execution_count": 29,
515
894
  "id": "7f14d874",
516
895
  "metadata": {},
517
896
  "outputs": [],
@@ -525,17 +904,17 @@
525
904
  },
526
905
  {
527
906
  "cell_type": "code",
528
- "execution_count": 21,
907
+ "execution_count": 30,
529
908
  "id": "3a8708d3",
530
909
  "metadata": {},
531
910
  "outputs": [],
532
911
  "source": [
533
- "# df3 = GetData(data_req).get_series()"
912
+ "df3 = GetData(data_req).get_series()"
534
913
  ]
535
914
  },
536
915
  {
537
916
  "cell_type": "code",
538
- "execution_count": 22,
917
+ "execution_count": 31,
539
918
  "id": "aa265538",
540
919
  "metadata": {},
541
920
  "outputs": [],
@@ -554,7 +933,7 @@
554
933
  },
555
934
  {
556
935
  "cell_type": "code",
557
- "execution_count": 23,
936
+ "execution_count": 32,
558
937
  "id": "f5ee4f6d",
559
938
  "metadata": {},
560
939
  "outputs": [],
@@ -565,7 +944,7 @@
565
944
  },
566
945
  {
567
946
  "cell_type": "code",
568
- "execution_count": 24,
947
+ "execution_count": 33,
569
948
  "id": "cbe07c91",
570
949
  "metadata": {},
571
950
  "outputs": [
@@ -674,7 +1053,7 @@
674
1053
  "2010-07-21 BTC 0.07474 0.07921 0.06634 0.07921 575.00 0.0"
675
1054
  ]
676
1055
  },
677
- "execution_count": 24,
1056
+ "execution_count": 33,
678
1057
  "metadata": {},
679
1058
  "output_type": "execute_result"
680
1059
  }
@@ -685,7 +1064,32 @@
685
1064
  },
686
1065
  {
687
1066
  "cell_type": "code",
688
- "execution_count": 25,
1067
+ "execution_count": 34,
1068
+ "id": "cef46007",
1069
+ "metadata": {},
1070
+ "outputs": [
1071
+ {
1072
+ "data": {
1073
+ "text/plain": [
1074
+ "Index(['BTC', 'LTC', 'DOGE', 'DASH', 'XLM', 'XMR', 'XRP', 'KEY', 'DGB', 'XEM',\n",
1075
+ " ...\n",
1076
+ " 'TNSR', 'SAGA', 'REZ', 'BB', 'NOT', 'IO', 'ZK', 'LISTA', 'ZRO',\n",
1077
+ " 'RENDER'],\n",
1078
+ " dtype='object', name='ticker', length=256)"
1079
+ ]
1080
+ },
1081
+ "execution_count": 34,
1082
+ "metadata": {},
1083
+ "output_type": "execute_result"
1084
+ }
1085
+ ],
1086
+ "source": [
1087
+ "df.index.get_level_values(1).unique()"
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "execution_count": null,
689
1093
  "id": "d4c497d1",
690
1094
  "metadata": {},
691
1095
  "outputs": [],
@@ -696,74 +1100,98 @@
696
1100
  },
697
1101
  {
698
1102
  "cell_type": "code",
699
- "execution_count": 26,
700
- "id": "a9b1764c",
1103
+ "execution_count": 35,
1104
+ "id": "9f8a899f",
1105
+ "metadata": {},
1106
+ "outputs": [],
1107
+ "source": [
1108
+ "clean = CleanData(df)"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "execution_count": 36,
1114
+ "id": "29e1b955",
701
1115
  "metadata": {},
702
1116
  "outputs": [
703
1117
  {
704
- "name": "stdout",
705
- "output_type": "stream",
706
- "text": [
707
- "Index(['AGIX', 'CTK', 'CVC', 'CVX', 'DGB', 'FTT', 'GLMR', 'IDEX', 'MDT',\n",
708
- " 'OCEAN', 'RAD', 'RAY', 'SC', 'SLP', 'SNT', 'STPT', 'STRAX', 'WAVES'],\n",
709
- " dtype='object', name='ticker')\n"
710
- ]
1118
+ "data": {
1119
+ "text/plain": [
1120
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1121
+ ]
1122
+ },
1123
+ "execution_count": 36,
1124
+ "metadata": {},
1125
+ "output_type": "execute_result"
711
1126
  }
712
1127
  ],
713
1128
  "source": [
714
- "# clean data\n",
715
- "clean_df = CleanData(df).filter_delisted_tickers().\\\n",
716
- " filter_min_nobs(ts_obs=1500, cs_obs=10).\\\n",
717
- " filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
718
- " repair_outliers(imp_method='fcst').\\\n",
719
- " filter_avg_trading_val(thresh_val=1000000).\\\n",
720
- " filter_missing_vals_gaps().\\\n",
721
- " get(attr='df').dropna(how='all')"
1129
+ "clean.filter_delisted_tickers()"
722
1130
  ]
723
1131
  },
724
1132
  {
725
1133
  "cell_type": "code",
726
- "execution_count": 27,
727
- "id": "3d423e53",
1134
+ "execution_count": 37,
1135
+ "id": "9315f178",
728
1136
  "metadata": {},
729
1137
  "outputs": [
730
1138
  {
731
- "ename": "KeyError",
732
- "evalue": "'OCEAN'",
733
- "output_type": "error",
734
- "traceback": [
735
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
736
- "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
737
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexes/base.py:3800\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3799\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3800\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
738
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/_libs/index.pyx:138\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
739
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/_libs/index.pyx:165\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
740
- "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5745\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
741
- "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5753\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
742
- "\u001b[0;31mKeyError\u001b[0m: 'OCEAN'",
743
- "\nThe above exception was the direct cause of the following exception:\n",
744
- "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
745
- "Cell \u001b[0;32mIn [27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m clean_df\u001b[38;5;241m.\u001b[39mloc[:, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOCEAN\u001b[39m\u001b[38;5;124m'\u001b[39m, :]\n",
746
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexing.py:1068\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1066\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_scalar_access(key):\n\u001b[1;32m 1067\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_value(\u001b[38;5;241m*\u001b[39mkey, takeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_takeable)\n\u001b[0;32m-> 1068\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_tuple\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;66;03m# we by definition only have the 0th axis\u001b[39;00m\n\u001b[1;32m 1071\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
747
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexing.py:1248\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m suppress(IndexingError):\n\u001b[1;32m 1247\u001b[0m tup \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_ellipsis(tup)\n\u001b[0;32m-> 1248\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_lowerdim\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;66;03m# no multi-index, so validate all of the indexers\u001b[39;00m\n\u001b[1;32m 1251\u001b[0m tup \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_tuple_indexer(tup)\n",
748
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexing.py:942\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_lowerdim\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 940\u001b[0m \u001b[38;5;66;03m# we may have a nested tuples indexer here\u001b[39;00m\n\u001b[1;32m 941\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_nested_tuple_indexer(tup):\n\u001b[0;32m--> 942\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_nested_tuple\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 944\u001b[0m \u001b[38;5;66;03m# we maybe be using a tuple to represent multiple dimensions here\u001b[39;00m\n\u001b[1;32m 945\u001b[0m ax0 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_axis(\u001b[38;5;241m0\u001b[39m)\n",
749
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexing.py:1034\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_nested_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;66;03m# this is a series with a multi-index specified a tuple of\u001b[39;00m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;66;03m# selectors\u001b[39;00m\n\u001b[1;32m 1033\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m-> 1034\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1036\u001b[0m \u001b[38;5;66;03m# handle the multi-axis by taking sections and reducing\u001b[39;00m\n\u001b[1;32m 1037\u001b[0m \u001b[38;5;66;03m# this is iterative\u001b[39;00m\n\u001b[1;32m 1038\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\n",
750
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexing.py:1306\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;66;03m# nested tuple slicing\u001b[39;00m\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_nested_tuple(key, labels):\n\u001b[0;32m-> 1306\u001b[0m locs \u001b[38;5;241m=\u001b[39m \u001b[43mlabels\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_locs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1307\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mslice\u001b[39m(\u001b[38;5;28;01mNone\u001b[39;00m)] \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim\n\u001b[1;32m 1308\u001b[0m indexer[axis] \u001b[38;5;241m=\u001b[39m locs\n",
751
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexes/multi.py:3422\u001b[0m, in \u001b[0;36mMultiIndex.get_locs\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m 3418\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 3420\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3421\u001b[0m \u001b[38;5;66;03m# a slice or a single label\u001b[39;00m\n\u001b[0;32m-> 3422\u001b[0m lvl_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_level_indexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3424\u001b[0m \u001b[38;5;66;03m# update indexer\u001b[39;00m\n\u001b[1;32m 3425\u001b[0m lvl_indexer \u001b[38;5;241m=\u001b[39m _to_bool_indexer(lvl_indexer)\n",
752
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexes/multi.py:3262\u001b[0m, in \u001b[0;36mMultiIndex._get_level_indexer\u001b[0;34m(self, key, level, indexer)\u001b[0m\n\u001b[1;32m 3258\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mslice\u001b[39m(i, j, step)\n\u001b[1;32m 3260\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 3262\u001b[0m idx \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_loc_single_level_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlevel_index\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3264\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m level \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lexsort_depth \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 3265\u001b[0m \u001b[38;5;66;03m# Desired level is not sorted\u001b[39;00m\n\u001b[1;32m 3266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(idx, \u001b[38;5;28mslice\u001b[39m):\n\u001b[1;32m 3267\u001b[0m \u001b[38;5;66;03m# test_get_loc_partial_timestamp_multiindex\u001b[39;00m\n",
753
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexes/multi.py:2848\u001b[0m, in \u001b[0;36mMultiIndex._get_loc_single_level_index\u001b[0;34m(self, level_index, key)\u001b[0m\n\u001b[1;32m 2846\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 2847\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 2848\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlevel_index\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n",
754
- "File \u001b[0;32m~/opt/anaconda3/envs/cryptodatapy/lib/python3.9/site-packages/pandas/core/indexes/base.py:3802\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3800\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3802\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3805\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
755
- "\u001b[0;31mKeyError\u001b[0m: 'OCEAN'"
756
- ]
1139
+ "data": {
1140
+ "text/plain": [
1141
+ "['CVC',\n",
1142
+ " 'CTK',\n",
1143
+ " 'STRAX',\n",
1144
+ " 'STPT',\n",
1145
+ " 'DGB',\n",
1146
+ " 'CVX',\n",
1147
+ " 'FTT',\n",
1148
+ " 'GLMR',\n",
1149
+ " 'OCEAN',\n",
1150
+ " 'SLP',\n",
1151
+ " 'AGIX',\n",
1152
+ " 'RAY',\n",
1153
+ " 'MDT',\n",
1154
+ " 'SC',\n",
1155
+ " 'SNT',\n",
1156
+ " 'IDEX',\n",
1157
+ " 'RAD',\n",
1158
+ " 'WAVES']"
1159
+ ]
1160
+ },
1161
+ "execution_count": 37,
1162
+ "metadata": {},
1163
+ "output_type": "execute_result"
757
1164
  }
758
1165
  ],
759
1166
  "source": [
760
- "clean_df.loc[:, 'OCEAN', :]"
1167
+ "clean.filtered_tickers"
761
1168
  ]
762
1169
  },
763
1170
  {
764
1171
  "cell_type": "code",
765
- "execution_count": 28,
766
- "id": "ad5a885c",
1172
+ "execution_count": 38,
1173
+ "id": "e4693e52",
1174
+ "metadata": {},
1175
+ "outputs": [
1176
+ {
1177
+ "data": {
1178
+ "text/plain": [
1179
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1180
+ ]
1181
+ },
1182
+ "execution_count": 38,
1183
+ "metadata": {},
1184
+ "output_type": "execute_result"
1185
+ }
1186
+ ],
1187
+ "source": [
1188
+ "clean.filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10)"
1189
+ ]
1190
+ },
1191
+ {
1192
+ "cell_type": "code",
1193
+ "execution_count": 39,
1194
+ "id": "311ee6ed",
767
1195
  "metadata": {},
768
1196
  "outputs": [
769
1197
  {
@@ -787,71 +1215,259 @@
787
1215
  " <thead>\n",
788
1216
  " <tr style=\"text-align: right;\">\n",
789
1217
  " <th></th>\n",
1218
+ " <th></th>\n",
790
1219
  " <th>open</th>\n",
791
1220
  " <th>high</th>\n",
792
1221
  " <th>low</th>\n",
793
1222
  " <th>close</th>\n",
794
- " <th>volume</th>\n",
795
- " <th>funding_rate</th>\n",
796
1223
  " </tr>\n",
797
1224
  " <tr>\n",
798
1225
  " <th>date</th>\n",
1226
+ " <th>ticker</th>\n",
1227
+ " <th></th>\n",
1228
+ " <th></th>\n",
1229
+ " <th></th>\n",
1230
+ " <th></th>\n",
1231
+ " </tr>\n",
1232
+ " </thead>\n",
1233
+ " <tbody>\n",
1234
+ " <tr>\n",
1235
+ " <th>2010-07-17</th>\n",
1236
+ " <th>BTC</th>\n",
1237
+ " <td>&lt;NA&gt;</td>\n",
1238
+ " <td>&lt;NA&gt;</td>\n",
1239
+ " <td>&lt;NA&gt;</td>\n",
1240
+ " <td>&lt;NA&gt;</td>\n",
1241
+ " </tr>\n",
1242
+ " <tr>\n",
1243
+ " <th>2010-07-18</th>\n",
1244
+ " <th>BTC</th>\n",
1245
+ " <td>&lt;NA&gt;</td>\n",
1246
+ " <td>&lt;NA&gt;</td>\n",
1247
+ " <td>&lt;NA&gt;</td>\n",
1248
+ " <td>&lt;NA&gt;</td>\n",
1249
+ " </tr>\n",
1250
+ " <tr>\n",
1251
+ " <th>2010-07-19</th>\n",
1252
+ " <th>BTC</th>\n",
1253
+ " <td>&lt;NA&gt;</td>\n",
1254
+ " <td>&lt;NA&gt;</td>\n",
1255
+ " <td>&lt;NA&gt;</td>\n",
1256
+ " <td>&lt;NA&gt;</td>\n",
1257
+ " </tr>\n",
1258
+ " <tr>\n",
1259
+ " <th>2010-07-20</th>\n",
1260
+ " <th>BTC</th>\n",
1261
+ " <td>&lt;NA&gt;</td>\n",
1262
+ " <td>&lt;NA&gt;</td>\n",
1263
+ " <td>&lt;NA&gt;</td>\n",
1264
+ " <td>&lt;NA&gt;</td>\n",
1265
+ " </tr>\n",
1266
+ " <tr>\n",
1267
+ " <th>2010-07-21</th>\n",
1268
+ " <th>BTC</th>\n",
1269
+ " <td>&lt;NA&gt;</td>\n",
1270
+ " <td>&lt;NA&gt;</td>\n",
1271
+ " <td>&lt;NA&gt;</td>\n",
1272
+ " <td>&lt;NA&gt;</td>\n",
1273
+ " </tr>\n",
1274
+ " <tr>\n",
1275
+ " <th>...</th>\n",
1276
+ " <th>...</th>\n",
1277
+ " <td>...</td>\n",
1278
+ " <td>...</td>\n",
1279
+ " <td>...</td>\n",
1280
+ " <td>...</td>\n",
1281
+ " </tr>\n",
1282
+ " <tr>\n",
1283
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1284
+ " <th>ZEN</th>\n",
1285
+ " <td>&lt;NA&gt;</td>\n",
1286
+ " <td>9.112</td>\n",
1287
+ " <td>8.285</td>\n",
1288
+ " <td>8.462</td>\n",
1289
+ " </tr>\n",
1290
+ " <tr>\n",
1291
+ " <th>ZIL</th>\n",
1292
+ " <td>0.01422</td>\n",
1293
+ " <td>0.01441</td>\n",
1294
+ " <td>0.01392</td>\n",
1295
+ " <td>0.01396</td>\n",
1296
+ " </tr>\n",
1297
+ " <tr>\n",
1298
+ " <th>ZK</th>\n",
1299
+ " <td>0.11451</td>\n",
1300
+ " <td>0.11673</td>\n",
1301
+ " <td>0.10857</td>\n",
1302
+ " <td>0.10874</td>\n",
1303
+ " </tr>\n",
1304
+ " <tr>\n",
1305
+ " <th>ZRO</th>\n",
1306
+ " <td>3.631</td>\n",
1307
+ " <td>3.648</td>\n",
1308
+ " <td>3.362</td>\n",
1309
+ " <td>3.367</td>\n",
1310
+ " </tr>\n",
1311
+ " <tr>\n",
1312
+ " <th>ZRX</th>\n",
1313
+ " <td>0.3055</td>\n",
1314
+ " <td>0.3121</td>\n",
1315
+ " <td>0.2983</td>\n",
1316
+ " <td>0.2988</td>\n",
1317
+ " </tr>\n",
1318
+ " </tbody>\n",
1319
+ "</table>\n",
1320
+ "<p>357696 rows × 4 columns</p>\n",
1321
+ "</div>"
1322
+ ],
1323
+ "text/plain": [
1324
+ " open high low close\n",
1325
+ "date ticker \n",
1326
+ "2010-07-17 BTC <NA> <NA> <NA> <NA>\n",
1327
+ "2010-07-18 BTC <NA> <NA> <NA> <NA>\n",
1328
+ "2010-07-19 BTC <NA> <NA> <NA> <NA>\n",
1329
+ "2010-07-20 BTC <NA> <NA> <NA> <NA>\n",
1330
+ "2010-07-21 BTC <NA> <NA> <NA> <NA>\n",
1331
+ "... ... ... ... ...\n",
1332
+ "2024-08-04 ZEN <NA> 9.112 8.285 8.462\n",
1333
+ " ZIL 0.01422 0.01441 0.01392 0.01396\n",
1334
+ " ZK 0.11451 0.11673 0.10857 0.10874\n",
1335
+ " ZRO 3.631 3.648 3.362 3.367\n",
1336
+ " ZRX 0.3055 0.3121 0.2983 0.2988\n",
1337
+ "\n",
1338
+ "[357696 rows x 4 columns]"
1339
+ ]
1340
+ },
1341
+ "execution_count": 39,
1342
+ "metadata": {},
1343
+ "output_type": "execute_result"
1344
+ }
1345
+ ],
1346
+ "source": [
1347
+ "clean.df"
1348
+ ]
1349
+ },
1350
+ {
1351
+ "cell_type": "code",
1352
+ "execution_count": 40,
1353
+ "id": "83caa2c6",
1354
+ "metadata": {},
1355
+ "outputs": [
1356
+ {
1357
+ "data": {
1358
+ "text/plain": [
1359
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1360
+ ]
1361
+ },
1362
+ "execution_count": 40,
1363
+ "metadata": {},
1364
+ "output_type": "execute_result"
1365
+ }
1366
+ ],
1367
+ "source": [
1368
+ "clean.repair_outliers(imp_method='fcst')"
1369
+ ]
1370
+ },
1371
+ {
1372
+ "cell_type": "code",
1373
+ "execution_count": 41,
1374
+ "id": "f4c21352",
1375
+ "metadata": {},
1376
+ "outputs": [
1377
+ {
1378
+ "data": {
1379
+ "text/html": [
1380
+ "<div>\n",
1381
+ "<style scoped>\n",
1382
+ " .dataframe tbody tr th:only-of-type {\n",
1383
+ " vertical-align: middle;\n",
1384
+ " }\n",
1385
+ "\n",
1386
+ " .dataframe tbody tr th {\n",
1387
+ " vertical-align: top;\n",
1388
+ " }\n",
1389
+ "\n",
1390
+ " .dataframe thead th {\n",
1391
+ " text-align: right;\n",
1392
+ " }\n",
1393
+ "</style>\n",
1394
+ "<table border=\"1\" class=\"dataframe\">\n",
1395
+ " <thead>\n",
1396
+ " <tr style=\"text-align: right;\">\n",
1397
+ " <th></th>\n",
1398
+ " <th></th>\n",
1399
+ " <th>open</th>\n",
1400
+ " <th>high</th>\n",
1401
+ " <th>low</th>\n",
1402
+ " <th>close</th>\n",
1403
+ " <th>volume</th>\n",
1404
+ " <th>funding_rate</th>\n",
1405
+ " </tr>\n",
1406
+ " <tr>\n",
1407
+ " <th>date</th>\n",
1408
+ " <th>ticker</th>\n",
1409
+ " <th></th>\n",
799
1410
  " <th></th>\n",
800
1411
  " <th></th>\n",
801
1412
  " <th></th>\n",
802
1413
  " <th></th>\n",
803
1414
  " <th></th>\n",
804
- " <th></th>\n",
805
1415
  " </tr>\n",
806
1416
  " </thead>\n",
807
1417
  " <tbody>\n",
808
1418
  " <tr>\n",
809
- " <th>2015-06-12</th>\n",
810
- " <td>229.88</td>\n",
811
- " <td>231.58</td>\n",
812
- " <td>229.29</td>\n",
813
- " <td>230.46</td>\n",
814
- " <td>40744.820</td>\n",
1419
+ " <th>2010-07-17</th>\n",
1420
+ " <th>BTC</th>\n",
1421
+ " <td>42.99</td>\n",
1422
+ " <td>43.76</td>\n",
1423
+ " <td>40.99</td>\n",
1424
+ " <td>41.01</td>\n",
1425
+ " <td>2.000000e+01</td>\n",
815
1426
  " <td>0.000000</td>\n",
816
1427
  " </tr>\n",
817
1428
  " <tr>\n",
818
- " <th>2015-06-13</th>\n",
819
- " <td>230.46</td>\n",
820
- " <td>233.14</td>\n",
821
- " <td>229.01</td>\n",
822
- " <td>232.48</td>\n",
823
- " <td>38008.730</td>\n",
1429
+ " <th>2010-07-18</th>\n",
1430
+ " <th>BTC</th>\n",
1431
+ " <td>0.07921</td>\n",
1432
+ " <td>0.08181</td>\n",
1433
+ " <td>0.06634</td>\n",
1434
+ " <td>0.07921</td>\n",
1435
+ " <td>7.501000e+01</td>\n",
824
1436
  " <td>0.000000</td>\n",
825
1437
  " </tr>\n",
826
1438
  " <tr>\n",
827
- " <th>2015-06-14</th>\n",
828
- " <td>232.48</td>\n",
829
- " <td>235.51</td>\n",
830
- " <td>232.09</td>\n",
831
- " <td>233.75</td>\n",
832
- " <td>32894.870</td>\n",
1439
+ " <th>2010-07-19</th>\n",
1440
+ " <th>BTC</th>\n",
1441
+ " <td>0.07474</td>\n",
1442
+ " <td>0.07921</td>\n",
1443
+ " <td>0.0505</td>\n",
1444
+ " <td>0.06262</td>\n",
1445
+ " <td>5.740000e+02</td>\n",
833
1446
  " <td>0.000000</td>\n",
834
1447
  " </tr>\n",
835
1448
  " <tr>\n",
836
- " <th>2015-06-15</th>\n",
837
- " <td>233.75</td>\n",
838
- " <td>238.55</td>\n",
839
- " <td>233.29</td>\n",
840
- " <td>237.0</td>\n",
841
- " <td>63467.090</td>\n",
1449
+ " <th>2010-07-20</th>\n",
1450
+ " <th>BTC</th>\n",
1451
+ " <td>0.06868</td>\n",
1452
+ " <td>0.07344</td>\n",
1453
+ " <td>0.0505</td>\n",
1454
+ " <td>0.06052</td>\n",
1455
+ " <td>2.620000e+02</td>\n",
842
1456
  " <td>0.000000</td>\n",
843
1457
  " </tr>\n",
844
1458
  " <tr>\n",
845
- " <th>2015-06-16</th>\n",
846
- " <td>237.0</td>\n",
847
- " <td>254.15</td>\n",
848
- " <td>235.7</td>\n",
849
- " <td>249.82</td>\n",
850
- " <td>122473.610</td>\n",
1459
+ " <th>2010-07-21</th>\n",
1460
+ " <th>BTC</th>\n",
1461
+ " <td>0.06262</td>\n",
1462
+ " <td>0.06767</td>\n",
1463
+ " <td>0.0505</td>\n",
1464
+ " <td>0.05842</td>\n",
1465
+ " <td>5.750000e+02</td>\n",
851
1466
  " <td>0.000000</td>\n",
852
1467
  " </tr>\n",
853
1468
  " <tr>\n",
854
1469
  " <th>...</th>\n",
1470
+ " <th>...</th>\n",
855
1471
  " <td>...</td>\n",
856
1472
  " <td>...</td>\n",
857
1473
  " <td>...</td>\n",
@@ -860,128 +1476,691 @@
860
1476
  " <td>...</td>\n",
861
1477
  " </tr>\n",
862
1478
  " <tr>\n",
863
- " <th>2024-07-31</th>\n",
864
- " <td>66159.3</td>\n",
865
- " <td>66826.3</td>\n",
866
- " <td>64500.4</td>\n",
867
- " <td>64601.8</td>\n",
868
- " <td>246389.446</td>\n",
869
- " <td>0.000141</td>\n",
1479
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1480
+ " <th>ZEN</th>\n",
1481
+ " <td>9.657</td>\n",
1482
+ " <td>9.112</td>\n",
1483
+ " <td>8.285</td>\n",
1484
+ " <td>8.462</td>\n",
1485
+ " <td>2.071124e+06</td>\n",
1486
+ " <td>0.000194</td>\n",
870
1487
  " </tr>\n",
871
1488
  " <tr>\n",
872
- " <th>2024-08-01</th>\n",
873
- " <td>64601.8</td>\n",
874
- " <td>65650.0</td>\n",
875
- " <td>62271.2</td>\n",
876
- " <td>65328.9</td>\n",
877
- " <td>372654.590</td>\n",
878
- " <td>0.000282</td>\n",
1489
+ " <th>ZIL</th>\n",
1490
+ " <td>0.01422</td>\n",
1491
+ " <td>0.01441</td>\n",
1492
+ " <td>0.01392</td>\n",
1493
+ " <td>0.01396</td>\n",
1494
+ " <td>2.048626e+08</td>\n",
1495
+ " <td>-0.000031</td>\n",
879
1496
  " </tr>\n",
880
1497
  " <tr>\n",
881
- " <th>2024-08-02</th>\n",
882
- " <td>65329.0</td>\n",
883
- " <td>65577.0</td>\n",
884
- " <td>61200.2</td>\n",
885
- " <td>61483.7</td>\n",
886
- " <td>421628.420</td>\n",
887
- " <td>0.000300</td>\n",
1498
+ " <th>ZK</th>\n",
1499
+ " <td>0.11451</td>\n",
1500
+ " <td>0.11673</td>\n",
1501
+ " <td>0.10857</td>\n",
1502
+ " <td>0.10874</td>\n",
1503
+ " <td>3.833253e+08</td>\n",
1504
+ " <td>0.000200</td>\n",
888
1505
  " </tr>\n",
889
1506
  " <tr>\n",
890
- " <th>2024-08-03</th>\n",
891
- " <td>61483.7</td>\n",
892
- " <td>63871.5</td>\n",
893
- " <td>59800.0</td>\n",
894
- " <td>60684.6</td>\n",
895
- " <td>290469.956</td>\n",
896
- " <td>0.000240</td>\n",
1507
+ " <th>ZRO</th>\n",
1508
+ " <td>3.631</td>\n",
1509
+ " <td>3.648</td>\n",
1510
+ " <td>3.362</td>\n",
1511
+ " <td>3.367</td>\n",
1512
+ " <td>7.049472e+07</td>\n",
1513
+ " <td>0.000184</td>\n",
897
1514
  " </tr>\n",
898
1515
  " <tr>\n",
899
- " <th>2024-08-04</th>\n",
900
- " <td>60684.5</td>\n",
901
- " <td>61089.5</td>\n",
902
- " <td>60080.5</td>\n",
903
- " <td>60357.6</td>\n",
904
- " <td>85220.266</td>\n",
905
- " <td>0.000153</td>\n",
1516
+ " <th>ZRX</th>\n",
1517
+ " <td>0.3055</td>\n",
1518
+ " <td>0.3121</td>\n",
1519
+ " <td>0.2983</td>\n",
1520
+ " <td>0.2988</td>\n",
1521
+ " <td>9.810764e+06</td>\n",
1522
+ " <td>0.000186</td>\n",
906
1523
  " </tr>\n",
907
1524
  " </tbody>\n",
908
1525
  "</table>\n",
909
- "<p>3342 rows × 6 columns</p>\n",
1526
+ "<p>357696 rows × 6 columns</p>\n",
910
1527
  "</div>"
911
1528
  ],
912
1529
  "text/plain": [
913
- " open high low close volume funding_rate\n",
914
- "date \n",
915
- "2015-06-12 229.88 231.58 229.29 230.46 40744.820 0.000000\n",
916
- "2015-06-13 230.46 233.14 229.01 232.48 38008.730 0.000000\n",
917
- "2015-06-14 232.48 235.51 232.09 233.75 32894.870 0.000000\n",
918
- "2015-06-15 233.75 238.55 233.29 237.0 63467.090 0.000000\n",
919
- "2015-06-16 237.0 254.15 235.7 249.82 122473.610 0.000000\n",
920
- "... ... ... ... ... ... ...\n",
921
- "2024-07-31 66159.3 66826.3 64500.4 64601.8 246389.446 0.000141\n",
922
- "2024-08-01 64601.8 65650.0 62271.2 65328.9 372654.590 0.000282\n",
923
- "2024-08-02 65329.0 65577.0 61200.2 61483.7 421628.420 0.000300\n",
924
- "2024-08-03 61483.7 63871.5 59800.0 60684.6 290469.956 0.000240\n",
925
- "2024-08-04 60684.5 61089.5 60080.5 60357.6 85220.266 0.000153\n",
1530
+ " open high low close volume \\\n",
1531
+ "date ticker \n",
1532
+ "2010-07-17 BTC 42.99 43.76 40.99 41.01 2.000000e+01 \n",
1533
+ "2010-07-18 BTC 0.07921 0.08181 0.06634 0.07921 7.501000e+01 \n",
1534
+ "2010-07-19 BTC 0.07474 0.07921 0.0505 0.06262 5.740000e+02 \n",
1535
+ "2010-07-20 BTC 0.06868 0.07344 0.0505 0.06052 2.620000e+02 \n",
1536
+ "2010-07-21 BTC 0.06262 0.06767 0.0505 0.05842 5.750000e+02 \n",
1537
+ "... ... ... ... ... ... \n",
1538
+ "2024-08-04 ZEN 9.657 9.112 8.285 8.462 2.071124e+06 \n",
1539
+ " ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
1540
+ " ZK 0.11451 0.11673 0.10857 0.10874 3.833253e+08 \n",
1541
+ " ZRO 3.631 3.648 3.362 3.367 7.049472e+07 \n",
1542
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
1543
+ "\n",
1544
+ " funding_rate \n",
1545
+ "date ticker \n",
1546
+ "2010-07-17 BTC 0.000000 \n",
1547
+ "2010-07-18 BTC 0.000000 \n",
1548
+ "2010-07-19 BTC 0.000000 \n",
1549
+ "2010-07-20 BTC 0.000000 \n",
1550
+ "2010-07-21 BTC 0.000000 \n",
1551
+ "... ... \n",
1552
+ "2024-08-04 ZEN 0.000194 \n",
1553
+ " ZIL -0.000031 \n",
1554
+ " ZK 0.000200 \n",
1555
+ " ZRO 0.000184 \n",
1556
+ " ZRX 0.000186 \n",
926
1557
  "\n",
927
- "[3342 rows x 6 columns]"
1558
+ "[357696 rows x 6 columns]"
928
1559
  ]
929
1560
  },
930
- "execution_count": 28,
1561
+ "execution_count": 41,
931
1562
  "metadata": {},
932
1563
  "output_type": "execute_result"
933
1564
  }
934
1565
  ],
935
1566
  "source": [
936
- "clean_df.loc[:, 'BTC', :]"
1567
+ "clean.df"
937
1568
  ]
938
1569
  },
939
1570
  {
940
1571
  "cell_type": "code",
941
- "execution_count": 29,
942
- "id": "a9c262fc",
1572
+ "execution_count": 42,
1573
+ "id": "66b3d8d2",
943
1574
  "metadata": {},
944
- "outputs": [],
1575
+ "outputs": [
1576
+ {
1577
+ "data": {
1578
+ "text/plain": [
1579
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1580
+ ]
1581
+ },
1582
+ "execution_count": 42,
1583
+ "metadata": {},
1584
+ "output_type": "execute_result"
1585
+ }
1586
+ ],
945
1587
  "source": [
946
- "clean_df.to_parquet('s3://factorlab-data/binance_historical_ohlcv_daily.parquet')"
1588
+ "clean.filter_avg_trading_val(thresh_val=1000000)"
947
1589
  ]
948
1590
  },
949
1591
  {
950
1592
  "cell_type": "code",
951
- "execution_count": 30,
952
- "id": "893e3e38",
1593
+ "execution_count": 43,
1594
+ "id": "b7f23056",
1595
+ "metadata": {},
1596
+ "outputs": [
1597
+ {
1598
+ "data": {
1599
+ "text/html": [
1600
+ "<div>\n",
1601
+ "<style scoped>\n",
1602
+ " .dataframe tbody tr th:only-of-type {\n",
1603
+ " vertical-align: middle;\n",
1604
+ " }\n",
1605
+ "\n",
1606
+ " .dataframe tbody tr th {\n",
1607
+ " vertical-align: top;\n",
1608
+ " }\n",
1609
+ "\n",
1610
+ " .dataframe thead th {\n",
1611
+ " text-align: right;\n",
1612
+ " }\n",
1613
+ "</style>\n",
1614
+ "<table border=\"1\" class=\"dataframe\">\n",
1615
+ " <thead>\n",
1616
+ " <tr style=\"text-align: right;\">\n",
1617
+ " <th></th>\n",
1618
+ " <th></th>\n",
1619
+ " <th>open</th>\n",
1620
+ " <th>high</th>\n",
1621
+ " <th>low</th>\n",
1622
+ " <th>close</th>\n",
1623
+ " <th>volume</th>\n",
1624
+ " <th>funding_rate</th>\n",
1625
+ " </tr>\n",
1626
+ " <tr>\n",
1627
+ " <th>date</th>\n",
1628
+ " <th>ticker</th>\n",
1629
+ " <th></th>\n",
1630
+ " <th></th>\n",
1631
+ " <th></th>\n",
1632
+ " <th></th>\n",
1633
+ " <th></th>\n",
1634
+ " <th></th>\n",
1635
+ " </tr>\n",
1636
+ " </thead>\n",
1637
+ " <tbody>\n",
1638
+ " <tr>\n",
1639
+ " <th>2010-07-17</th>\n",
1640
+ " <th>BTC</th>\n",
1641
+ " <td>&lt;NA&gt;</td>\n",
1642
+ " <td>&lt;NA&gt;</td>\n",
1643
+ " <td>&lt;NA&gt;</td>\n",
1644
+ " <td>&lt;NA&gt;</td>\n",
1645
+ " <td>NaN</td>\n",
1646
+ " <td>NaN</td>\n",
1647
+ " </tr>\n",
1648
+ " <tr>\n",
1649
+ " <th>2010-07-18</th>\n",
1650
+ " <th>BTC</th>\n",
1651
+ " <td>&lt;NA&gt;</td>\n",
1652
+ " <td>&lt;NA&gt;</td>\n",
1653
+ " <td>&lt;NA&gt;</td>\n",
1654
+ " <td>&lt;NA&gt;</td>\n",
1655
+ " <td>NaN</td>\n",
1656
+ " <td>NaN</td>\n",
1657
+ " </tr>\n",
1658
+ " <tr>\n",
1659
+ " <th>2010-07-19</th>\n",
1660
+ " <th>BTC</th>\n",
1661
+ " <td>&lt;NA&gt;</td>\n",
1662
+ " <td>&lt;NA&gt;</td>\n",
1663
+ " <td>&lt;NA&gt;</td>\n",
1664
+ " <td>&lt;NA&gt;</td>\n",
1665
+ " <td>NaN</td>\n",
1666
+ " <td>NaN</td>\n",
1667
+ " </tr>\n",
1668
+ " <tr>\n",
1669
+ " <th>2010-07-20</th>\n",
1670
+ " <th>BTC</th>\n",
1671
+ " <td>&lt;NA&gt;</td>\n",
1672
+ " <td>&lt;NA&gt;</td>\n",
1673
+ " <td>&lt;NA&gt;</td>\n",
1674
+ " <td>&lt;NA&gt;</td>\n",
1675
+ " <td>NaN</td>\n",
1676
+ " <td>NaN</td>\n",
1677
+ " </tr>\n",
1678
+ " <tr>\n",
1679
+ " <th>2010-07-21</th>\n",
1680
+ " <th>BTC</th>\n",
1681
+ " <td>&lt;NA&gt;</td>\n",
1682
+ " <td>&lt;NA&gt;</td>\n",
1683
+ " <td>&lt;NA&gt;</td>\n",
1684
+ " <td>&lt;NA&gt;</td>\n",
1685
+ " <td>NaN</td>\n",
1686
+ " <td>NaN</td>\n",
1687
+ " </tr>\n",
1688
+ " <tr>\n",
1689
+ " <th>...</th>\n",
1690
+ " <th>...</th>\n",
1691
+ " <td>...</td>\n",
1692
+ " <td>...</td>\n",
1693
+ " <td>...</td>\n",
1694
+ " <td>...</td>\n",
1695
+ " <td>...</td>\n",
1696
+ " <td>...</td>\n",
1697
+ " </tr>\n",
1698
+ " <tr>\n",
1699
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1700
+ " <th>ZEN</th>\n",
1701
+ " <td>9.657</td>\n",
1702
+ " <td>9.112</td>\n",
1703
+ " <td>8.285</td>\n",
1704
+ " <td>8.462</td>\n",
1705
+ " <td>2071124.0</td>\n",
1706
+ " <td>0.000194</td>\n",
1707
+ " </tr>\n",
1708
+ " <tr>\n",
1709
+ " <th>ZIL</th>\n",
1710
+ " <td>0.01422</td>\n",
1711
+ " <td>0.01441</td>\n",
1712
+ " <td>0.01392</td>\n",
1713
+ " <td>0.01396</td>\n",
1714
+ " <td>204862627.0</td>\n",
1715
+ " <td>-0.000031</td>\n",
1716
+ " </tr>\n",
1717
+ " <tr>\n",
1718
+ " <th>ZK</th>\n",
1719
+ " <td>0.11451</td>\n",
1720
+ " <td>0.11673</td>\n",
1721
+ " <td>0.10857</td>\n",
1722
+ " <td>0.10874</td>\n",
1723
+ " <td>383325323.0</td>\n",
1724
+ " <td>0.000200</td>\n",
1725
+ " </tr>\n",
1726
+ " <tr>\n",
1727
+ " <th>ZRO</th>\n",
1728
+ " <td>3.631</td>\n",
1729
+ " <td>3.648</td>\n",
1730
+ " <td>3.362</td>\n",
1731
+ " <td>3.367</td>\n",
1732
+ " <td>70494717.0</td>\n",
1733
+ " <td>0.000184</td>\n",
1734
+ " </tr>\n",
1735
+ " <tr>\n",
1736
+ " <th>ZRX</th>\n",
1737
+ " <td>0.3055</td>\n",
1738
+ " <td>0.3121</td>\n",
1739
+ " <td>0.2983</td>\n",
1740
+ " <td>0.2988</td>\n",
1741
+ " <td>9810764.1</td>\n",
1742
+ " <td>0.000186</td>\n",
1743
+ " </tr>\n",
1744
+ " </tbody>\n",
1745
+ "</table>\n",
1746
+ "<p>357696 rows × 6 columns</p>\n",
1747
+ "</div>"
1748
+ ],
1749
+ "text/plain": [
1750
+ " open high low close volume \\\n",
1751
+ "date ticker \n",
1752
+ "2010-07-17 BTC <NA> <NA> <NA> <NA> NaN \n",
1753
+ "2010-07-18 BTC <NA> <NA> <NA> <NA> NaN \n",
1754
+ "2010-07-19 BTC <NA> <NA> <NA> <NA> NaN \n",
1755
+ "2010-07-20 BTC <NA> <NA> <NA> <NA> NaN \n",
1756
+ "2010-07-21 BTC <NA> <NA> <NA> <NA> NaN \n",
1757
+ "... ... ... ... ... ... \n",
1758
+ "2024-08-04 ZEN 9.657 9.112 8.285 8.462 2071124.0 \n",
1759
+ " ZIL 0.01422 0.01441 0.01392 0.01396 204862627.0 \n",
1760
+ " ZK 0.11451 0.11673 0.10857 0.10874 383325323.0 \n",
1761
+ " ZRO 3.631 3.648 3.362 3.367 70494717.0 \n",
1762
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9810764.1 \n",
1763
+ "\n",
1764
+ " funding_rate \n",
1765
+ "date ticker \n",
1766
+ "2010-07-17 BTC NaN \n",
1767
+ "2010-07-18 BTC NaN \n",
1768
+ "2010-07-19 BTC NaN \n",
1769
+ "2010-07-20 BTC NaN \n",
1770
+ "2010-07-21 BTC NaN \n",
1771
+ "... ... \n",
1772
+ "2024-08-04 ZEN 0.000194 \n",
1773
+ " ZIL -0.000031 \n",
1774
+ " ZK 0.000200 \n",
1775
+ " ZRO 0.000184 \n",
1776
+ " ZRX 0.000186 \n",
1777
+ "\n",
1778
+ "[357696 rows x 6 columns]"
1779
+ ]
1780
+ },
1781
+ "execution_count": 43,
1782
+ "metadata": {},
1783
+ "output_type": "execute_result"
1784
+ }
1785
+ ],
1786
+ "source": [
1787
+ "clean.df"
1788
+ ]
1789
+ },
1790
+ {
1791
+ "cell_type": "code",
1792
+ "execution_count": 44,
1793
+ "id": "b5ac345d",
1794
+ "metadata": {},
1795
+ "outputs": [
1796
+ {
1797
+ "data": {
1798
+ "text/plain": [
1799
+ "['CVC',\n",
1800
+ " 'CTK',\n",
1801
+ " 'STRAX',\n",
1802
+ " 'STPT',\n",
1803
+ " 'DGB',\n",
1804
+ " 'CVX',\n",
1805
+ " 'FTT',\n",
1806
+ " 'GLMR',\n",
1807
+ " 'OCEAN',\n",
1808
+ " 'SLP',\n",
1809
+ " 'AGIX',\n",
1810
+ " 'RAY',\n",
1811
+ " 'MDT',\n",
1812
+ " 'SC',\n",
1813
+ " 'SNT',\n",
1814
+ " 'IDEX',\n",
1815
+ " 'RAD',\n",
1816
+ " 'WAVES']"
1817
+ ]
1818
+ },
1819
+ "execution_count": 44,
1820
+ "metadata": {},
1821
+ "output_type": "execute_result"
1822
+ }
1823
+ ],
1824
+ "source": [
1825
+ "clean.filtered_tickers"
1826
+ ]
1827
+ },
1828
+ {
1829
+ "cell_type": "code",
1830
+ "execution_count": 45,
1831
+ "id": "6d36d4ce",
1832
+ "metadata": {},
1833
+ "outputs": [
1834
+ {
1835
+ "data": {
1836
+ "text/plain": [
1837
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1838
+ ]
1839
+ },
1840
+ "execution_count": 45,
1841
+ "metadata": {},
1842
+ "output_type": "execute_result"
1843
+ }
1844
+ ],
1845
+ "source": [
1846
+ "clean.filter_missing_vals_gaps()"
1847
+ ]
1848
+ },
1849
+ {
1850
+ "cell_type": "code",
1851
+ "execution_count": 47,
1852
+ "id": "7fe432d5",
1853
+ "metadata": {},
1854
+ "outputs": [
1855
+ {
1856
+ "data": {
1857
+ "text/plain": [
1858
+ "['CVC',\n",
1859
+ " 'CTK',\n",
1860
+ " 'STRAX',\n",
1861
+ " 'STPT',\n",
1862
+ " 'DGB',\n",
1863
+ " 'CVX',\n",
1864
+ " 'FTT',\n",
1865
+ " 'GLMR',\n",
1866
+ " 'OCEAN',\n",
1867
+ " 'SLP',\n",
1868
+ " 'AGIX',\n",
1869
+ " 'RAY',\n",
1870
+ " 'MDT',\n",
1871
+ " 'SC',\n",
1872
+ " 'SNT',\n",
1873
+ " 'IDEX',\n",
1874
+ " 'RAD',\n",
1875
+ " 'WAVES']"
1876
+ ]
1877
+ },
1878
+ "execution_count": 47,
1879
+ "metadata": {},
1880
+ "output_type": "execute_result"
1881
+ }
1882
+ ],
1883
+ "source": [
1884
+ "clean.filtered_tickers"
1885
+ ]
1886
+ },
1887
+ {
1888
+ "cell_type": "code",
1889
+ "execution_count": 48,
1890
+ "id": "f7cbdbae",
1891
+ "metadata": {},
1892
+ "outputs": [
1893
+ {
1894
+ "data": {
1895
+ "text/plain": [
1896
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1897
+ ]
1898
+ },
1899
+ "execution_count": 48,
1900
+ "metadata": {},
1901
+ "output_type": "execute_result"
1902
+ }
1903
+ ],
1904
+ "source": [
1905
+ "clean.filter_min_nobs(ts_obs=1400, cs_obs=5)"
1906
+ ]
1907
+ },
1908
+ {
1909
+ "cell_type": "code",
1910
+ "execution_count": 49,
1911
+ "id": "f13d9f1f",
1912
+ "metadata": {},
1913
+ "outputs": [
1914
+ {
1915
+ "data": {
1916
+ "text/html": [
1917
+ "<div>\n",
1918
+ "<style scoped>\n",
1919
+ " .dataframe tbody tr th:only-of-type {\n",
1920
+ " vertical-align: middle;\n",
1921
+ " }\n",
1922
+ "\n",
1923
+ " .dataframe tbody tr th {\n",
1924
+ " vertical-align: top;\n",
1925
+ " }\n",
1926
+ "\n",
1927
+ " .dataframe thead th {\n",
1928
+ " text-align: right;\n",
1929
+ " }\n",
1930
+ "</style>\n",
1931
+ "<table border=\"1\" class=\"dataframe\">\n",
1932
+ " <thead>\n",
1933
+ " <tr style=\"text-align: right;\">\n",
1934
+ " <th></th>\n",
1935
+ " <th></th>\n",
1936
+ " <th>open</th>\n",
1937
+ " <th>high</th>\n",
1938
+ " <th>low</th>\n",
1939
+ " <th>close</th>\n",
1940
+ " <th>volume</th>\n",
1941
+ " <th>funding_rate</th>\n",
1942
+ " </tr>\n",
1943
+ " <tr>\n",
1944
+ " <th>date</th>\n",
1945
+ " <th>ticker</th>\n",
1946
+ " <th></th>\n",
1947
+ " <th></th>\n",
1948
+ " <th></th>\n",
1949
+ " <th></th>\n",
1950
+ " <th></th>\n",
1951
+ " <th></th>\n",
1952
+ " </tr>\n",
1953
+ " </thead>\n",
1954
+ " <tbody>\n",
1955
+ " <tr>\n",
1956
+ " <th rowspan=\"5\" valign=\"top\">2017-06-18</th>\n",
1957
+ " <th>BAT</th>\n",
1958
+ " <td>&lt;NA&gt;</td>\n",
1959
+ " <td>&lt;NA&gt;</td>\n",
1960
+ " <td>&lt;NA&gt;</td>\n",
1961
+ " <td>&lt;NA&gt;</td>\n",
1962
+ " <td>NaN</td>\n",
1963
+ " <td>NaN</td>\n",
1964
+ " </tr>\n",
1965
+ " <tr>\n",
1966
+ " <th>BTC</th>\n",
1967
+ " <td>2655.1</td>\n",
1968
+ " <td>2676.04</td>\n",
1969
+ " <td>2488.59</td>\n",
1970
+ " <td>2539.56</td>\n",
1971
+ " <td>9.200422e+04</td>\n",
1972
+ " <td>0.000000</td>\n",
1973
+ " </tr>\n",
1974
+ " <tr>\n",
1975
+ " <th>DASH</th>\n",
1976
+ " <td>&lt;NA&gt;</td>\n",
1977
+ " <td>&lt;NA&gt;</td>\n",
1978
+ " <td>&lt;NA&gt;</td>\n",
1979
+ " <td>&lt;NA&gt;</td>\n",
1980
+ " <td>NaN</td>\n",
1981
+ " <td>NaN</td>\n",
1982
+ " </tr>\n",
1983
+ " <tr>\n",
1984
+ " <th>DOGE</th>\n",
1985
+ " <td>&lt;NA&gt;</td>\n",
1986
+ " <td>&lt;NA&gt;</td>\n",
1987
+ " <td>&lt;NA&gt;</td>\n",
1988
+ " <td>&lt;NA&gt;</td>\n",
1989
+ " <td>NaN</td>\n",
1990
+ " <td>NaN</td>\n",
1991
+ " </tr>\n",
1992
+ " <tr>\n",
1993
+ " <th>ETC</th>\n",
1994
+ " <td>21.98</td>\n",
1995
+ " <td>23.8</td>\n",
1996
+ " <td>19.5</td>\n",
1997
+ " <td>20.27</td>\n",
1998
+ " <td>1.306320e+06</td>\n",
1999
+ " <td>0.000000</td>\n",
2000
+ " </tr>\n",
2001
+ " <tr>\n",
2002
+ " <th>...</th>\n",
2003
+ " <th>...</th>\n",
2004
+ " <td>...</td>\n",
2005
+ " <td>...</td>\n",
2006
+ " <td>...</td>\n",
2007
+ " <td>...</td>\n",
2008
+ " <td>...</td>\n",
2009
+ " <td>...</td>\n",
2010
+ " </tr>\n",
2011
+ " <tr>\n",
2012
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
2013
+ " <th>XTZ</th>\n",
2014
+ " <td>0.685</td>\n",
2015
+ " <td>0.693</td>\n",
2016
+ " <td>0.677</td>\n",
2017
+ " <td>0.679</td>\n",
2018
+ " <td>5.373374e+06</td>\n",
2019
+ " <td>0.000200</td>\n",
2020
+ " </tr>\n",
2021
+ " <tr>\n",
2022
+ " <th>YFI</th>\n",
2023
+ " <td>5341.0</td>\n",
2024
+ " <td>5341.0</td>\n",
2025
+ " <td>5196.0</td>\n",
2026
+ " <td>5198.0</td>\n",
2027
+ " <td>4.281050e+02</td>\n",
2028
+ " <td>0.000169</td>\n",
2029
+ " </tr>\n",
2030
+ " <tr>\n",
2031
+ " <th>ZEC</th>\n",
2032
+ " <td>31.76</td>\n",
2033
+ " <td>34.44</td>\n",
2034
+ " <td>31.28</td>\n",
2035
+ " <td>31.55</td>\n",
2036
+ " <td>1.348085e+06</td>\n",
2037
+ " <td>0.000191</td>\n",
2038
+ " </tr>\n",
2039
+ " <tr>\n",
2040
+ " <th>ZIL</th>\n",
2041
+ " <td>0.01422</td>\n",
2042
+ " <td>0.01441</td>\n",
2043
+ " <td>0.01392</td>\n",
2044
+ " <td>0.01396</td>\n",
2045
+ " <td>2.048626e+08</td>\n",
2046
+ " <td>-0.000031</td>\n",
2047
+ " </tr>\n",
2048
+ " <tr>\n",
2049
+ " <th>ZRX</th>\n",
2050
+ " <td>0.3055</td>\n",
2051
+ " <td>0.3121</td>\n",
2052
+ " <td>0.2983</td>\n",
2053
+ " <td>0.2988</td>\n",
2054
+ " <td>9.810764e+06</td>\n",
2055
+ " <td>0.000186</td>\n",
2056
+ " </tr>\n",
2057
+ " </tbody>\n",
2058
+ "</table>\n",
2059
+ "<p>136630 rows × 6 columns</p>\n",
2060
+ "</div>"
2061
+ ],
2062
+ "text/plain": [
2063
+ " open high low close volume \\\n",
2064
+ "date ticker \n",
2065
+ "2017-06-18 BAT <NA> <NA> <NA> <NA> NaN \n",
2066
+ " BTC 2655.1 2676.04 2488.59 2539.56 9.200422e+04 \n",
2067
+ " DASH <NA> <NA> <NA> <NA> NaN \n",
2068
+ " DOGE <NA> <NA> <NA> <NA> NaN \n",
2069
+ " ETC 21.98 23.8 19.5 20.27 1.306320e+06 \n",
2070
+ "... ... ... ... ... ... \n",
2071
+ "2024-08-04 XTZ 0.685 0.693 0.677 0.679 5.373374e+06 \n",
2072
+ " YFI 5341.0 5341.0 5196.0 5198.0 4.281050e+02 \n",
2073
+ " ZEC 31.76 34.44 31.28 31.55 1.348085e+06 \n",
2074
+ " ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
2075
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
2076
+ "\n",
2077
+ " funding_rate \n",
2078
+ "date ticker \n",
2079
+ "2017-06-18 BAT NaN \n",
2080
+ " BTC 0.000000 \n",
2081
+ " DASH NaN \n",
2082
+ " DOGE NaN \n",
2083
+ " ETC 0.000000 \n",
2084
+ "... ... \n",
2085
+ "2024-08-04 XTZ 0.000200 \n",
2086
+ " YFI 0.000169 \n",
2087
+ " ZEC 0.000191 \n",
2088
+ " ZIL -0.000031 \n",
2089
+ " ZRX 0.000186 \n",
2090
+ "\n",
2091
+ "[136630 rows x 6 columns]"
2092
+ ]
2093
+ },
2094
+ "execution_count": 49,
2095
+ "metadata": {},
2096
+ "output_type": "execute_result"
2097
+ }
2098
+ ],
2099
+ "source": [
2100
+ "clean.df"
2101
+ ]
2102
+ },
2103
+ {
2104
+ "cell_type": "code",
2105
+ "execution_count": 50,
2106
+ "id": "a9b1764c",
953
2107
  "metadata": {},
954
2108
  "outputs": [],
955
2109
  "source": [
956
- "clean_df.to_parquet('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.parquet')"
2110
+ "# # clean data\n",
2111
+ "# clean_df = clean.filter_delisted_tickers().\\\n",
2112
+ "# filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
2113
+ "# repair_outliers(imp_method='fcst').\\\n",
2114
+ "# filter_avg_trading_val(thresh_val=1000000).\\\n",
2115
+ "# filter_missing_vals_gaps().\\\n",
2116
+ "# filter_min_nobs(ts_obs=1500, cs_obs=10).\\\n",
2117
+ "# get(attr='df').dropna(how='all')"
957
2118
  ]
958
2119
  },
959
2120
  {
960
2121
  "cell_type": "code",
961
2122
  "execution_count": null,
962
- "id": "8a962fa7",
2123
+ "id": "572d7a2e",
963
2124
  "metadata": {},
964
2125
  "outputs": [],
965
2126
  "source": []
966
2127
  },
967
2128
  {
968
2129
  "cell_type": "code",
969
- "execution_count": 31,
970
- "id": "54b818cd",
2130
+ "execution_count": 53,
2131
+ "id": "99857595",
971
2132
  "metadata": {},
972
2133
  "outputs": [],
973
2134
  "source": [
974
- "clean_df.to_csv('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.csv')"
2135
+ "df.to_parquet('s3://factorlab-data/binance_historical_ohlcv_daily.parquet')"
975
2136
  ]
976
2137
  },
977
2138
  {
978
2139
  "cell_type": "code",
979
- "execution_count": 32,
980
- "id": "a1c49f01",
2140
+ "execution_count": 54,
2141
+ "id": "15e66225",
2142
+ "metadata": {},
2143
+ "outputs": [],
2144
+ "source": [
2145
+ "clean.df.dropna(how='all').to_parquet('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.parquet')"
2146
+ ]
2147
+ },
2148
+ {
2149
+ "cell_type": "code",
2150
+ "execution_count": null,
2151
+ "id": "8a962fa7",
2152
+ "metadata": {},
2153
+ "outputs": [],
2154
+ "source": []
2155
+ },
2156
+ {
2157
+ "cell_type": "code",
2158
+ "execution_count": 55,
2159
+ "id": "54b818cd",
981
2160
  "metadata": {},
982
2161
  "outputs": [],
983
2162
  "source": [
984
- "clean_df.to_csv('s3://factorlab-data/binance_historical_ohlcv_daily.csv')"
2163
+ "clean.df.dropna(how='all').to_csv('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.csv')"
985
2164
  ]
986
2165
  },
987
2166
  {
@@ -995,7 +2174,7 @@
995
2174
  {
996
2175
  "cell_type": "code",
997
2176
  "execution_count": null,
998
- "id": "f9488eba",
2177
+ "id": "da953cbf",
999
2178
  "metadata": {},
1000
2179
  "outputs": [],
1001
2180
  "source": []