cryptodatapy 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/extract/libraries/ccxt.ipynb +873 -0
- cryptodatapy/extract/libraries/ccxt_api.py +40 -133
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1426 -247
- cryptodatapy/transform/convertparams.py +0 -1
- cryptodatapy/transform/filter.py +0 -1
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/METADATA +1 -1
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/RECORD +9 -8
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.4.dist-info → cryptodatapy-0.2.5.dist-info}/WHEEL +0 -0
@@ -64,6 +64,27 @@
|
|
64
64
|
{
|
65
65
|
"cell_type": "code",
|
66
66
|
"execution_count": 4,
|
67
|
+
"id": "5e796745",
|
68
|
+
"metadata": {},
|
69
|
+
"outputs": [
|
70
|
+
{
|
71
|
+
"data": {
|
72
|
+
"text/plain": [
|
73
|
+
"True"
|
74
|
+
]
|
75
|
+
},
|
76
|
+
"execution_count": 4,
|
77
|
+
"metadata": {},
|
78
|
+
"output_type": "execute_result"
|
79
|
+
}
|
80
|
+
],
|
81
|
+
"source": [
|
82
|
+
"'BTCST/USDT:USDT' in perp_tickers"
|
83
|
+
]
|
84
|
+
},
|
85
|
+
{
|
86
|
+
"cell_type": "code",
|
87
|
+
"execution_count": 5,
|
67
88
|
"id": "fcb74458",
|
68
89
|
"metadata": {},
|
69
90
|
"outputs": [],
|
@@ -75,7 +96,28 @@
|
|
75
96
|
},
|
76
97
|
{
|
77
98
|
"cell_type": "code",
|
78
|
-
"execution_count":
|
99
|
+
"execution_count": 6,
|
100
|
+
"id": "4b86fa0d",
|
101
|
+
"metadata": {},
|
102
|
+
"outputs": [
|
103
|
+
{
|
104
|
+
"data": {
|
105
|
+
"text/plain": [
|
106
|
+
"True"
|
107
|
+
]
|
108
|
+
},
|
109
|
+
"execution_count": 6,
|
110
|
+
"metadata": {},
|
111
|
+
"output_type": "execute_result"
|
112
|
+
}
|
113
|
+
],
|
114
|
+
"source": [
|
115
|
+
"'BTCST/USDT:USDT' in spot_tickers"
|
116
|
+
]
|
117
|
+
},
|
118
|
+
{
|
119
|
+
"cell_type": "code",
|
120
|
+
"execution_count": 7,
|
79
121
|
"id": "7962f7e5",
|
80
122
|
"metadata": {},
|
81
123
|
"outputs": [],
|
@@ -86,7 +128,7 @@
|
|
86
128
|
},
|
87
129
|
{
|
88
130
|
"cell_type": "code",
|
89
|
-
"execution_count":
|
131
|
+
"execution_count": 8,
|
90
132
|
"id": "877811c1",
|
91
133
|
"metadata": {},
|
92
134
|
"outputs": [
|
@@ -96,7 +138,7 @@
|
|
96
138
|
"314"
|
97
139
|
]
|
98
140
|
},
|
99
|
-
"execution_count":
|
141
|
+
"execution_count": 8,
|
100
142
|
"metadata": {},
|
101
143
|
"output_type": "execute_result"
|
102
144
|
}
|
@@ -108,7 +150,28 @@
|
|
108
150
|
},
|
109
151
|
{
|
110
152
|
"cell_type": "code",
|
111
|
-
"execution_count":
|
153
|
+
"execution_count": 9,
|
154
|
+
"id": "4ef7a85b",
|
155
|
+
"metadata": {},
|
156
|
+
"outputs": [
|
157
|
+
{
|
158
|
+
"data": {
|
159
|
+
"text/plain": [
|
160
|
+
"True"
|
161
|
+
]
|
162
|
+
},
|
163
|
+
"execution_count": 9,
|
164
|
+
"metadata": {},
|
165
|
+
"output_type": "execute_result"
|
166
|
+
}
|
167
|
+
],
|
168
|
+
"source": [
|
169
|
+
"'BTCST/USDT:USDT' in binance_tickers"
|
170
|
+
]
|
171
|
+
},
|
172
|
+
{
|
173
|
+
"cell_type": "code",
|
174
|
+
"execution_count": 10,
|
112
175
|
"id": "fe425163",
|
113
176
|
"metadata": {},
|
114
177
|
"outputs": [],
|
@@ -120,7 +183,28 @@
|
|
120
183
|
},
|
121
184
|
{
|
122
185
|
"cell_type": "code",
|
123
|
-
"execution_count":
|
186
|
+
"execution_count": 11,
|
187
|
+
"id": "9c63cd43",
|
188
|
+
"metadata": {},
|
189
|
+
"outputs": [
|
190
|
+
{
|
191
|
+
"data": {
|
192
|
+
"text/plain": [
|
193
|
+
"True"
|
194
|
+
]
|
195
|
+
},
|
196
|
+
"execution_count": 11,
|
197
|
+
"metadata": {},
|
198
|
+
"output_type": "execute_result"
|
199
|
+
}
|
200
|
+
],
|
201
|
+
"source": [
|
202
|
+
"'BTCST' in cc_tickers"
|
203
|
+
]
|
204
|
+
},
|
205
|
+
{
|
206
|
+
"cell_type": "code",
|
207
|
+
"execution_count": 12,
|
124
208
|
"id": "165053db",
|
125
209
|
"metadata": {},
|
126
210
|
"outputs": [],
|
@@ -128,24 +212,66 @@
|
|
128
212
|
"# keep only USDT ticker\n",
|
129
213
|
"bin_tickers = []\n",
|
130
214
|
"for ticker in binance_tickers:\n",
|
131
|
-
" if '/' in ticker and ticker.split('/')[1] == 'USDT':\n",
|
215
|
+
" if '/' in ticker and ticker.split('/')[1] == 'USDT:USDT':\n",
|
132
216
|
" bin_tickers.append(ticker.split('/')[0])"
|
133
217
|
]
|
134
218
|
},
|
135
219
|
{
|
136
220
|
"cell_type": "code",
|
137
|
-
"execution_count":
|
221
|
+
"execution_count": 13,
|
222
|
+
"id": "c93e487a",
|
223
|
+
"metadata": {},
|
224
|
+
"outputs": [
|
225
|
+
{
|
226
|
+
"data": {
|
227
|
+
"text/plain": [
|
228
|
+
"True"
|
229
|
+
]
|
230
|
+
},
|
231
|
+
"execution_count": 13,
|
232
|
+
"metadata": {},
|
233
|
+
"output_type": "execute_result"
|
234
|
+
}
|
235
|
+
],
|
236
|
+
"source": [
|
237
|
+
"'BTCST' in bin_tickers"
|
238
|
+
]
|
239
|
+
},
|
240
|
+
{
|
241
|
+
"cell_type": "code",
|
242
|
+
"execution_count": 14,
|
138
243
|
"id": "d6cf8a4c",
|
139
244
|
"metadata": {},
|
140
245
|
"outputs": [],
|
141
246
|
"source": [
|
142
247
|
"# usdt tickers\n",
|
143
|
-
"usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT']"
|
248
|
+
"usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT:USDT']"
|
144
249
|
]
|
145
250
|
},
|
146
251
|
{
|
147
252
|
"cell_type": "code",
|
148
|
-
"execution_count":
|
253
|
+
"execution_count": 15,
|
254
|
+
"id": "11ec0e6d",
|
255
|
+
"metadata": {},
|
256
|
+
"outputs": [
|
257
|
+
{
|
258
|
+
"data": {
|
259
|
+
"text/plain": [
|
260
|
+
"282"
|
261
|
+
]
|
262
|
+
},
|
263
|
+
"execution_count": 15,
|
264
|
+
"metadata": {},
|
265
|
+
"output_type": "execute_result"
|
266
|
+
}
|
267
|
+
],
|
268
|
+
"source": [
|
269
|
+
"len(usdt_tickers)"
|
270
|
+
]
|
271
|
+
},
|
272
|
+
{
|
273
|
+
"cell_type": "code",
|
274
|
+
"execution_count": 16,
|
149
275
|
"id": "633f7a3e",
|
150
276
|
"metadata": {},
|
151
277
|
"outputs": [],
|
@@ -156,17 +282,17 @@
|
|
156
282
|
},
|
157
283
|
{
|
158
284
|
"cell_type": "code",
|
159
|
-
"execution_count":
|
285
|
+
"execution_count": 17,
|
160
286
|
"id": "30337a71",
|
161
287
|
"metadata": {},
|
162
288
|
"outputs": [
|
163
289
|
{
|
164
290
|
"data": {
|
165
291
|
"text/plain": [
|
166
|
-
"
|
292
|
+
"270"
|
167
293
|
]
|
168
294
|
},
|
169
|
-
"execution_count":
|
295
|
+
"execution_count": 17,
|
170
296
|
"metadata": {},
|
171
297
|
"output_type": "execute_result"
|
172
298
|
}
|
@@ -175,6 +301,27 @@
|
|
175
301
|
"len(tickers)"
|
176
302
|
]
|
177
303
|
},
|
304
|
+
{
|
305
|
+
"cell_type": "code",
|
306
|
+
"execution_count": 18,
|
307
|
+
"id": "7bb3b25f",
|
308
|
+
"metadata": {},
|
309
|
+
"outputs": [
|
310
|
+
{
|
311
|
+
"data": {
|
312
|
+
"text/plain": [
|
313
|
+
"True"
|
314
|
+
]
|
315
|
+
},
|
316
|
+
"execution_count": 18,
|
317
|
+
"metadata": {},
|
318
|
+
"output_type": "execute_result"
|
319
|
+
}
|
320
|
+
],
|
321
|
+
"source": [
|
322
|
+
"'BTCST' in tickers"
|
323
|
+
]
|
324
|
+
},
|
178
325
|
{
|
179
326
|
"cell_type": "markdown",
|
180
327
|
"id": "f80eb97d",
|
@@ -185,7 +332,7 @@
|
|
185
332
|
},
|
186
333
|
{
|
187
334
|
"cell_type": "code",
|
188
|
-
"execution_count":
|
335
|
+
"execution_count": 20,
|
189
336
|
"id": "49b09508",
|
190
337
|
"metadata": {},
|
191
338
|
"outputs": [],
|
@@ -200,29 +347,35 @@
|
|
200
347
|
},
|
201
348
|
{
|
202
349
|
"cell_type": "code",
|
203
|
-
"execution_count":
|
350
|
+
"execution_count": 21,
|
204
351
|
"id": "6ac9365b",
|
205
352
|
"metadata": {},
|
206
|
-
"outputs": [
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
353
|
+
"outputs": [
|
354
|
+
{
|
355
|
+
"name": "stderr",
|
356
|
+
"output_type": "stream",
|
357
|
+
"text": [
|
358
|
+
"WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
|
359
|
+
"WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
|
360
|
+
"WARNING:root:Failed to pull data on attempt #1.\n",
|
361
|
+
"WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
|
362
|
+
"WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
|
363
|
+
"WARNING:root:Failed to pull data on attempt #2.\n",
|
364
|
+
"WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
|
365
|
+
"WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
|
366
|
+
"WARNING:root:Failed to pull data on attempt #3.\n",
|
367
|
+
"WARNING:root:Failed to get OHLCV data from binanceusdm for BTCST/USDT after many attempts.\n"
|
368
|
+
]
|
369
|
+
}
|
370
|
+
],
|
217
371
|
"source": [
|
218
|
-
"
|
219
|
-
"df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
|
372
|
+
"df1 = GetData(data_req).get_series()"
|
220
373
|
]
|
221
374
|
},
|
222
375
|
{
|
223
376
|
"cell_type": "code",
|
224
|
-
"execution_count":
|
225
|
-
"id": "
|
377
|
+
"execution_count": 22,
|
378
|
+
"id": "364bb46e",
|
226
379
|
"metadata": {},
|
227
380
|
"outputs": [
|
228
381
|
{
|
@@ -269,12 +422,12 @@
|
|
269
422
|
" <tr>\n",
|
270
423
|
" <th>2019-09-08</th>\n",
|
271
424
|
" <th>BTC</th>\n",
|
272
|
-
" <td>10000.
|
425
|
+
" <td>10000.0</td>\n",
|
273
426
|
" <td>10412.65</td>\n",
|
274
|
-
" <td>10000.
|
427
|
+
" <td>10000.0</td>\n",
|
275
428
|
" <td>10391.63</td>\n",
|
276
429
|
" <td>3096.291</td>\n",
|
277
|
-
" <td
|
430
|
+
" <td><NA></td>\n",
|
278
431
|
" </tr>\n",
|
279
432
|
" <tr>\n",
|
280
433
|
" <th>2019-09-09</th>\n",
|
@@ -282,14 +435,14 @@
|
|
282
435
|
" <td>10316.62</td>\n",
|
283
436
|
" <td>10475.54</td>\n",
|
284
437
|
" <td>10077.22</td>\n",
|
285
|
-
" <td>10307.
|
438
|
+
" <td>10307.0</td>\n",
|
286
439
|
" <td>14824.373</td>\n",
|
287
|
-
" <td
|
440
|
+
" <td><NA></td>\n",
|
288
441
|
" </tr>\n",
|
289
442
|
" <tr>\n",
|
290
443
|
" <th>2019-09-10</th>\n",
|
291
444
|
" <th>BTC</th>\n",
|
292
|
-
" <td>10307.
|
445
|
+
" <td>10307.0</td>\n",
|
293
446
|
" <td>10382.97</td>\n",
|
294
447
|
" <td>9940.87</td>\n",
|
295
448
|
" <td>10102.02</td>\n",
|
@@ -316,84 +469,123 @@
|
|
316
469
|
" <td>15609.634</td>\n",
|
317
470
|
" <td>0.0003</td>\n",
|
318
471
|
" </tr>\n",
|
472
|
+
" <tr>\n",
|
473
|
+
" <th>...</th>\n",
|
474
|
+
" <th>...</th>\n",
|
475
|
+
" <td>...</td>\n",
|
476
|
+
" <td>...</td>\n",
|
477
|
+
" <td>...</td>\n",
|
478
|
+
" <td>...</td>\n",
|
479
|
+
" <td>...</td>\n",
|
480
|
+
" <td>...</td>\n",
|
481
|
+
" </tr>\n",
|
482
|
+
" <tr>\n",
|
483
|
+
" <th rowspan=\"5\" valign=\"top\">2024-08-13</th>\n",
|
484
|
+
" <th>ZETA</th>\n",
|
485
|
+
" <td>0.6558</td>\n",
|
486
|
+
" <td>0.7099</td>\n",
|
487
|
+
" <td>0.6143</td>\n",
|
488
|
+
" <td>0.6556</td>\n",
|
489
|
+
" <td>290931468.0</td>\n",
|
490
|
+
" <td>-0.001465</td>\n",
|
491
|
+
" </tr>\n",
|
492
|
+
" <tr>\n",
|
493
|
+
" <th>ZIL</th>\n",
|
494
|
+
" <td>0.01394</td>\n",
|
495
|
+
" <td>0.01397</td>\n",
|
496
|
+
" <td>0.01348</td>\n",
|
497
|
+
" <td>0.01372</td>\n",
|
498
|
+
" <td>211016383.0</td>\n",
|
499
|
+
" <td>0.0002</td>\n",
|
500
|
+
" </tr>\n",
|
501
|
+
" <tr>\n",
|
502
|
+
" <th>ZK</th>\n",
|
503
|
+
" <td>0.11683</td>\n",
|
504
|
+
" <td>0.11895</td>\n",
|
505
|
+
" <td>0.11223</td>\n",
|
506
|
+
" <td>0.11713</td>\n",
|
507
|
+
" <td>180118593.0</td>\n",
|
508
|
+
" <td>0.0002</td>\n",
|
509
|
+
" </tr>\n",
|
510
|
+
" <tr>\n",
|
511
|
+
" <th>ZRO</th>\n",
|
512
|
+
" <td>3.509</td>\n",
|
513
|
+
" <td>3.533</td>\n",
|
514
|
+
" <td>3.349</td>\n",
|
515
|
+
" <td>3.459</td>\n",
|
516
|
+
" <td>10802271.5</td>\n",
|
517
|
+
" <td>0.000173</td>\n",
|
518
|
+
" </tr>\n",
|
519
|
+
" <tr>\n",
|
520
|
+
" <th>ZRX</th>\n",
|
521
|
+
" <td>0.3102</td>\n",
|
522
|
+
" <td>0.3126</td>\n",
|
523
|
+
" <td>0.3</td>\n",
|
524
|
+
" <td>0.3083</td>\n",
|
525
|
+
" <td>18072404.9</td>\n",
|
526
|
+
" <td>-0.000044</td>\n",
|
527
|
+
" </tr>\n",
|
319
528
|
" </tbody>\n",
|
320
529
|
"</table>\n",
|
530
|
+
"<p>222221 rows × 6 columns</p>\n",
|
321
531
|
"</div>"
|
322
532
|
],
|
323
533
|
"text/plain": [
|
324
|
-
" open high low close
|
325
|
-
"date ticker
|
326
|
-
"2019-09-08 BTC
|
327
|
-
"2019-09-09 BTC 10316.62 10475.54 10077.22
|
328
|
-
"2019-09-10 BTC
|
329
|
-
"2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55
|
330
|
-
"2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13
|
534
|
+
" open high low close volume \\\n",
|
535
|
+
"date ticker \n",
|
536
|
+
"2019-09-08 BTC 10000.0 10412.65 10000.0 10391.63 3096.291 \n",
|
537
|
+
"2019-09-09 BTC 10316.62 10475.54 10077.22 10307.0 14824.373 \n",
|
538
|
+
"2019-09-10 BTC 10307.0 10382.97 9940.87 10102.02 9068.955 \n",
|
539
|
+
"2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
|
540
|
+
"2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
|
541
|
+
"... ... ... ... ... ... \n",
|
542
|
+
"2024-08-13 ZETA 0.6558 0.7099 0.6143 0.6556 290931468.0 \n",
|
543
|
+
" ZIL 0.01394 0.01397 0.01348 0.01372 211016383.0 \n",
|
544
|
+
" ZK 0.11683 0.11895 0.11223 0.11713 180118593.0 \n",
|
545
|
+
" ZRO 3.509 3.533 3.349 3.459 10802271.5 \n",
|
546
|
+
" ZRX 0.3102 0.3126 0.3 0.3083 18072404.9 \n",
|
331
547
|
"\n",
|
332
548
|
" funding_rate \n",
|
333
549
|
"date ticker \n",
|
334
|
-
"2019-09-08 BTC
|
335
|
-
"2019-09-09 BTC
|
550
|
+
"2019-09-08 BTC <NA> \n",
|
551
|
+
"2019-09-09 BTC <NA> \n",
|
336
552
|
"2019-09-10 BTC 0.0002 \n",
|
337
553
|
"2019-09-11 BTC 0.0003 \n",
|
338
|
-
"2019-09-12 BTC 0.0003 "
|
554
|
+
"2019-09-12 BTC 0.0003 \n",
|
555
|
+
"... ... \n",
|
556
|
+
"2024-08-13 ZETA -0.001465 \n",
|
557
|
+
" ZIL 0.0002 \n",
|
558
|
+
" ZK 0.0002 \n",
|
559
|
+
" ZRO 0.000173 \n",
|
560
|
+
" ZRX -0.000044 \n",
|
561
|
+
"\n",
|
562
|
+
"[222221 rows x 6 columns]"
|
339
563
|
]
|
340
564
|
},
|
341
|
-
"execution_count":
|
565
|
+
"execution_count": 22,
|
342
566
|
"metadata": {},
|
343
567
|
"output_type": "execute_result"
|
344
568
|
}
|
345
569
|
],
|
346
570
|
"source": [
|
347
|
-
"df1
|
348
|
-
]
|
349
|
-
},
|
350
|
-
{
|
351
|
-
"cell_type": "markdown",
|
352
|
-
"id": "32f15191",
|
353
|
-
"metadata": {},
|
354
|
-
"source": [
|
355
|
-
"### Binance Spot"
|
356
|
-
]
|
357
|
-
},
|
358
|
-
{
|
359
|
-
"cell_type": "code",
|
360
|
-
"execution_count": 16,
|
361
|
-
"id": "83e9e466",
|
362
|
-
"metadata": {},
|
363
|
-
"outputs": [],
|
364
|
-
"source": [
|
365
|
-
"# pull OHLC from Binance\n",
|
366
|
-
"data_req = DataRequest(source='ccxt',\n",
|
367
|
-
" tickers=tickers, \n",
|
368
|
-
" fields=['open', 'high', 'low', 'close', 'volume'], \n",
|
369
|
-
" freq='d')"
|
370
|
-
]
|
371
|
-
},
|
372
|
-
{
|
373
|
-
"cell_type": "code",
|
374
|
-
"execution_count": 17,
|
375
|
-
"id": "82d4bbc7",
|
376
|
-
"metadata": {},
|
377
|
-
"outputs": [],
|
378
|
-
"source": [
|
379
|
-
"# df2 = GetData(data_req).get_series()"
|
571
|
+
"df1"
|
380
572
|
]
|
381
573
|
},
|
382
574
|
{
|
383
575
|
"cell_type": "code",
|
384
|
-
"execution_count":
|
385
|
-
"id": "
|
576
|
+
"execution_count": 23,
|
577
|
+
"id": "98a425b2",
|
386
578
|
"metadata": {},
|
387
579
|
"outputs": [],
|
388
580
|
"source": [
|
389
|
-
"#
|
390
|
-
"
|
581
|
+
"# df1.to_csv('binance_perp_futures.csv')\n",
|
582
|
+
"df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
|
391
583
|
]
|
392
584
|
},
|
393
585
|
{
|
394
586
|
"cell_type": "code",
|
395
|
-
"execution_count":
|
396
|
-
"id": "
|
587
|
+
"execution_count": 24,
|
588
|
+
"id": "0e04da4a",
|
397
589
|
"metadata": {},
|
398
590
|
"outputs": [
|
399
591
|
{
|
@@ -423,6 +615,7 @@
|
|
423
615
|
" <th>low</th>\n",
|
424
616
|
" <th>close</th>\n",
|
425
617
|
" <th>volume</th>\n",
|
618
|
+
" <th>funding_rate</th>\n",
|
426
619
|
" </tr>\n",
|
427
620
|
" <tr>\n",
|
428
621
|
" <th>date</th>\n",
|
@@ -432,16 +625,202 @@
|
|
432
625
|
" <th></th>\n",
|
433
626
|
" <th></th>\n",
|
434
627
|
" <th></th>\n",
|
628
|
+
" <th></th>\n",
|
435
629
|
" </tr>\n",
|
436
630
|
" </thead>\n",
|
437
631
|
" <tbody>\n",
|
438
632
|
" <tr>\n",
|
439
|
-
" <th
|
633
|
+
" <th>2019-09-08</th>\n",
|
440
634
|
" <th>BTC</th>\n",
|
441
|
-
" <td>
|
442
|
-
" <td>
|
443
|
-
" <td>
|
444
|
-
" <td>
|
635
|
+
" <td>10000.00</td>\n",
|
636
|
+
" <td>10412.65</td>\n",
|
637
|
+
" <td>10000.00</td>\n",
|
638
|
+
" <td>10391.63</td>\n",
|
639
|
+
" <td>3096.291</td>\n",
|
640
|
+
" <td>NaN</td>\n",
|
641
|
+
" </tr>\n",
|
642
|
+
" <tr>\n",
|
643
|
+
" <th>2019-09-09</th>\n",
|
644
|
+
" <th>BTC</th>\n",
|
645
|
+
" <td>10316.62</td>\n",
|
646
|
+
" <td>10475.54</td>\n",
|
647
|
+
" <td>10077.22</td>\n",
|
648
|
+
" <td>10307.00</td>\n",
|
649
|
+
" <td>14824.373</td>\n",
|
650
|
+
" <td>NaN</td>\n",
|
651
|
+
" </tr>\n",
|
652
|
+
" <tr>\n",
|
653
|
+
" <th>2019-09-10</th>\n",
|
654
|
+
" <th>BTC</th>\n",
|
655
|
+
" <td>10307.00</td>\n",
|
656
|
+
" <td>10382.97</td>\n",
|
657
|
+
" <td>9940.87</td>\n",
|
658
|
+
" <td>10102.02</td>\n",
|
659
|
+
" <td>9068.955</td>\n",
|
660
|
+
" <td>0.0002</td>\n",
|
661
|
+
" </tr>\n",
|
662
|
+
" <tr>\n",
|
663
|
+
" <th>2019-09-11</th>\n",
|
664
|
+
" <th>BTC</th>\n",
|
665
|
+
" <td>10094.27</td>\n",
|
666
|
+
" <td>10293.11</td>\n",
|
667
|
+
" <td>9884.31</td>\n",
|
668
|
+
" <td>10159.55</td>\n",
|
669
|
+
" <td>10897.922</td>\n",
|
670
|
+
" <td>0.0003</td>\n",
|
671
|
+
" </tr>\n",
|
672
|
+
" <tr>\n",
|
673
|
+
" <th>2019-09-12</th>\n",
|
674
|
+
" <th>BTC</th>\n",
|
675
|
+
" <td>10163.06</td>\n",
|
676
|
+
" <td>10450.13</td>\n",
|
677
|
+
" <td>10042.12</td>\n",
|
678
|
+
" <td>10415.13</td>\n",
|
679
|
+
" <td>15609.634</td>\n",
|
680
|
+
" <td>0.0003</td>\n",
|
681
|
+
" </tr>\n",
|
682
|
+
" </tbody>\n",
|
683
|
+
"</table>\n",
|
684
|
+
"</div>"
|
685
|
+
],
|
686
|
+
"text/plain": [
|
687
|
+
" open high low close volume \\\n",
|
688
|
+
"date ticker \n",
|
689
|
+
"2019-09-08 BTC 10000.00 10412.65 10000.00 10391.63 3096.291 \n",
|
690
|
+
"2019-09-09 BTC 10316.62 10475.54 10077.22 10307.00 14824.373 \n",
|
691
|
+
"2019-09-10 BTC 10307.00 10382.97 9940.87 10102.02 9068.955 \n",
|
692
|
+
"2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
|
693
|
+
"2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
|
694
|
+
"\n",
|
695
|
+
" funding_rate \n",
|
696
|
+
"date ticker \n",
|
697
|
+
"2019-09-08 BTC NaN \n",
|
698
|
+
"2019-09-09 BTC NaN \n",
|
699
|
+
"2019-09-10 BTC 0.0002 \n",
|
700
|
+
"2019-09-11 BTC 0.0003 \n",
|
701
|
+
"2019-09-12 BTC 0.0003 "
|
702
|
+
]
|
703
|
+
},
|
704
|
+
"execution_count": 24,
|
705
|
+
"metadata": {},
|
706
|
+
"output_type": "execute_result"
|
707
|
+
}
|
708
|
+
],
|
709
|
+
"source": [
|
710
|
+
"df1.head()"
|
711
|
+
]
|
712
|
+
},
|
713
|
+
{
|
714
|
+
"cell_type": "markdown",
|
715
|
+
"id": "32f15191",
|
716
|
+
"metadata": {},
|
717
|
+
"source": [
|
718
|
+
"### Binance Spot"
|
719
|
+
]
|
720
|
+
},
|
721
|
+
{
|
722
|
+
"cell_type": "code",
|
723
|
+
"execution_count": 25,
|
724
|
+
"id": "83e9e466",
|
725
|
+
"metadata": {},
|
726
|
+
"outputs": [],
|
727
|
+
"source": [
|
728
|
+
"# pull OHLC from Binance\n",
|
729
|
+
"data_req = DataRequest(source='ccxt',\n",
|
730
|
+
" tickers=tickers, \n",
|
731
|
+
" fields=['open', 'high', 'low', 'close', 'volume'], \n",
|
732
|
+
" freq='d')"
|
733
|
+
]
|
734
|
+
},
|
735
|
+
{
|
736
|
+
"cell_type": "code",
|
737
|
+
"execution_count": 26,
|
738
|
+
"id": "82d4bbc7",
|
739
|
+
"metadata": {},
|
740
|
+
"outputs": [
|
741
|
+
{
|
742
|
+
"name": "stderr",
|
743
|
+
"output_type": "stream",
|
744
|
+
"text": [
|
745
|
+
"WARNING:root:Missing recent OHLCV data for XMR/USDT.\n",
|
746
|
+
"WARNING:root:Missing recent OHLCV data for OMG/USDT.\n",
|
747
|
+
"WARNING:root:Missing recent OHLCV data for WAVES/USDT.\n",
|
748
|
+
"WARNING:root:Missing recent OHLCV data for OCEAN/USDT.\n",
|
749
|
+
"WARNING:root:Missing recent OHLCV data for XEM/USDT.\n",
|
750
|
+
"WARNING:root:Missing recent OHLCV data for BTCST/USDT.\n",
|
751
|
+
"WARNING:root:Missing recent OHLCV data for AGIX/USDT.\n",
|
752
|
+
"WARNING:root:Missing recent OHLCV data for BOND/USDT.\n",
|
753
|
+
"WARNING:root:Missing recent OHLCV data for BSV/USDT.\n"
|
754
|
+
]
|
755
|
+
}
|
756
|
+
],
|
757
|
+
"source": [
|
758
|
+
"df2 = GetData(data_req).get_series()"
|
759
|
+
]
|
760
|
+
},
|
761
|
+
{
|
762
|
+
"cell_type": "code",
|
763
|
+
"execution_count": 27,
|
764
|
+
"id": "4f63eb21",
|
765
|
+
"metadata": {},
|
766
|
+
"outputs": [],
|
767
|
+
"source": [
|
768
|
+
"# df2.to_csv('binance_spot.csv')\n",
|
769
|
+
"df2 = pd.read_csv('../../../../factorlab/notebooks/binance_spot.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
|
770
|
+
]
|
771
|
+
},
|
772
|
+
{
|
773
|
+
"cell_type": "code",
|
774
|
+
"execution_count": 28,
|
775
|
+
"id": "ce8929c1",
|
776
|
+
"metadata": {},
|
777
|
+
"outputs": [
|
778
|
+
{
|
779
|
+
"data": {
|
780
|
+
"text/html": [
|
781
|
+
"<div>\n",
|
782
|
+
"<style scoped>\n",
|
783
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
784
|
+
" vertical-align: middle;\n",
|
785
|
+
" }\n",
|
786
|
+
"\n",
|
787
|
+
" .dataframe tbody tr th {\n",
|
788
|
+
" vertical-align: top;\n",
|
789
|
+
" }\n",
|
790
|
+
"\n",
|
791
|
+
" .dataframe thead th {\n",
|
792
|
+
" text-align: right;\n",
|
793
|
+
" }\n",
|
794
|
+
"</style>\n",
|
795
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
796
|
+
" <thead>\n",
|
797
|
+
" <tr style=\"text-align: right;\">\n",
|
798
|
+
" <th></th>\n",
|
799
|
+
" <th></th>\n",
|
800
|
+
" <th>open</th>\n",
|
801
|
+
" <th>high</th>\n",
|
802
|
+
" <th>low</th>\n",
|
803
|
+
" <th>close</th>\n",
|
804
|
+
" <th>volume</th>\n",
|
805
|
+
" </tr>\n",
|
806
|
+
" <tr>\n",
|
807
|
+
" <th>date</th>\n",
|
808
|
+
" <th>ticker</th>\n",
|
809
|
+
" <th></th>\n",
|
810
|
+
" <th></th>\n",
|
811
|
+
" <th></th>\n",
|
812
|
+
" <th></th>\n",
|
813
|
+
" <th></th>\n",
|
814
|
+
" </tr>\n",
|
815
|
+
" </thead>\n",
|
816
|
+
" <tbody>\n",
|
817
|
+
" <tr>\n",
|
818
|
+
" <th rowspan=\"2\" valign=\"top\">2017-08-17</th>\n",
|
819
|
+
" <th>BTC</th>\n",
|
820
|
+
" <td>4261.48</td>\n",
|
821
|
+
" <td>4485.39</td>\n",
|
822
|
+
" <td>4200.74</td>\n",
|
823
|
+
" <td>4285.08</td>\n",
|
445
824
|
" <td>795.150377</td>\n",
|
446
825
|
" </tr>\n",
|
447
826
|
" <tr>\n",
|
@@ -492,7 +871,7 @@
|
|
492
871
|
"2017-08-19 BTC 4108.37 4184.69 3850.00 4139.98 381.309763"
|
493
872
|
]
|
494
873
|
},
|
495
|
-
"execution_count":
|
874
|
+
"execution_count": 28,
|
496
875
|
"metadata": {},
|
497
876
|
"output_type": "execute_result"
|
498
877
|
}
|
@@ -511,7 +890,7 @@
|
|
511
890
|
},
|
512
891
|
{
|
513
892
|
"cell_type": "code",
|
514
|
-
"execution_count":
|
893
|
+
"execution_count": 29,
|
515
894
|
"id": "7f14d874",
|
516
895
|
"metadata": {},
|
517
896
|
"outputs": [],
|
@@ -525,17 +904,17 @@
|
|
525
904
|
},
|
526
905
|
{
|
527
906
|
"cell_type": "code",
|
528
|
-
"execution_count":
|
907
|
+
"execution_count": 30,
|
529
908
|
"id": "3a8708d3",
|
530
909
|
"metadata": {},
|
531
910
|
"outputs": [],
|
532
911
|
"source": [
|
533
|
-
"
|
912
|
+
"df3 = GetData(data_req).get_series()"
|
534
913
|
]
|
535
914
|
},
|
536
915
|
{
|
537
916
|
"cell_type": "code",
|
538
|
-
"execution_count":
|
917
|
+
"execution_count": 31,
|
539
918
|
"id": "aa265538",
|
540
919
|
"metadata": {},
|
541
920
|
"outputs": [],
|
@@ -554,7 +933,7 @@
|
|
554
933
|
},
|
555
934
|
{
|
556
935
|
"cell_type": "code",
|
557
|
-
"execution_count":
|
936
|
+
"execution_count": 32,
|
558
937
|
"id": "f5ee4f6d",
|
559
938
|
"metadata": {},
|
560
939
|
"outputs": [],
|
@@ -565,7 +944,7 @@
|
|
565
944
|
},
|
566
945
|
{
|
567
946
|
"cell_type": "code",
|
568
|
-
"execution_count":
|
947
|
+
"execution_count": 33,
|
569
948
|
"id": "cbe07c91",
|
570
949
|
"metadata": {},
|
571
950
|
"outputs": [
|
@@ -674,7 +1053,7 @@
|
|
674
1053
|
"2010-07-21 BTC 0.07474 0.07921 0.06634 0.07921 575.00 0.0"
|
675
1054
|
]
|
676
1055
|
},
|
677
|
-
"execution_count":
|
1056
|
+
"execution_count": 33,
|
678
1057
|
"metadata": {},
|
679
1058
|
"output_type": "execute_result"
|
680
1059
|
}
|
@@ -685,7 +1064,32 @@
|
|
685
1064
|
},
|
686
1065
|
{
|
687
1066
|
"cell_type": "code",
|
688
|
-
"execution_count":
|
1067
|
+
"execution_count": 34,
|
1068
|
+
"id": "cef46007",
|
1069
|
+
"metadata": {},
|
1070
|
+
"outputs": [
|
1071
|
+
{
|
1072
|
+
"data": {
|
1073
|
+
"text/plain": [
|
1074
|
+
"Index(['BTC', 'LTC', 'DOGE', 'DASH', 'XLM', 'XMR', 'XRP', 'KEY', 'DGB', 'XEM',\n",
|
1075
|
+
" ...\n",
|
1076
|
+
" 'TNSR', 'SAGA', 'REZ', 'BB', 'NOT', 'IO', 'ZK', 'LISTA', 'ZRO',\n",
|
1077
|
+
" 'RENDER'],\n",
|
1078
|
+
" dtype='object', name='ticker', length=256)"
|
1079
|
+
]
|
1080
|
+
},
|
1081
|
+
"execution_count": 34,
|
1082
|
+
"metadata": {},
|
1083
|
+
"output_type": "execute_result"
|
1084
|
+
}
|
1085
|
+
],
|
1086
|
+
"source": [
|
1087
|
+
"df.index.get_level_values(1).unique()"
|
1088
|
+
]
|
1089
|
+
},
|
1090
|
+
{
|
1091
|
+
"cell_type": "code",
|
1092
|
+
"execution_count": null,
|
689
1093
|
"id": "d4c497d1",
|
690
1094
|
"metadata": {},
|
691
1095
|
"outputs": [],
|
@@ -696,74 +1100,98 @@
|
|
696
1100
|
},
|
697
1101
|
{
|
698
1102
|
"cell_type": "code",
|
699
|
-
"execution_count":
|
700
|
-
"id": "
|
1103
|
+
"execution_count": 35,
|
1104
|
+
"id": "9f8a899f",
|
1105
|
+
"metadata": {},
|
1106
|
+
"outputs": [],
|
1107
|
+
"source": [
|
1108
|
+
"clean = CleanData(df)"
|
1109
|
+
]
|
1110
|
+
},
|
1111
|
+
{
|
1112
|
+
"cell_type": "code",
|
1113
|
+
"execution_count": 36,
|
1114
|
+
"id": "29e1b955",
|
701
1115
|
"metadata": {},
|
702
1116
|
"outputs": [
|
703
1117
|
{
|
704
|
-
"
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
1118
|
+
"data": {
|
1119
|
+
"text/plain": [
|
1120
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1121
|
+
]
|
1122
|
+
},
|
1123
|
+
"execution_count": 36,
|
1124
|
+
"metadata": {},
|
1125
|
+
"output_type": "execute_result"
|
711
1126
|
}
|
712
1127
|
],
|
713
1128
|
"source": [
|
714
|
-
"
|
715
|
-
"clean_df = CleanData(df).filter_delisted_tickers().\\\n",
|
716
|
-
" filter_min_nobs(ts_obs=1500, cs_obs=10).\\\n",
|
717
|
-
" filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
|
718
|
-
" repair_outliers(imp_method='fcst').\\\n",
|
719
|
-
" filter_avg_trading_val(thresh_val=1000000).\\\n",
|
720
|
-
" filter_missing_vals_gaps().\\\n",
|
721
|
-
" get(attr='df').dropna(how='all')"
|
1129
|
+
"clean.filter_delisted_tickers()"
|
722
1130
|
]
|
723
1131
|
},
|
724
1132
|
{
|
725
1133
|
"cell_type": "code",
|
726
|
-
"execution_count":
|
727
|
-
"id": "
|
1134
|
+
"execution_count": 37,
|
1135
|
+
"id": "9315f178",
|
728
1136
|
"metadata": {},
|
729
1137
|
"outputs": [
|
730
1138
|
{
|
731
|
-
"
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
]
|
1139
|
+
"data": {
|
1140
|
+
"text/plain": [
|
1141
|
+
"['CVC',\n",
|
1142
|
+
" 'CTK',\n",
|
1143
|
+
" 'STRAX',\n",
|
1144
|
+
" 'STPT',\n",
|
1145
|
+
" 'DGB',\n",
|
1146
|
+
" 'CVX',\n",
|
1147
|
+
" 'FTT',\n",
|
1148
|
+
" 'GLMR',\n",
|
1149
|
+
" 'OCEAN',\n",
|
1150
|
+
" 'SLP',\n",
|
1151
|
+
" 'AGIX',\n",
|
1152
|
+
" 'RAY',\n",
|
1153
|
+
" 'MDT',\n",
|
1154
|
+
" 'SC',\n",
|
1155
|
+
" 'SNT',\n",
|
1156
|
+
" 'IDEX',\n",
|
1157
|
+
" 'RAD',\n",
|
1158
|
+
" 'WAVES']"
|
1159
|
+
]
|
1160
|
+
},
|
1161
|
+
"execution_count": 37,
|
1162
|
+
"metadata": {},
|
1163
|
+
"output_type": "execute_result"
|
757
1164
|
}
|
758
1165
|
],
|
759
1166
|
"source": [
|
760
|
-
"
|
1167
|
+
"clean.filtered_tickers"
|
761
1168
|
]
|
762
1169
|
},
|
763
1170
|
{
|
764
1171
|
"cell_type": "code",
|
765
|
-
"execution_count":
|
766
|
-
"id": "
|
1172
|
+
"execution_count": 38,
|
1173
|
+
"id": "e4693e52",
|
1174
|
+
"metadata": {},
|
1175
|
+
"outputs": [
|
1176
|
+
{
|
1177
|
+
"data": {
|
1178
|
+
"text/plain": [
|
1179
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1180
|
+
]
|
1181
|
+
},
|
1182
|
+
"execution_count": 38,
|
1183
|
+
"metadata": {},
|
1184
|
+
"output_type": "execute_result"
|
1185
|
+
}
|
1186
|
+
],
|
1187
|
+
"source": [
|
1188
|
+
"clean.filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10)"
|
1189
|
+
]
|
1190
|
+
},
|
1191
|
+
{
|
1192
|
+
"cell_type": "code",
|
1193
|
+
"execution_count": 39,
|
1194
|
+
"id": "311ee6ed",
|
767
1195
|
"metadata": {},
|
768
1196
|
"outputs": [
|
769
1197
|
{
|
@@ -787,71 +1215,259 @@
|
|
787
1215
|
" <thead>\n",
|
788
1216
|
" <tr style=\"text-align: right;\">\n",
|
789
1217
|
" <th></th>\n",
|
1218
|
+
" <th></th>\n",
|
790
1219
|
" <th>open</th>\n",
|
791
1220
|
" <th>high</th>\n",
|
792
1221
|
" <th>low</th>\n",
|
793
1222
|
" <th>close</th>\n",
|
794
|
-
" <th>volume</th>\n",
|
795
|
-
" <th>funding_rate</th>\n",
|
796
1223
|
" </tr>\n",
|
797
1224
|
" <tr>\n",
|
798
1225
|
" <th>date</th>\n",
|
1226
|
+
" <th>ticker</th>\n",
|
1227
|
+
" <th></th>\n",
|
1228
|
+
" <th></th>\n",
|
1229
|
+
" <th></th>\n",
|
1230
|
+
" <th></th>\n",
|
1231
|
+
" </tr>\n",
|
1232
|
+
" </thead>\n",
|
1233
|
+
" <tbody>\n",
|
1234
|
+
" <tr>\n",
|
1235
|
+
" <th>2010-07-17</th>\n",
|
1236
|
+
" <th>BTC</th>\n",
|
1237
|
+
" <td><NA></td>\n",
|
1238
|
+
" <td><NA></td>\n",
|
1239
|
+
" <td><NA></td>\n",
|
1240
|
+
" <td><NA></td>\n",
|
1241
|
+
" </tr>\n",
|
1242
|
+
" <tr>\n",
|
1243
|
+
" <th>2010-07-18</th>\n",
|
1244
|
+
" <th>BTC</th>\n",
|
1245
|
+
" <td><NA></td>\n",
|
1246
|
+
" <td><NA></td>\n",
|
1247
|
+
" <td><NA></td>\n",
|
1248
|
+
" <td><NA></td>\n",
|
1249
|
+
" </tr>\n",
|
1250
|
+
" <tr>\n",
|
1251
|
+
" <th>2010-07-19</th>\n",
|
1252
|
+
" <th>BTC</th>\n",
|
1253
|
+
" <td><NA></td>\n",
|
1254
|
+
" <td><NA></td>\n",
|
1255
|
+
" <td><NA></td>\n",
|
1256
|
+
" <td><NA></td>\n",
|
1257
|
+
" </tr>\n",
|
1258
|
+
" <tr>\n",
|
1259
|
+
" <th>2010-07-20</th>\n",
|
1260
|
+
" <th>BTC</th>\n",
|
1261
|
+
" <td><NA></td>\n",
|
1262
|
+
" <td><NA></td>\n",
|
1263
|
+
" <td><NA></td>\n",
|
1264
|
+
" <td><NA></td>\n",
|
1265
|
+
" </tr>\n",
|
1266
|
+
" <tr>\n",
|
1267
|
+
" <th>2010-07-21</th>\n",
|
1268
|
+
" <th>BTC</th>\n",
|
1269
|
+
" <td><NA></td>\n",
|
1270
|
+
" <td><NA></td>\n",
|
1271
|
+
" <td><NA></td>\n",
|
1272
|
+
" <td><NA></td>\n",
|
1273
|
+
" </tr>\n",
|
1274
|
+
" <tr>\n",
|
1275
|
+
" <th>...</th>\n",
|
1276
|
+
" <th>...</th>\n",
|
1277
|
+
" <td>...</td>\n",
|
1278
|
+
" <td>...</td>\n",
|
1279
|
+
" <td>...</td>\n",
|
1280
|
+
" <td>...</td>\n",
|
1281
|
+
" </tr>\n",
|
1282
|
+
" <tr>\n",
|
1283
|
+
" <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
|
1284
|
+
" <th>ZEN</th>\n",
|
1285
|
+
" <td><NA></td>\n",
|
1286
|
+
" <td>9.112</td>\n",
|
1287
|
+
" <td>8.285</td>\n",
|
1288
|
+
" <td>8.462</td>\n",
|
1289
|
+
" </tr>\n",
|
1290
|
+
" <tr>\n",
|
1291
|
+
" <th>ZIL</th>\n",
|
1292
|
+
" <td>0.01422</td>\n",
|
1293
|
+
" <td>0.01441</td>\n",
|
1294
|
+
" <td>0.01392</td>\n",
|
1295
|
+
" <td>0.01396</td>\n",
|
1296
|
+
" </tr>\n",
|
1297
|
+
" <tr>\n",
|
1298
|
+
" <th>ZK</th>\n",
|
1299
|
+
" <td>0.11451</td>\n",
|
1300
|
+
" <td>0.11673</td>\n",
|
1301
|
+
" <td>0.10857</td>\n",
|
1302
|
+
" <td>0.10874</td>\n",
|
1303
|
+
" </tr>\n",
|
1304
|
+
" <tr>\n",
|
1305
|
+
" <th>ZRO</th>\n",
|
1306
|
+
" <td>3.631</td>\n",
|
1307
|
+
" <td>3.648</td>\n",
|
1308
|
+
" <td>3.362</td>\n",
|
1309
|
+
" <td>3.367</td>\n",
|
1310
|
+
" </tr>\n",
|
1311
|
+
" <tr>\n",
|
1312
|
+
" <th>ZRX</th>\n",
|
1313
|
+
" <td>0.3055</td>\n",
|
1314
|
+
" <td>0.3121</td>\n",
|
1315
|
+
" <td>0.2983</td>\n",
|
1316
|
+
" <td>0.2988</td>\n",
|
1317
|
+
" </tr>\n",
|
1318
|
+
" </tbody>\n",
|
1319
|
+
"</table>\n",
|
1320
|
+
"<p>357696 rows × 4 columns</p>\n",
|
1321
|
+
"</div>"
|
1322
|
+
],
|
1323
|
+
"text/plain": [
|
1324
|
+
" open high low close\n",
|
1325
|
+
"date ticker \n",
|
1326
|
+
"2010-07-17 BTC <NA> <NA> <NA> <NA>\n",
|
1327
|
+
"2010-07-18 BTC <NA> <NA> <NA> <NA>\n",
|
1328
|
+
"2010-07-19 BTC <NA> <NA> <NA> <NA>\n",
|
1329
|
+
"2010-07-20 BTC <NA> <NA> <NA> <NA>\n",
|
1330
|
+
"2010-07-21 BTC <NA> <NA> <NA> <NA>\n",
|
1331
|
+
"... ... ... ... ...\n",
|
1332
|
+
"2024-08-04 ZEN <NA> 9.112 8.285 8.462\n",
|
1333
|
+
" ZIL 0.01422 0.01441 0.01392 0.01396\n",
|
1334
|
+
" ZK 0.11451 0.11673 0.10857 0.10874\n",
|
1335
|
+
" ZRO 3.631 3.648 3.362 3.367\n",
|
1336
|
+
" ZRX 0.3055 0.3121 0.2983 0.2988\n",
|
1337
|
+
"\n",
|
1338
|
+
"[357696 rows x 4 columns]"
|
1339
|
+
]
|
1340
|
+
},
|
1341
|
+
"execution_count": 39,
|
1342
|
+
"metadata": {},
|
1343
|
+
"output_type": "execute_result"
|
1344
|
+
}
|
1345
|
+
],
|
1346
|
+
"source": [
|
1347
|
+
"clean.df"
|
1348
|
+
]
|
1349
|
+
},
|
1350
|
+
{
|
1351
|
+
"cell_type": "code",
|
1352
|
+
"execution_count": 40,
|
1353
|
+
"id": "83caa2c6",
|
1354
|
+
"metadata": {},
|
1355
|
+
"outputs": [
|
1356
|
+
{
|
1357
|
+
"data": {
|
1358
|
+
"text/plain": [
|
1359
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1360
|
+
]
|
1361
|
+
},
|
1362
|
+
"execution_count": 40,
|
1363
|
+
"metadata": {},
|
1364
|
+
"output_type": "execute_result"
|
1365
|
+
}
|
1366
|
+
],
|
1367
|
+
"source": [
|
1368
|
+
"clean.repair_outliers(imp_method='fcst')"
|
1369
|
+
]
|
1370
|
+
},
|
1371
|
+
{
|
1372
|
+
"cell_type": "code",
|
1373
|
+
"execution_count": 41,
|
1374
|
+
"id": "f4c21352",
|
1375
|
+
"metadata": {},
|
1376
|
+
"outputs": [
|
1377
|
+
{
|
1378
|
+
"data": {
|
1379
|
+
"text/html": [
|
1380
|
+
"<div>\n",
|
1381
|
+
"<style scoped>\n",
|
1382
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
1383
|
+
" vertical-align: middle;\n",
|
1384
|
+
" }\n",
|
1385
|
+
"\n",
|
1386
|
+
" .dataframe tbody tr th {\n",
|
1387
|
+
" vertical-align: top;\n",
|
1388
|
+
" }\n",
|
1389
|
+
"\n",
|
1390
|
+
" .dataframe thead th {\n",
|
1391
|
+
" text-align: right;\n",
|
1392
|
+
" }\n",
|
1393
|
+
"</style>\n",
|
1394
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1395
|
+
" <thead>\n",
|
1396
|
+
" <tr style=\"text-align: right;\">\n",
|
1397
|
+
" <th></th>\n",
|
1398
|
+
" <th></th>\n",
|
1399
|
+
" <th>open</th>\n",
|
1400
|
+
" <th>high</th>\n",
|
1401
|
+
" <th>low</th>\n",
|
1402
|
+
" <th>close</th>\n",
|
1403
|
+
" <th>volume</th>\n",
|
1404
|
+
" <th>funding_rate</th>\n",
|
1405
|
+
" </tr>\n",
|
1406
|
+
" <tr>\n",
|
1407
|
+
" <th>date</th>\n",
|
1408
|
+
" <th>ticker</th>\n",
|
1409
|
+
" <th></th>\n",
|
799
1410
|
" <th></th>\n",
|
800
1411
|
" <th></th>\n",
|
801
1412
|
" <th></th>\n",
|
802
1413
|
" <th></th>\n",
|
803
1414
|
" <th></th>\n",
|
804
|
-
" <th></th>\n",
|
805
1415
|
" </tr>\n",
|
806
1416
|
" </thead>\n",
|
807
1417
|
" <tbody>\n",
|
808
1418
|
" <tr>\n",
|
809
|
-
" <th>
|
810
|
-
" <
|
811
|
-
" <td>
|
812
|
-
" <td>
|
813
|
-
" <td>
|
814
|
-
" <td>
|
1419
|
+
" <th>2010-07-17</th>\n",
|
1420
|
+
" <th>BTC</th>\n",
|
1421
|
+
" <td>42.99</td>\n",
|
1422
|
+
" <td>43.76</td>\n",
|
1423
|
+
" <td>40.99</td>\n",
|
1424
|
+
" <td>41.01</td>\n",
|
1425
|
+
" <td>2.000000e+01</td>\n",
|
815
1426
|
" <td>0.000000</td>\n",
|
816
1427
|
" </tr>\n",
|
817
1428
|
" <tr>\n",
|
818
|
-
" <th>
|
819
|
-
" <
|
820
|
-
" <td>
|
821
|
-
" <td>
|
822
|
-
" <td>
|
823
|
-
" <td>
|
1429
|
+
" <th>2010-07-18</th>\n",
|
1430
|
+
" <th>BTC</th>\n",
|
1431
|
+
" <td>0.07921</td>\n",
|
1432
|
+
" <td>0.08181</td>\n",
|
1433
|
+
" <td>0.06634</td>\n",
|
1434
|
+
" <td>0.07921</td>\n",
|
1435
|
+
" <td>7.501000e+01</td>\n",
|
824
1436
|
" <td>0.000000</td>\n",
|
825
1437
|
" </tr>\n",
|
826
1438
|
" <tr>\n",
|
827
|
-
" <th>
|
828
|
-
" <
|
829
|
-
" <td>
|
830
|
-
" <td>
|
831
|
-
" <td>
|
832
|
-
" <td>
|
1439
|
+
" <th>2010-07-19</th>\n",
|
1440
|
+
" <th>BTC</th>\n",
|
1441
|
+
" <td>0.07474</td>\n",
|
1442
|
+
" <td>0.07921</td>\n",
|
1443
|
+
" <td>0.0505</td>\n",
|
1444
|
+
" <td>0.06262</td>\n",
|
1445
|
+
" <td>5.740000e+02</td>\n",
|
833
1446
|
" <td>0.000000</td>\n",
|
834
1447
|
" </tr>\n",
|
835
1448
|
" <tr>\n",
|
836
|
-
" <th>
|
837
|
-
" <
|
838
|
-
" <td>
|
839
|
-
" <td>
|
840
|
-
" <td>
|
841
|
-
" <td>
|
1449
|
+
" <th>2010-07-20</th>\n",
|
1450
|
+
" <th>BTC</th>\n",
|
1451
|
+
" <td>0.06868</td>\n",
|
1452
|
+
" <td>0.07344</td>\n",
|
1453
|
+
" <td>0.0505</td>\n",
|
1454
|
+
" <td>0.06052</td>\n",
|
1455
|
+
" <td>2.620000e+02</td>\n",
|
842
1456
|
" <td>0.000000</td>\n",
|
843
1457
|
" </tr>\n",
|
844
1458
|
" <tr>\n",
|
845
|
-
" <th>
|
846
|
-
" <
|
847
|
-
" <td>
|
848
|
-
" <td>
|
849
|
-
" <td>
|
850
|
-
" <td>
|
1459
|
+
" <th>2010-07-21</th>\n",
|
1460
|
+
" <th>BTC</th>\n",
|
1461
|
+
" <td>0.06262</td>\n",
|
1462
|
+
" <td>0.06767</td>\n",
|
1463
|
+
" <td>0.0505</td>\n",
|
1464
|
+
" <td>0.05842</td>\n",
|
1465
|
+
" <td>5.750000e+02</td>\n",
|
851
1466
|
" <td>0.000000</td>\n",
|
852
1467
|
" </tr>\n",
|
853
1468
|
" <tr>\n",
|
854
1469
|
" <th>...</th>\n",
|
1470
|
+
" <th>...</th>\n",
|
855
1471
|
" <td>...</td>\n",
|
856
1472
|
" <td>...</td>\n",
|
857
1473
|
" <td>...</td>\n",
|
@@ -860,128 +1476,691 @@
|
|
860
1476
|
" <td>...</td>\n",
|
861
1477
|
" </tr>\n",
|
862
1478
|
" <tr>\n",
|
863
|
-
" <th>2024-
|
864
|
-
" <
|
865
|
-
" <td>
|
866
|
-
" <td>
|
867
|
-
" <td>
|
868
|
-
" <td>
|
869
|
-
" <td>
|
1479
|
+
" <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
|
1480
|
+
" <th>ZEN</th>\n",
|
1481
|
+
" <td>9.657</td>\n",
|
1482
|
+
" <td>9.112</td>\n",
|
1483
|
+
" <td>8.285</td>\n",
|
1484
|
+
" <td>8.462</td>\n",
|
1485
|
+
" <td>2.071124e+06</td>\n",
|
1486
|
+
" <td>0.000194</td>\n",
|
870
1487
|
" </tr>\n",
|
871
1488
|
" <tr>\n",
|
872
|
-
" <th>
|
873
|
-
" <td>
|
874
|
-
" <td>
|
875
|
-
" <td>
|
876
|
-
" <td>
|
877
|
-
" <td>
|
878
|
-
" <td
|
1489
|
+
" <th>ZIL</th>\n",
|
1490
|
+
" <td>0.01422</td>\n",
|
1491
|
+
" <td>0.01441</td>\n",
|
1492
|
+
" <td>0.01392</td>\n",
|
1493
|
+
" <td>0.01396</td>\n",
|
1494
|
+
" <td>2.048626e+08</td>\n",
|
1495
|
+
" <td>-0.000031</td>\n",
|
879
1496
|
" </tr>\n",
|
880
1497
|
" <tr>\n",
|
881
|
-
" <th>
|
882
|
-
" <td>
|
883
|
-
" <td>
|
884
|
-
" <td>
|
885
|
-
" <td>
|
886
|
-
" <td>
|
887
|
-
" <td>0.
|
1498
|
+
" <th>ZK</th>\n",
|
1499
|
+
" <td>0.11451</td>\n",
|
1500
|
+
" <td>0.11673</td>\n",
|
1501
|
+
" <td>0.10857</td>\n",
|
1502
|
+
" <td>0.10874</td>\n",
|
1503
|
+
" <td>3.833253e+08</td>\n",
|
1504
|
+
" <td>0.000200</td>\n",
|
888
1505
|
" </tr>\n",
|
889
1506
|
" <tr>\n",
|
890
|
-
" <th>
|
891
|
-
" <td>
|
892
|
-
" <td>
|
893
|
-
" <td>
|
894
|
-
" <td>
|
895
|
-
" <td>
|
896
|
-
" <td>0.
|
1507
|
+
" <th>ZRO</th>\n",
|
1508
|
+
" <td>3.631</td>\n",
|
1509
|
+
" <td>3.648</td>\n",
|
1510
|
+
" <td>3.362</td>\n",
|
1511
|
+
" <td>3.367</td>\n",
|
1512
|
+
" <td>7.049472e+07</td>\n",
|
1513
|
+
" <td>0.000184</td>\n",
|
897
1514
|
" </tr>\n",
|
898
1515
|
" <tr>\n",
|
899
|
-
" <th>
|
900
|
-
" <td>
|
901
|
-
" <td>
|
902
|
-
" <td>
|
903
|
-
" <td>
|
904
|
-
" <td>
|
905
|
-
" <td>0.
|
1516
|
+
" <th>ZRX</th>\n",
|
1517
|
+
" <td>0.3055</td>\n",
|
1518
|
+
" <td>0.3121</td>\n",
|
1519
|
+
" <td>0.2983</td>\n",
|
1520
|
+
" <td>0.2988</td>\n",
|
1521
|
+
" <td>9.810764e+06</td>\n",
|
1522
|
+
" <td>0.000186</td>\n",
|
906
1523
|
" </tr>\n",
|
907
1524
|
" </tbody>\n",
|
908
1525
|
"</table>\n",
|
909
|
-
"<p>
|
1526
|
+
"<p>357696 rows × 6 columns</p>\n",
|
910
1527
|
"</div>"
|
911
1528
|
],
|
912
1529
|
"text/plain": [
|
913
|
-
"
|
914
|
-
"date
|
915
|
-
"
|
916
|
-
"
|
917
|
-
"
|
918
|
-
"
|
919
|
-
"
|
920
|
-
"...
|
921
|
-
"2024-
|
922
|
-
"
|
923
|
-
"
|
924
|
-
"
|
925
|
-
"
|
1530
|
+
" open high low close volume \\\n",
|
1531
|
+
"date ticker \n",
|
1532
|
+
"2010-07-17 BTC 42.99 43.76 40.99 41.01 2.000000e+01 \n",
|
1533
|
+
"2010-07-18 BTC 0.07921 0.08181 0.06634 0.07921 7.501000e+01 \n",
|
1534
|
+
"2010-07-19 BTC 0.07474 0.07921 0.0505 0.06262 5.740000e+02 \n",
|
1535
|
+
"2010-07-20 BTC 0.06868 0.07344 0.0505 0.06052 2.620000e+02 \n",
|
1536
|
+
"2010-07-21 BTC 0.06262 0.06767 0.0505 0.05842 5.750000e+02 \n",
|
1537
|
+
"... ... ... ... ... ... \n",
|
1538
|
+
"2024-08-04 ZEN 9.657 9.112 8.285 8.462 2.071124e+06 \n",
|
1539
|
+
" ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
|
1540
|
+
" ZK 0.11451 0.11673 0.10857 0.10874 3.833253e+08 \n",
|
1541
|
+
" ZRO 3.631 3.648 3.362 3.367 7.049472e+07 \n",
|
1542
|
+
" ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
|
1543
|
+
"\n",
|
1544
|
+
" funding_rate \n",
|
1545
|
+
"date ticker \n",
|
1546
|
+
"2010-07-17 BTC 0.000000 \n",
|
1547
|
+
"2010-07-18 BTC 0.000000 \n",
|
1548
|
+
"2010-07-19 BTC 0.000000 \n",
|
1549
|
+
"2010-07-20 BTC 0.000000 \n",
|
1550
|
+
"2010-07-21 BTC 0.000000 \n",
|
1551
|
+
"... ... \n",
|
1552
|
+
"2024-08-04 ZEN 0.000194 \n",
|
1553
|
+
" ZIL -0.000031 \n",
|
1554
|
+
" ZK 0.000200 \n",
|
1555
|
+
" ZRO 0.000184 \n",
|
1556
|
+
" ZRX 0.000186 \n",
|
926
1557
|
"\n",
|
927
|
-
"[
|
1558
|
+
"[357696 rows x 6 columns]"
|
928
1559
|
]
|
929
1560
|
},
|
930
|
-
"execution_count":
|
1561
|
+
"execution_count": 41,
|
931
1562
|
"metadata": {},
|
932
1563
|
"output_type": "execute_result"
|
933
1564
|
}
|
934
1565
|
],
|
935
1566
|
"source": [
|
936
|
-
"
|
1567
|
+
"clean.df"
|
937
1568
|
]
|
938
1569
|
},
|
939
1570
|
{
|
940
1571
|
"cell_type": "code",
|
941
|
-
"execution_count":
|
942
|
-
"id": "
|
1572
|
+
"execution_count": 42,
|
1573
|
+
"id": "66b3d8d2",
|
943
1574
|
"metadata": {},
|
944
|
-
"outputs": [
|
1575
|
+
"outputs": [
|
1576
|
+
{
|
1577
|
+
"data": {
|
1578
|
+
"text/plain": [
|
1579
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1580
|
+
]
|
1581
|
+
},
|
1582
|
+
"execution_count": 42,
|
1583
|
+
"metadata": {},
|
1584
|
+
"output_type": "execute_result"
|
1585
|
+
}
|
1586
|
+
],
|
945
1587
|
"source": [
|
946
|
-
"
|
1588
|
+
"clean.filter_avg_trading_val(thresh_val=1000000)"
|
947
1589
|
]
|
948
1590
|
},
|
949
1591
|
{
|
950
1592
|
"cell_type": "code",
|
951
|
-
"execution_count":
|
952
|
-
"id": "
|
1593
|
+
"execution_count": 43,
|
1594
|
+
"id": "b7f23056",
|
1595
|
+
"metadata": {},
|
1596
|
+
"outputs": [
|
1597
|
+
{
|
1598
|
+
"data": {
|
1599
|
+
"text/html": [
|
1600
|
+
"<div>\n",
|
1601
|
+
"<style scoped>\n",
|
1602
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
1603
|
+
" vertical-align: middle;\n",
|
1604
|
+
" }\n",
|
1605
|
+
"\n",
|
1606
|
+
" .dataframe tbody tr th {\n",
|
1607
|
+
" vertical-align: top;\n",
|
1608
|
+
" }\n",
|
1609
|
+
"\n",
|
1610
|
+
" .dataframe thead th {\n",
|
1611
|
+
" text-align: right;\n",
|
1612
|
+
" }\n",
|
1613
|
+
"</style>\n",
|
1614
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1615
|
+
" <thead>\n",
|
1616
|
+
" <tr style=\"text-align: right;\">\n",
|
1617
|
+
" <th></th>\n",
|
1618
|
+
" <th></th>\n",
|
1619
|
+
" <th>open</th>\n",
|
1620
|
+
" <th>high</th>\n",
|
1621
|
+
" <th>low</th>\n",
|
1622
|
+
" <th>close</th>\n",
|
1623
|
+
" <th>volume</th>\n",
|
1624
|
+
" <th>funding_rate</th>\n",
|
1625
|
+
" </tr>\n",
|
1626
|
+
" <tr>\n",
|
1627
|
+
" <th>date</th>\n",
|
1628
|
+
" <th>ticker</th>\n",
|
1629
|
+
" <th></th>\n",
|
1630
|
+
" <th></th>\n",
|
1631
|
+
" <th></th>\n",
|
1632
|
+
" <th></th>\n",
|
1633
|
+
" <th></th>\n",
|
1634
|
+
" <th></th>\n",
|
1635
|
+
" </tr>\n",
|
1636
|
+
" </thead>\n",
|
1637
|
+
" <tbody>\n",
|
1638
|
+
" <tr>\n",
|
1639
|
+
" <th>2010-07-17</th>\n",
|
1640
|
+
" <th>BTC</th>\n",
|
1641
|
+
" <td><NA></td>\n",
|
1642
|
+
" <td><NA></td>\n",
|
1643
|
+
" <td><NA></td>\n",
|
1644
|
+
" <td><NA></td>\n",
|
1645
|
+
" <td>NaN</td>\n",
|
1646
|
+
" <td>NaN</td>\n",
|
1647
|
+
" </tr>\n",
|
1648
|
+
" <tr>\n",
|
1649
|
+
" <th>2010-07-18</th>\n",
|
1650
|
+
" <th>BTC</th>\n",
|
1651
|
+
" <td><NA></td>\n",
|
1652
|
+
" <td><NA></td>\n",
|
1653
|
+
" <td><NA></td>\n",
|
1654
|
+
" <td><NA></td>\n",
|
1655
|
+
" <td>NaN</td>\n",
|
1656
|
+
" <td>NaN</td>\n",
|
1657
|
+
" </tr>\n",
|
1658
|
+
" <tr>\n",
|
1659
|
+
" <th>2010-07-19</th>\n",
|
1660
|
+
" <th>BTC</th>\n",
|
1661
|
+
" <td><NA></td>\n",
|
1662
|
+
" <td><NA></td>\n",
|
1663
|
+
" <td><NA></td>\n",
|
1664
|
+
" <td><NA></td>\n",
|
1665
|
+
" <td>NaN</td>\n",
|
1666
|
+
" <td>NaN</td>\n",
|
1667
|
+
" </tr>\n",
|
1668
|
+
" <tr>\n",
|
1669
|
+
" <th>2010-07-20</th>\n",
|
1670
|
+
" <th>BTC</th>\n",
|
1671
|
+
" <td><NA></td>\n",
|
1672
|
+
" <td><NA></td>\n",
|
1673
|
+
" <td><NA></td>\n",
|
1674
|
+
" <td><NA></td>\n",
|
1675
|
+
" <td>NaN</td>\n",
|
1676
|
+
" <td>NaN</td>\n",
|
1677
|
+
" </tr>\n",
|
1678
|
+
" <tr>\n",
|
1679
|
+
" <th>2010-07-21</th>\n",
|
1680
|
+
" <th>BTC</th>\n",
|
1681
|
+
" <td><NA></td>\n",
|
1682
|
+
" <td><NA></td>\n",
|
1683
|
+
" <td><NA></td>\n",
|
1684
|
+
" <td><NA></td>\n",
|
1685
|
+
" <td>NaN</td>\n",
|
1686
|
+
" <td>NaN</td>\n",
|
1687
|
+
" </tr>\n",
|
1688
|
+
" <tr>\n",
|
1689
|
+
" <th>...</th>\n",
|
1690
|
+
" <th>...</th>\n",
|
1691
|
+
" <td>...</td>\n",
|
1692
|
+
" <td>...</td>\n",
|
1693
|
+
" <td>...</td>\n",
|
1694
|
+
" <td>...</td>\n",
|
1695
|
+
" <td>...</td>\n",
|
1696
|
+
" <td>...</td>\n",
|
1697
|
+
" </tr>\n",
|
1698
|
+
" <tr>\n",
|
1699
|
+
" <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
|
1700
|
+
" <th>ZEN</th>\n",
|
1701
|
+
" <td>9.657</td>\n",
|
1702
|
+
" <td>9.112</td>\n",
|
1703
|
+
" <td>8.285</td>\n",
|
1704
|
+
" <td>8.462</td>\n",
|
1705
|
+
" <td>2071124.0</td>\n",
|
1706
|
+
" <td>0.000194</td>\n",
|
1707
|
+
" </tr>\n",
|
1708
|
+
" <tr>\n",
|
1709
|
+
" <th>ZIL</th>\n",
|
1710
|
+
" <td>0.01422</td>\n",
|
1711
|
+
" <td>0.01441</td>\n",
|
1712
|
+
" <td>0.01392</td>\n",
|
1713
|
+
" <td>0.01396</td>\n",
|
1714
|
+
" <td>204862627.0</td>\n",
|
1715
|
+
" <td>-0.000031</td>\n",
|
1716
|
+
" </tr>\n",
|
1717
|
+
" <tr>\n",
|
1718
|
+
" <th>ZK</th>\n",
|
1719
|
+
" <td>0.11451</td>\n",
|
1720
|
+
" <td>0.11673</td>\n",
|
1721
|
+
" <td>0.10857</td>\n",
|
1722
|
+
" <td>0.10874</td>\n",
|
1723
|
+
" <td>383325323.0</td>\n",
|
1724
|
+
" <td>0.000200</td>\n",
|
1725
|
+
" </tr>\n",
|
1726
|
+
" <tr>\n",
|
1727
|
+
" <th>ZRO</th>\n",
|
1728
|
+
" <td>3.631</td>\n",
|
1729
|
+
" <td>3.648</td>\n",
|
1730
|
+
" <td>3.362</td>\n",
|
1731
|
+
" <td>3.367</td>\n",
|
1732
|
+
" <td>70494717.0</td>\n",
|
1733
|
+
" <td>0.000184</td>\n",
|
1734
|
+
" </tr>\n",
|
1735
|
+
" <tr>\n",
|
1736
|
+
" <th>ZRX</th>\n",
|
1737
|
+
" <td>0.3055</td>\n",
|
1738
|
+
" <td>0.3121</td>\n",
|
1739
|
+
" <td>0.2983</td>\n",
|
1740
|
+
" <td>0.2988</td>\n",
|
1741
|
+
" <td>9810764.1</td>\n",
|
1742
|
+
" <td>0.000186</td>\n",
|
1743
|
+
" </tr>\n",
|
1744
|
+
" </tbody>\n",
|
1745
|
+
"</table>\n",
|
1746
|
+
"<p>357696 rows × 6 columns</p>\n",
|
1747
|
+
"</div>"
|
1748
|
+
],
|
1749
|
+
"text/plain": [
|
1750
|
+
" open high low close volume \\\n",
|
1751
|
+
"date ticker \n",
|
1752
|
+
"2010-07-17 BTC <NA> <NA> <NA> <NA> NaN \n",
|
1753
|
+
"2010-07-18 BTC <NA> <NA> <NA> <NA> NaN \n",
|
1754
|
+
"2010-07-19 BTC <NA> <NA> <NA> <NA> NaN \n",
|
1755
|
+
"2010-07-20 BTC <NA> <NA> <NA> <NA> NaN \n",
|
1756
|
+
"2010-07-21 BTC <NA> <NA> <NA> <NA> NaN \n",
|
1757
|
+
"... ... ... ... ... ... \n",
|
1758
|
+
"2024-08-04 ZEN 9.657 9.112 8.285 8.462 2071124.0 \n",
|
1759
|
+
" ZIL 0.01422 0.01441 0.01392 0.01396 204862627.0 \n",
|
1760
|
+
" ZK 0.11451 0.11673 0.10857 0.10874 383325323.0 \n",
|
1761
|
+
" ZRO 3.631 3.648 3.362 3.367 70494717.0 \n",
|
1762
|
+
" ZRX 0.3055 0.3121 0.2983 0.2988 9810764.1 \n",
|
1763
|
+
"\n",
|
1764
|
+
" funding_rate \n",
|
1765
|
+
"date ticker \n",
|
1766
|
+
"2010-07-17 BTC NaN \n",
|
1767
|
+
"2010-07-18 BTC NaN \n",
|
1768
|
+
"2010-07-19 BTC NaN \n",
|
1769
|
+
"2010-07-20 BTC NaN \n",
|
1770
|
+
"2010-07-21 BTC NaN \n",
|
1771
|
+
"... ... \n",
|
1772
|
+
"2024-08-04 ZEN 0.000194 \n",
|
1773
|
+
" ZIL -0.000031 \n",
|
1774
|
+
" ZK 0.000200 \n",
|
1775
|
+
" ZRO 0.000184 \n",
|
1776
|
+
" ZRX 0.000186 \n",
|
1777
|
+
"\n",
|
1778
|
+
"[357696 rows x 6 columns]"
|
1779
|
+
]
|
1780
|
+
},
|
1781
|
+
"execution_count": 43,
|
1782
|
+
"metadata": {},
|
1783
|
+
"output_type": "execute_result"
|
1784
|
+
}
|
1785
|
+
],
|
1786
|
+
"source": [
|
1787
|
+
"clean.df"
|
1788
|
+
]
|
1789
|
+
},
|
1790
|
+
{
|
1791
|
+
"cell_type": "code",
|
1792
|
+
"execution_count": 44,
|
1793
|
+
"id": "b5ac345d",
|
1794
|
+
"metadata": {},
|
1795
|
+
"outputs": [
|
1796
|
+
{
|
1797
|
+
"data": {
|
1798
|
+
"text/plain": [
|
1799
|
+
"['CVC',\n",
|
1800
|
+
" 'CTK',\n",
|
1801
|
+
" 'STRAX',\n",
|
1802
|
+
" 'STPT',\n",
|
1803
|
+
" 'DGB',\n",
|
1804
|
+
" 'CVX',\n",
|
1805
|
+
" 'FTT',\n",
|
1806
|
+
" 'GLMR',\n",
|
1807
|
+
" 'OCEAN',\n",
|
1808
|
+
" 'SLP',\n",
|
1809
|
+
" 'AGIX',\n",
|
1810
|
+
" 'RAY',\n",
|
1811
|
+
" 'MDT',\n",
|
1812
|
+
" 'SC',\n",
|
1813
|
+
" 'SNT',\n",
|
1814
|
+
" 'IDEX',\n",
|
1815
|
+
" 'RAD',\n",
|
1816
|
+
" 'WAVES']"
|
1817
|
+
]
|
1818
|
+
},
|
1819
|
+
"execution_count": 44,
|
1820
|
+
"metadata": {},
|
1821
|
+
"output_type": "execute_result"
|
1822
|
+
}
|
1823
|
+
],
|
1824
|
+
"source": [
|
1825
|
+
"clean.filtered_tickers"
|
1826
|
+
]
|
1827
|
+
},
|
1828
|
+
{
|
1829
|
+
"cell_type": "code",
|
1830
|
+
"execution_count": 45,
|
1831
|
+
"id": "6d36d4ce",
|
1832
|
+
"metadata": {},
|
1833
|
+
"outputs": [
|
1834
|
+
{
|
1835
|
+
"data": {
|
1836
|
+
"text/plain": [
|
1837
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1838
|
+
]
|
1839
|
+
},
|
1840
|
+
"execution_count": 45,
|
1841
|
+
"metadata": {},
|
1842
|
+
"output_type": "execute_result"
|
1843
|
+
}
|
1844
|
+
],
|
1845
|
+
"source": [
|
1846
|
+
"clean.filter_missing_vals_gaps()"
|
1847
|
+
]
|
1848
|
+
},
|
1849
|
+
{
|
1850
|
+
"cell_type": "code",
|
1851
|
+
"execution_count": 47,
|
1852
|
+
"id": "7fe432d5",
|
1853
|
+
"metadata": {},
|
1854
|
+
"outputs": [
|
1855
|
+
{
|
1856
|
+
"data": {
|
1857
|
+
"text/plain": [
|
1858
|
+
"['CVC',\n",
|
1859
|
+
" 'CTK',\n",
|
1860
|
+
" 'STRAX',\n",
|
1861
|
+
" 'STPT',\n",
|
1862
|
+
" 'DGB',\n",
|
1863
|
+
" 'CVX',\n",
|
1864
|
+
" 'FTT',\n",
|
1865
|
+
" 'GLMR',\n",
|
1866
|
+
" 'OCEAN',\n",
|
1867
|
+
" 'SLP',\n",
|
1868
|
+
" 'AGIX',\n",
|
1869
|
+
" 'RAY',\n",
|
1870
|
+
" 'MDT',\n",
|
1871
|
+
" 'SC',\n",
|
1872
|
+
" 'SNT',\n",
|
1873
|
+
" 'IDEX',\n",
|
1874
|
+
" 'RAD',\n",
|
1875
|
+
" 'WAVES']"
|
1876
|
+
]
|
1877
|
+
},
|
1878
|
+
"execution_count": 47,
|
1879
|
+
"metadata": {},
|
1880
|
+
"output_type": "execute_result"
|
1881
|
+
}
|
1882
|
+
],
|
1883
|
+
"source": [
|
1884
|
+
"clean.filtered_tickers"
|
1885
|
+
]
|
1886
|
+
},
|
1887
|
+
{
|
1888
|
+
"cell_type": "code",
|
1889
|
+
"execution_count": 48,
|
1890
|
+
"id": "f7cbdbae",
|
1891
|
+
"metadata": {},
|
1892
|
+
"outputs": [
|
1893
|
+
{
|
1894
|
+
"data": {
|
1895
|
+
"text/plain": [
|
1896
|
+
"<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
|
1897
|
+
]
|
1898
|
+
},
|
1899
|
+
"execution_count": 48,
|
1900
|
+
"metadata": {},
|
1901
|
+
"output_type": "execute_result"
|
1902
|
+
}
|
1903
|
+
],
|
1904
|
+
"source": [
|
1905
|
+
"clean.filter_min_nobs(ts_obs=1400, cs_obs=5)"
|
1906
|
+
]
|
1907
|
+
},
|
1908
|
+
{
|
1909
|
+
"cell_type": "code",
|
1910
|
+
"execution_count": 49,
|
1911
|
+
"id": "f13d9f1f",
|
1912
|
+
"metadata": {},
|
1913
|
+
"outputs": [
|
1914
|
+
{
|
1915
|
+
"data": {
|
1916
|
+
"text/html": [
|
1917
|
+
"<div>\n",
|
1918
|
+
"<style scoped>\n",
|
1919
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
1920
|
+
" vertical-align: middle;\n",
|
1921
|
+
" }\n",
|
1922
|
+
"\n",
|
1923
|
+
" .dataframe tbody tr th {\n",
|
1924
|
+
" vertical-align: top;\n",
|
1925
|
+
" }\n",
|
1926
|
+
"\n",
|
1927
|
+
" .dataframe thead th {\n",
|
1928
|
+
" text-align: right;\n",
|
1929
|
+
" }\n",
|
1930
|
+
"</style>\n",
|
1931
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1932
|
+
" <thead>\n",
|
1933
|
+
" <tr style=\"text-align: right;\">\n",
|
1934
|
+
" <th></th>\n",
|
1935
|
+
" <th></th>\n",
|
1936
|
+
" <th>open</th>\n",
|
1937
|
+
" <th>high</th>\n",
|
1938
|
+
" <th>low</th>\n",
|
1939
|
+
" <th>close</th>\n",
|
1940
|
+
" <th>volume</th>\n",
|
1941
|
+
" <th>funding_rate</th>\n",
|
1942
|
+
" </tr>\n",
|
1943
|
+
" <tr>\n",
|
1944
|
+
" <th>date</th>\n",
|
1945
|
+
" <th>ticker</th>\n",
|
1946
|
+
" <th></th>\n",
|
1947
|
+
" <th></th>\n",
|
1948
|
+
" <th></th>\n",
|
1949
|
+
" <th></th>\n",
|
1950
|
+
" <th></th>\n",
|
1951
|
+
" <th></th>\n",
|
1952
|
+
" </tr>\n",
|
1953
|
+
" </thead>\n",
|
1954
|
+
" <tbody>\n",
|
1955
|
+
" <tr>\n",
|
1956
|
+
" <th rowspan=\"5\" valign=\"top\">2017-06-18</th>\n",
|
1957
|
+
" <th>BAT</th>\n",
|
1958
|
+
" <td><NA></td>\n",
|
1959
|
+
" <td><NA></td>\n",
|
1960
|
+
" <td><NA></td>\n",
|
1961
|
+
" <td><NA></td>\n",
|
1962
|
+
" <td>NaN</td>\n",
|
1963
|
+
" <td>NaN</td>\n",
|
1964
|
+
" </tr>\n",
|
1965
|
+
" <tr>\n",
|
1966
|
+
" <th>BTC</th>\n",
|
1967
|
+
" <td>2655.1</td>\n",
|
1968
|
+
" <td>2676.04</td>\n",
|
1969
|
+
" <td>2488.59</td>\n",
|
1970
|
+
" <td>2539.56</td>\n",
|
1971
|
+
" <td>9.200422e+04</td>\n",
|
1972
|
+
" <td>0.000000</td>\n",
|
1973
|
+
" </tr>\n",
|
1974
|
+
" <tr>\n",
|
1975
|
+
" <th>DASH</th>\n",
|
1976
|
+
" <td><NA></td>\n",
|
1977
|
+
" <td><NA></td>\n",
|
1978
|
+
" <td><NA></td>\n",
|
1979
|
+
" <td><NA></td>\n",
|
1980
|
+
" <td>NaN</td>\n",
|
1981
|
+
" <td>NaN</td>\n",
|
1982
|
+
" </tr>\n",
|
1983
|
+
" <tr>\n",
|
1984
|
+
" <th>DOGE</th>\n",
|
1985
|
+
" <td><NA></td>\n",
|
1986
|
+
" <td><NA></td>\n",
|
1987
|
+
" <td><NA></td>\n",
|
1988
|
+
" <td><NA></td>\n",
|
1989
|
+
" <td>NaN</td>\n",
|
1990
|
+
" <td>NaN</td>\n",
|
1991
|
+
" </tr>\n",
|
1992
|
+
" <tr>\n",
|
1993
|
+
" <th>ETC</th>\n",
|
1994
|
+
" <td>21.98</td>\n",
|
1995
|
+
" <td>23.8</td>\n",
|
1996
|
+
" <td>19.5</td>\n",
|
1997
|
+
" <td>20.27</td>\n",
|
1998
|
+
" <td>1.306320e+06</td>\n",
|
1999
|
+
" <td>0.000000</td>\n",
|
2000
|
+
" </tr>\n",
|
2001
|
+
" <tr>\n",
|
2002
|
+
" <th>...</th>\n",
|
2003
|
+
" <th>...</th>\n",
|
2004
|
+
" <td>...</td>\n",
|
2005
|
+
" <td>...</td>\n",
|
2006
|
+
" <td>...</td>\n",
|
2007
|
+
" <td>...</td>\n",
|
2008
|
+
" <td>...</td>\n",
|
2009
|
+
" <td>...</td>\n",
|
2010
|
+
" </tr>\n",
|
2011
|
+
" <tr>\n",
|
2012
|
+
" <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
|
2013
|
+
" <th>XTZ</th>\n",
|
2014
|
+
" <td>0.685</td>\n",
|
2015
|
+
" <td>0.693</td>\n",
|
2016
|
+
" <td>0.677</td>\n",
|
2017
|
+
" <td>0.679</td>\n",
|
2018
|
+
" <td>5.373374e+06</td>\n",
|
2019
|
+
" <td>0.000200</td>\n",
|
2020
|
+
" </tr>\n",
|
2021
|
+
" <tr>\n",
|
2022
|
+
" <th>YFI</th>\n",
|
2023
|
+
" <td>5341.0</td>\n",
|
2024
|
+
" <td>5341.0</td>\n",
|
2025
|
+
" <td>5196.0</td>\n",
|
2026
|
+
" <td>5198.0</td>\n",
|
2027
|
+
" <td>4.281050e+02</td>\n",
|
2028
|
+
" <td>0.000169</td>\n",
|
2029
|
+
" </tr>\n",
|
2030
|
+
" <tr>\n",
|
2031
|
+
" <th>ZEC</th>\n",
|
2032
|
+
" <td>31.76</td>\n",
|
2033
|
+
" <td>34.44</td>\n",
|
2034
|
+
" <td>31.28</td>\n",
|
2035
|
+
" <td>31.55</td>\n",
|
2036
|
+
" <td>1.348085e+06</td>\n",
|
2037
|
+
" <td>0.000191</td>\n",
|
2038
|
+
" </tr>\n",
|
2039
|
+
" <tr>\n",
|
2040
|
+
" <th>ZIL</th>\n",
|
2041
|
+
" <td>0.01422</td>\n",
|
2042
|
+
" <td>0.01441</td>\n",
|
2043
|
+
" <td>0.01392</td>\n",
|
2044
|
+
" <td>0.01396</td>\n",
|
2045
|
+
" <td>2.048626e+08</td>\n",
|
2046
|
+
" <td>-0.000031</td>\n",
|
2047
|
+
" </tr>\n",
|
2048
|
+
" <tr>\n",
|
2049
|
+
" <th>ZRX</th>\n",
|
2050
|
+
" <td>0.3055</td>\n",
|
2051
|
+
" <td>0.3121</td>\n",
|
2052
|
+
" <td>0.2983</td>\n",
|
2053
|
+
" <td>0.2988</td>\n",
|
2054
|
+
" <td>9.810764e+06</td>\n",
|
2055
|
+
" <td>0.000186</td>\n",
|
2056
|
+
" </tr>\n",
|
2057
|
+
" </tbody>\n",
|
2058
|
+
"</table>\n",
|
2059
|
+
"<p>136630 rows × 6 columns</p>\n",
|
2060
|
+
"</div>"
|
2061
|
+
],
|
2062
|
+
"text/plain": [
|
2063
|
+
" open high low close volume \\\n",
|
2064
|
+
"date ticker \n",
|
2065
|
+
"2017-06-18 BAT <NA> <NA> <NA> <NA> NaN \n",
|
2066
|
+
" BTC 2655.1 2676.04 2488.59 2539.56 9.200422e+04 \n",
|
2067
|
+
" DASH <NA> <NA> <NA> <NA> NaN \n",
|
2068
|
+
" DOGE <NA> <NA> <NA> <NA> NaN \n",
|
2069
|
+
" ETC 21.98 23.8 19.5 20.27 1.306320e+06 \n",
|
2070
|
+
"... ... ... ... ... ... \n",
|
2071
|
+
"2024-08-04 XTZ 0.685 0.693 0.677 0.679 5.373374e+06 \n",
|
2072
|
+
" YFI 5341.0 5341.0 5196.0 5198.0 4.281050e+02 \n",
|
2073
|
+
" ZEC 31.76 34.44 31.28 31.55 1.348085e+06 \n",
|
2074
|
+
" ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
|
2075
|
+
" ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
|
2076
|
+
"\n",
|
2077
|
+
" funding_rate \n",
|
2078
|
+
"date ticker \n",
|
2079
|
+
"2017-06-18 BAT NaN \n",
|
2080
|
+
" BTC 0.000000 \n",
|
2081
|
+
" DASH NaN \n",
|
2082
|
+
" DOGE NaN \n",
|
2083
|
+
" ETC 0.000000 \n",
|
2084
|
+
"... ... \n",
|
2085
|
+
"2024-08-04 XTZ 0.000200 \n",
|
2086
|
+
" YFI 0.000169 \n",
|
2087
|
+
" ZEC 0.000191 \n",
|
2088
|
+
" ZIL -0.000031 \n",
|
2089
|
+
" ZRX 0.000186 \n",
|
2090
|
+
"\n",
|
2091
|
+
"[136630 rows x 6 columns]"
|
2092
|
+
]
|
2093
|
+
},
|
2094
|
+
"execution_count": 49,
|
2095
|
+
"metadata": {},
|
2096
|
+
"output_type": "execute_result"
|
2097
|
+
}
|
2098
|
+
],
|
2099
|
+
"source": [
|
2100
|
+
"clean.df"
|
2101
|
+
]
|
2102
|
+
},
|
2103
|
+
{
|
2104
|
+
"cell_type": "code",
|
2105
|
+
"execution_count": 50,
|
2106
|
+
"id": "a9b1764c",
|
953
2107
|
"metadata": {},
|
954
2108
|
"outputs": [],
|
955
2109
|
"source": [
|
956
|
-
"
|
2110
|
+
"# # clean data\n",
|
2111
|
+
"# clean_df = clean.filter_delisted_tickers().\\\n",
|
2112
|
+
"# filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
|
2113
|
+
"# repair_outliers(imp_method='fcst').\\\n",
|
2114
|
+
"# filter_avg_trading_val(thresh_val=1000000).\\\n",
|
2115
|
+
"# filter_missing_vals_gaps().\\\n",
|
2116
|
+
"# filter_min_nobs(ts_obs=1500, cs_obs=10).\\\n",
|
2117
|
+
"# get(attr='df').dropna(how='all')"
|
957
2118
|
]
|
958
2119
|
},
|
959
2120
|
{
|
960
2121
|
"cell_type": "code",
|
961
2122
|
"execution_count": null,
|
962
|
-
"id": "
|
2123
|
+
"id": "572d7a2e",
|
963
2124
|
"metadata": {},
|
964
2125
|
"outputs": [],
|
965
2126
|
"source": []
|
966
2127
|
},
|
967
2128
|
{
|
968
2129
|
"cell_type": "code",
|
969
|
-
"execution_count":
|
970
|
-
"id": "
|
2130
|
+
"execution_count": 53,
|
2131
|
+
"id": "99857595",
|
971
2132
|
"metadata": {},
|
972
2133
|
"outputs": [],
|
973
2134
|
"source": [
|
974
|
-
"
|
2135
|
+
"df.to_parquet('s3://factorlab-data/binance_historical_ohlcv_daily.parquet')"
|
975
2136
|
]
|
976
2137
|
},
|
977
2138
|
{
|
978
2139
|
"cell_type": "code",
|
979
|
-
"execution_count":
|
980
|
-
"id": "
|
2140
|
+
"execution_count": 54,
|
2141
|
+
"id": "15e66225",
|
2142
|
+
"metadata": {},
|
2143
|
+
"outputs": [],
|
2144
|
+
"source": [
|
2145
|
+
"clean.df.dropna(how='all').to_parquet('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.parquet')"
|
2146
|
+
]
|
2147
|
+
},
|
2148
|
+
{
|
2149
|
+
"cell_type": "code",
|
2150
|
+
"execution_count": null,
|
2151
|
+
"id": "8a962fa7",
|
2152
|
+
"metadata": {},
|
2153
|
+
"outputs": [],
|
2154
|
+
"source": []
|
2155
|
+
},
|
2156
|
+
{
|
2157
|
+
"cell_type": "code",
|
2158
|
+
"execution_count": 55,
|
2159
|
+
"id": "54b818cd",
|
981
2160
|
"metadata": {},
|
982
2161
|
"outputs": [],
|
983
2162
|
"source": [
|
984
|
-
"
|
2163
|
+
"clean.df.dropna(how='all').to_csv('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.csv')"
|
985
2164
|
]
|
986
2165
|
},
|
987
2166
|
{
|
@@ -995,7 +2174,7 @@
|
|
995
2174
|
{
|
996
2175
|
"cell_type": "code",
|
997
2176
|
"execution_count": null,
|
998
|
-
"id": "
|
2177
|
+
"id": "da953cbf",
|
999
2178
|
"metadata": {},
|
1000
2179
|
"outputs": [],
|
1001
2180
|
"source": []
|