cryptodatapy 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,19 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "code",
5
- "execution_count": 86,
5
+ "execution_count": 1,
6
6
  "id": "9fea9fae",
7
7
  "metadata": {},
8
- "outputs": [],
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "fatal: bad revision 'HEAD'\n",
14
+ "Importing plotly failed. Interactive plots will not work.\n"
15
+ ]
16
+ }
17
+ ],
9
18
  "source": [
10
19
  "import pandas as pd\n",
11
20
  "import numpy as np\n",
@@ -21,225 +30,94 @@
21
30
  },
22
31
  {
23
32
  "cell_type": "code",
24
- "execution_count": 94,
25
- "id": "c9876f0b",
33
+ "execution_count": 2,
34
+ "id": "3cbdeffc",
26
35
  "metadata": {},
27
36
  "outputs": [],
28
37
  "source": [
29
- "data_req = DataRequest(source='tiingo')"
38
+ "# get all Binance perp futures tickers\n",
39
+ "data_req = DataRequest(source='ccxt')\n",
40
+ "perp_tickers = GetData(data_req).get_meta(method='get_markets_info', exch='binanceusdm', as_list=True)"
30
41
  ]
31
42
  },
32
43
  {
33
44
  "cell_type": "code",
34
- "execution_count": 95,
35
- "id": "e23c9751",
45
+ "execution_count": 3,
46
+ "id": "3d084cf7",
36
47
  "metadata": {},
37
48
  "outputs": [
38
49
  {
39
- "ename": "AttributeError",
40
- "evalue": "'Tiingo' object has no attribute 'get_top_mkt_cap_info'",
41
- "output_type": "error",
42
- "traceback": [
43
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
44
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
45
- "Cell \u001b[0;32mIn [95], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m GetData(data_req)\u001b[38;5;241m.\u001b[39mget_meta(method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mget_top_mkt_cap_info\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
46
- "File \u001b[0;32m~/projects/systamental/cryptodatapy/src/cryptodatapy/extract/getdata.py:145\u001b[0m, in \u001b[0;36mGetData.get_meta\u001b[0;34m(self, attr, method, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m meta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(ds, attr)\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m method \u001b[38;5;129;01min\u001b[39;00m valid_meth:\n\u001b[0;32m--> 145\u001b[0m meta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSelect a valid attribute or method. Valid attributes: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalid_attr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Valid methods include: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalid_meth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 150\u001b[0m )\n",
47
- "\u001b[0;31mAttributeError\u001b[0m: 'Tiingo' object has no attribute 'get_top_mkt_cap_info'"
48
- ]
50
+ "data": {
51
+ "text/plain": [
52
+ "314"
53
+ ]
54
+ },
55
+ "execution_count": 3,
56
+ "metadata": {},
57
+ "output_type": "execute_result"
49
58
  }
50
59
  ],
51
60
  "source": [
52
- "GetData(data_req).get_meta(method='get_top_mkt_cap_info')"
61
+ "len(perp_tickers)"
53
62
  ]
54
63
  },
55
64
  {
56
65
  "cell_type": "code",
57
- "execution_count": 89,
58
- "id": "b8facf81",
66
+ "execution_count": 4,
67
+ "id": "5e796745",
59
68
  "metadata": {},
60
69
  "outputs": [
61
70
  {
62
71
  "data": {
63
- "text/html": [
64
- "<div>\n",
65
- "<style scoped>\n",
66
- " .dataframe tbody tr th:only-of-type {\n",
67
- " vertical-align: middle;\n",
68
- " }\n",
69
- "\n",
70
- " .dataframe tbody tr th {\n",
71
- " vertical-align: top;\n",
72
- " }\n",
73
- "\n",
74
- " .dataframe thead th {\n",
75
- " text-align: right;\n",
76
- " }\n",
77
- "</style>\n",
78
- "<table border=\"1\" class=\"dataframe\">\n",
79
- " <thead>\n",
80
- " <tr style=\"text-align: right;\">\n",
81
- " <th></th>\n",
82
- " <th></th>\n",
83
- " <th>close</th>\n",
84
- " </tr>\n",
85
- " <tr>\n",
86
- " <th>date</th>\n",
87
- " <th>ticker</th>\n",
88
- " <th></th>\n",
89
- " </tr>\n",
90
- " </thead>\n",
91
- " <tbody>\n",
92
- " <tr>\n",
93
- " <th>2019-01-31</th>\n",
94
- " <th>EURUSD</th>\n",
95
- " <td>1.147579</td>\n",
96
- " </tr>\n",
97
- " <tr>\n",
98
- " <th>2019-02-28</th>\n",
99
- " <th>EURUSD</th>\n",
100
- " <td>1.138693</td>\n",
101
- " </tr>\n",
102
- " <tr>\n",
103
- " <th>2019-03-31</th>\n",
104
- " <th>EURUSD</th>\n",
105
- " <td>1.122355</td>\n",
106
- " </tr>\n",
107
- " <tr>\n",
108
- " <th>2019-04-30</th>\n",
109
- " <th>EURUSD</th>\n",
110
- " <td>1.120699</td>\n",
111
- " </tr>\n",
112
- " <tr>\n",
113
- " <th>2019-05-31</th>\n",
114
- " <th>EURUSD</th>\n",
115
- " <td>1.11433</td>\n",
116
- " </tr>\n",
117
- " <tr>\n",
118
- " <th>...</th>\n",
119
- " <th>...</th>\n",
120
- " <td>...</td>\n",
121
- " </tr>\n",
122
- " <tr>\n",
123
- " <th>2024-04-30</th>\n",
124
- " <th>EURUSD</th>\n",
125
- " <td>1.066605</td>\n",
126
- " </tr>\n",
127
- " <tr>\n",
128
- " <th>2024-05-31</th>\n",
129
- " <th>EURUSD</th>\n",
130
- " <td>1.084825</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>2024-06-30</th>\n",
134
- " <th>EURUSD</th>\n",
135
- " <td>1.073515</td>\n",
136
- " </tr>\n",
137
- " <tr>\n",
138
- " <th>2024-07-31</th>\n",
139
- " <th>EURUSD</th>\n",
140
- " <td>1.082395</td>\n",
141
- " </tr>\n",
142
- " <tr>\n",
143
- " <th>2024-08-31</th>\n",
144
- " <th>EURUSD</th>\n",
145
- " <td>1.092375</td>\n",
146
- " </tr>\n",
147
- " </tbody>\n",
148
- "</table>\n",
149
- "<p>68 rows × 1 columns</p>\n",
150
- "</div>"
151
- ],
152
72
  "text/plain": [
153
- " close\n",
154
- "date ticker \n",
155
- "2019-01-31 EURUSD 1.147579\n",
156
- "2019-02-28 EURUSD 1.138693\n",
157
- "2019-03-31 EURUSD 1.122355\n",
158
- "2019-04-30 EURUSD 1.120699\n",
159
- "2019-05-31 EURUSD 1.11433\n",
160
- "... ...\n",
161
- "2024-04-30 EURUSD 1.066605\n",
162
- "2024-05-31 EURUSD 1.084825\n",
163
- "2024-06-30 EURUSD 1.073515\n",
164
- "2024-07-31 EURUSD 1.082395\n",
165
- "2024-08-31 EURUSD 1.092375\n",
166
- "\n",
167
- "[68 rows x 1 columns]"
73
+ "True"
168
74
  ]
169
75
  },
170
- "execution_count": 89,
76
+ "execution_count": 4,
171
77
  "metadata": {},
172
78
  "output_type": "execute_result"
173
79
  }
174
80
  ],
175
81
  "source": [
176
- "fx_df"
82
+ "'BTCST/USDT:USDT' in perp_tickers"
177
83
  ]
178
84
  },
179
85
  {
180
86
  "cell_type": "code",
181
- "execution_count": null,
182
- "id": "005404ee",
183
- "metadata": {},
184
- "outputs": [],
185
- "source": []
186
- },
187
- {
188
- "cell_type": "code",
189
- "execution_count": null,
190
- "id": "8e3dc50d",
191
- "metadata": {},
192
- "outputs": [],
193
- "source": []
194
- },
195
- {
196
- "cell_type": "code",
197
- "execution_count": 2,
198
- "id": "3cbdeffc",
87
+ "execution_count": 5,
88
+ "id": "fcb74458",
199
89
  "metadata": {},
200
90
  "outputs": [],
201
91
  "source": [
202
- "# get all Binance perp futures tickers\n",
92
+ "# get Binance spot tickers\n",
203
93
  "data_req = DataRequest(source='ccxt')\n",
204
- "perp_tickers = GetData(data_req).get_meta(method='get_markets_info', exch='binanceusdm', as_list=True)"
94
+ "spot_tickers = GetData(data_req).get_meta(method='get_markets_info', exch='binance', as_list=True)"
205
95
  ]
206
96
  },
207
97
  {
208
98
  "cell_type": "code",
209
- "execution_count": 3,
210
- "id": "3d084cf7",
99
+ "execution_count": 6,
100
+ "id": "4b86fa0d",
211
101
  "metadata": {},
212
102
  "outputs": [
213
103
  {
214
104
  "data": {
215
105
  "text/plain": [
216
- "314"
106
+ "True"
217
107
  ]
218
108
  },
219
- "execution_count": 3,
109
+ "execution_count": 6,
220
110
  "metadata": {},
221
111
  "output_type": "execute_result"
222
112
  }
223
113
  ],
224
114
  "source": [
225
- "len(perp_tickers)"
226
- ]
227
- },
228
- {
229
- "cell_type": "code",
230
- "execution_count": 4,
231
- "id": "fcb74458",
232
- "metadata": {},
233
- "outputs": [],
234
- "source": [
235
- "# get Binance spot tickers\n",
236
- "data_req = DataRequest(source='ccxt')\n",
237
- "spot_tickers = GetData(data_req).get_meta(method='get_markets_info', exch='binance', as_list=True)"
115
+ "'BTCST/USDT:USDT' in spot_tickers"
238
116
  ]
239
117
  },
240
118
  {
241
119
  "cell_type": "code",
242
- "execution_count": 5,
120
+ "execution_count": 7,
243
121
  "id": "7962f7e5",
244
122
  "metadata": {},
245
123
  "outputs": [],
@@ -250,17 +128,17 @@
250
128
  },
251
129
  {
252
130
  "cell_type": "code",
253
- "execution_count": 6,
131
+ "execution_count": 8,
254
132
  "id": "877811c1",
255
133
  "metadata": {},
256
134
  "outputs": [
257
135
  {
258
136
  "data": {
259
137
  "text/plain": [
260
- "284"
138
+ "314"
261
139
  ]
262
140
  },
263
- "execution_count": 6,
141
+ "execution_count": 8,
264
142
  "metadata": {},
265
143
  "output_type": "execute_result"
266
144
  }
@@ -272,7 +150,28 @@
272
150
  },
273
151
  {
274
152
  "cell_type": "code",
275
- "execution_count": 7,
153
+ "execution_count": 9,
154
+ "id": "4ef7a85b",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "True"
161
+ ]
162
+ },
163
+ "execution_count": 9,
164
+ "metadata": {},
165
+ "output_type": "execute_result"
166
+ }
167
+ ],
168
+ "source": [
169
+ "'BTCST/USDT:USDT' in binance_tickers"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": 10,
276
175
  "id": "fe425163",
277
176
  "metadata": {},
278
177
  "outputs": [],
@@ -284,7 +183,28 @@
284
183
  },
285
184
  {
286
185
  "cell_type": "code",
287
- "execution_count": 8,
186
+ "execution_count": 11,
187
+ "id": "9c63cd43",
188
+ "metadata": {},
189
+ "outputs": [
190
+ {
191
+ "data": {
192
+ "text/plain": [
193
+ "True"
194
+ ]
195
+ },
196
+ "execution_count": 11,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "'BTCST' in cc_tickers"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 12,
288
208
  "id": "165053db",
289
209
  "metadata": {},
290
210
  "outputs": [],
@@ -292,24 +212,66 @@
292
212
  "# keep only USDT ticker\n",
293
213
  "bin_tickers = []\n",
294
214
  "for ticker in binance_tickers:\n",
295
- " if '/' in ticker and ticker.split('/')[1] == 'USDT':\n",
215
+ " if '/' in ticker and ticker.split('/')[1] == 'USDT:USDT':\n",
296
216
  " bin_tickers.append(ticker.split('/')[0])"
297
217
  ]
298
218
  },
299
219
  {
300
220
  "cell_type": "code",
301
- "execution_count": 9,
221
+ "execution_count": 13,
222
+ "id": "c93e487a",
223
+ "metadata": {},
224
+ "outputs": [
225
+ {
226
+ "data": {
227
+ "text/plain": [
228
+ "True"
229
+ ]
230
+ },
231
+ "execution_count": 13,
232
+ "metadata": {},
233
+ "output_type": "execute_result"
234
+ }
235
+ ],
236
+ "source": [
237
+ "'BTCST' in bin_tickers"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 14,
302
243
  "id": "d6cf8a4c",
303
244
  "metadata": {},
304
245
  "outputs": [],
305
246
  "source": [
306
247
  "# usdt tickers\n",
307
- "usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT']"
248
+ "usdt_tickers = [ticker.split('/')[0] for ticker in binance_tickers if '/'in ticker and ticker.split('/')[1] == 'USDT:USDT']"
308
249
  ]
309
250
  },
310
251
  {
311
252
  "cell_type": "code",
312
- "execution_count": 10,
253
+ "execution_count": 15,
254
+ "id": "11ec0e6d",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "data": {
259
+ "text/plain": [
260
+ "282"
261
+ ]
262
+ },
263
+ "execution_count": 15,
264
+ "metadata": {},
265
+ "output_type": "execute_result"
266
+ }
267
+ ],
268
+ "source": [
269
+ "len(usdt_tickers)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 16,
313
275
  "id": "633f7a3e",
314
276
  "metadata": {},
315
277
  "outputs": [],
@@ -320,17 +282,17 @@
320
282
  },
321
283
  {
322
284
  "cell_type": "code",
323
- "execution_count": 11,
285
+ "execution_count": 17,
324
286
  "id": "30337a71",
325
287
  "metadata": {},
326
288
  "outputs": [
327
289
  {
328
290
  "data": {
329
291
  "text/plain": [
330
- "257"
292
+ "270"
331
293
  ]
332
294
  },
333
- "execution_count": 11,
295
+ "execution_count": 17,
334
296
  "metadata": {},
335
297
  "output_type": "execute_result"
336
298
  }
@@ -339,6 +301,27 @@
339
301
  "len(tickers)"
340
302
  ]
341
303
  },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 18,
307
+ "id": "7bb3b25f",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "data": {
312
+ "text/plain": [
313
+ "True"
314
+ ]
315
+ },
316
+ "execution_count": 18,
317
+ "metadata": {},
318
+ "output_type": "execute_result"
319
+ }
320
+ ],
321
+ "source": [
322
+ "'BTCST' in tickers"
323
+ ]
324
+ },
342
325
  {
343
326
  "cell_type": "markdown",
344
327
  "id": "f80eb97d",
@@ -349,7 +332,7 @@
349
332
  },
350
333
  {
351
334
  "cell_type": "code",
352
- "execution_count": 12,
335
+ "execution_count": 20,
353
336
  "id": "49b09508",
354
337
  "metadata": {},
355
338
  "outputs": [],
@@ -364,29 +347,35 @@
364
347
  },
365
348
  {
366
349
  "cell_type": "code",
367
- "execution_count": 13,
350
+ "execution_count": 21,
368
351
  "id": "6ac9365b",
369
352
  "metadata": {},
370
- "outputs": [],
371
- "source": [
372
- "# df1 = GetData(data_req).get_series()"
373
- ]
374
- },
375
- {
376
- "cell_type": "code",
377
- "execution_count": 14,
378
- "id": "98a425b2",
379
- "metadata": {},
380
- "outputs": [],
353
+ "outputs": [
354
+ {
355
+ "name": "stderr",
356
+ "output_type": "stream",
357
+ "text": [
358
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
359
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
360
+ "WARNING:root:Failed to pull data on attempt #1.\n",
361
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
362
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
363
+ "WARNING:root:Failed to pull data on attempt #2.\n",
364
+ "WARNING:root:Failed to get ohlcv data for BTCST/USDT.\n",
365
+ "WARNING:root:binanceusdm {\"code\":-1122,\"msg\":\"Invalid symbol status.\"}\n",
366
+ "WARNING:root:Failed to pull data on attempt #3.\n",
367
+ "WARNING:root:Failed to get OHLCV data from binanceusdm for BTCST/USDT after many attempts.\n"
368
+ ]
369
+ }
370
+ ],
381
371
  "source": [
382
- "# df1.to_csv('binance_perp_futures.csv')\n",
383
- "df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
372
+ "df1 = GetData(data_req).get_series()"
384
373
  ]
385
374
  },
386
375
  {
387
376
  "cell_type": "code",
388
- "execution_count": 15,
389
- "id": "0e04da4a",
377
+ "execution_count": 22,
378
+ "id": "364bb46e",
390
379
  "metadata": {},
391
380
  "outputs": [
392
381
  {
@@ -433,12 +422,12 @@
433
422
  " <tr>\n",
434
423
  " <th>2019-09-08</th>\n",
435
424
  " <th>BTC</th>\n",
436
- " <td>10000.00</td>\n",
425
+ " <td>10000.0</td>\n",
437
426
  " <td>10412.65</td>\n",
438
- " <td>10000.00</td>\n",
427
+ " <td>10000.0</td>\n",
439
428
  " <td>10391.63</td>\n",
440
429
  " <td>3096.291</td>\n",
441
- " <td>NaN</td>\n",
430
+ " <td>&lt;NA&gt;</td>\n",
442
431
  " </tr>\n",
443
432
  " <tr>\n",
444
433
  " <th>2019-09-09</th>\n",
@@ -446,7 +435,217 @@
446
435
  " <td>10316.62</td>\n",
447
436
  " <td>10475.54</td>\n",
448
437
  " <td>10077.22</td>\n",
449
- " <td>10307.00</td>\n",
438
+ " <td>10307.0</td>\n",
439
+ " <td>14824.373</td>\n",
440
+ " <td>&lt;NA&gt;</td>\n",
441
+ " </tr>\n",
442
+ " <tr>\n",
443
+ " <th>2019-09-10</th>\n",
444
+ " <th>BTC</th>\n",
445
+ " <td>10307.0</td>\n",
446
+ " <td>10382.97</td>\n",
447
+ " <td>9940.87</td>\n",
448
+ " <td>10102.02</td>\n",
449
+ " <td>9068.955</td>\n",
450
+ " <td>0.0002</td>\n",
451
+ " </tr>\n",
452
+ " <tr>\n",
453
+ " <th>2019-09-11</th>\n",
454
+ " <th>BTC</th>\n",
455
+ " <td>10094.27</td>\n",
456
+ " <td>10293.11</td>\n",
457
+ " <td>9884.31</td>\n",
458
+ " <td>10159.55</td>\n",
459
+ " <td>10897.922</td>\n",
460
+ " <td>0.0003</td>\n",
461
+ " </tr>\n",
462
+ " <tr>\n",
463
+ " <th>2019-09-12</th>\n",
464
+ " <th>BTC</th>\n",
465
+ " <td>10163.06</td>\n",
466
+ " <td>10450.13</td>\n",
467
+ " <td>10042.12</td>\n",
468
+ " <td>10415.13</td>\n",
469
+ " <td>15609.634</td>\n",
470
+ " <td>0.0003</td>\n",
471
+ " </tr>\n",
472
+ " <tr>\n",
473
+ " <th>...</th>\n",
474
+ " <th>...</th>\n",
475
+ " <td>...</td>\n",
476
+ " <td>...</td>\n",
477
+ " <td>...</td>\n",
478
+ " <td>...</td>\n",
479
+ " <td>...</td>\n",
480
+ " <td>...</td>\n",
481
+ " </tr>\n",
482
+ " <tr>\n",
483
+ " <th rowspan=\"5\" valign=\"top\">2024-08-13</th>\n",
484
+ " <th>ZETA</th>\n",
485
+ " <td>0.6558</td>\n",
486
+ " <td>0.7099</td>\n",
487
+ " <td>0.6143</td>\n",
488
+ " <td>0.6556</td>\n",
489
+ " <td>290931468.0</td>\n",
490
+ " <td>-0.001465</td>\n",
491
+ " </tr>\n",
492
+ " <tr>\n",
493
+ " <th>ZIL</th>\n",
494
+ " <td>0.01394</td>\n",
495
+ " <td>0.01397</td>\n",
496
+ " <td>0.01348</td>\n",
497
+ " <td>0.01372</td>\n",
498
+ " <td>211016383.0</td>\n",
499
+ " <td>0.0002</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <th>ZK</th>\n",
503
+ " <td>0.11683</td>\n",
504
+ " <td>0.11895</td>\n",
505
+ " <td>0.11223</td>\n",
506
+ " <td>0.11713</td>\n",
507
+ " <td>180118593.0</td>\n",
508
+ " <td>0.0002</td>\n",
509
+ " </tr>\n",
510
+ " <tr>\n",
511
+ " <th>ZRO</th>\n",
512
+ " <td>3.509</td>\n",
513
+ " <td>3.533</td>\n",
514
+ " <td>3.349</td>\n",
515
+ " <td>3.459</td>\n",
516
+ " <td>10802271.5</td>\n",
517
+ " <td>0.000173</td>\n",
518
+ " </tr>\n",
519
+ " <tr>\n",
520
+ " <th>ZRX</th>\n",
521
+ " <td>0.3102</td>\n",
522
+ " <td>0.3126</td>\n",
523
+ " <td>0.3</td>\n",
524
+ " <td>0.3083</td>\n",
525
+ " <td>18072404.9</td>\n",
526
+ " <td>-0.000044</td>\n",
527
+ " </tr>\n",
528
+ " </tbody>\n",
529
+ "</table>\n",
530
+ "<p>222221 rows × 6 columns</p>\n",
531
+ "</div>"
532
+ ],
533
+ "text/plain": [
534
+ " open high low close volume \\\n",
535
+ "date ticker \n",
536
+ "2019-09-08 BTC 10000.0 10412.65 10000.0 10391.63 3096.291 \n",
537
+ "2019-09-09 BTC 10316.62 10475.54 10077.22 10307.0 14824.373 \n",
538
+ "2019-09-10 BTC 10307.0 10382.97 9940.87 10102.02 9068.955 \n",
539
+ "2019-09-11 BTC 10094.27 10293.11 9884.31 10159.55 10897.922 \n",
540
+ "2019-09-12 BTC 10163.06 10450.13 10042.12 10415.13 15609.634 \n",
541
+ "... ... ... ... ... ... \n",
542
+ "2024-08-13 ZETA 0.6558 0.7099 0.6143 0.6556 290931468.0 \n",
543
+ " ZIL 0.01394 0.01397 0.01348 0.01372 211016383.0 \n",
544
+ " ZK 0.11683 0.11895 0.11223 0.11713 180118593.0 \n",
545
+ " ZRO 3.509 3.533 3.349 3.459 10802271.5 \n",
546
+ " ZRX 0.3102 0.3126 0.3 0.3083 18072404.9 \n",
547
+ "\n",
548
+ " funding_rate \n",
549
+ "date ticker \n",
550
+ "2019-09-08 BTC <NA> \n",
551
+ "2019-09-09 BTC <NA> \n",
552
+ "2019-09-10 BTC 0.0002 \n",
553
+ "2019-09-11 BTC 0.0003 \n",
554
+ "2019-09-12 BTC 0.0003 \n",
555
+ "... ... \n",
556
+ "2024-08-13 ZETA -0.001465 \n",
557
+ " ZIL 0.0002 \n",
558
+ " ZK 0.0002 \n",
559
+ " ZRO 0.000173 \n",
560
+ " ZRX -0.000044 \n",
561
+ "\n",
562
+ "[222221 rows x 6 columns]"
563
+ ]
564
+ },
565
+ "execution_count": 22,
566
+ "metadata": {},
567
+ "output_type": "execute_result"
568
+ }
569
+ ],
570
+ "source": [
571
+ "df1"
572
+ ]
573
+ },
574
+ {
575
+ "cell_type": "code",
576
+ "execution_count": 23,
577
+ "id": "98a425b2",
578
+ "metadata": {},
579
+ "outputs": [],
580
+ "source": [
581
+ "# df1.to_csv('binance_perp_futures.csv')\n",
582
+ "df1 = pd.read_csv('../../../../factorlab/notebooks/binance_perp_futures.csv', index_col=['date', 'ticker'], parse_dates=['date'])"
583
+ ]
584
+ },
585
+ {
586
+ "cell_type": "code",
587
+ "execution_count": 24,
588
+ "id": "0e04da4a",
589
+ "metadata": {},
590
+ "outputs": [
591
+ {
592
+ "data": {
593
+ "text/html": [
594
+ "<div>\n",
595
+ "<style scoped>\n",
596
+ " .dataframe tbody tr th:only-of-type {\n",
597
+ " vertical-align: middle;\n",
598
+ " }\n",
599
+ "\n",
600
+ " .dataframe tbody tr th {\n",
601
+ " vertical-align: top;\n",
602
+ " }\n",
603
+ "\n",
604
+ " .dataframe thead th {\n",
605
+ " text-align: right;\n",
606
+ " }\n",
607
+ "</style>\n",
608
+ "<table border=\"1\" class=\"dataframe\">\n",
609
+ " <thead>\n",
610
+ " <tr style=\"text-align: right;\">\n",
611
+ " <th></th>\n",
612
+ " <th></th>\n",
613
+ " <th>open</th>\n",
614
+ " <th>high</th>\n",
615
+ " <th>low</th>\n",
616
+ " <th>close</th>\n",
617
+ " <th>volume</th>\n",
618
+ " <th>funding_rate</th>\n",
619
+ " </tr>\n",
620
+ " <tr>\n",
621
+ " <th>date</th>\n",
622
+ " <th>ticker</th>\n",
623
+ " <th></th>\n",
624
+ " <th></th>\n",
625
+ " <th></th>\n",
626
+ " <th></th>\n",
627
+ " <th></th>\n",
628
+ " <th></th>\n",
629
+ " </tr>\n",
630
+ " </thead>\n",
631
+ " <tbody>\n",
632
+ " <tr>\n",
633
+ " <th>2019-09-08</th>\n",
634
+ " <th>BTC</th>\n",
635
+ " <td>10000.00</td>\n",
636
+ " <td>10412.65</td>\n",
637
+ " <td>10000.00</td>\n",
638
+ " <td>10391.63</td>\n",
639
+ " <td>3096.291</td>\n",
640
+ " <td>NaN</td>\n",
641
+ " </tr>\n",
642
+ " <tr>\n",
643
+ " <th>2019-09-09</th>\n",
644
+ " <th>BTC</th>\n",
645
+ " <td>10316.62</td>\n",
646
+ " <td>10475.54</td>\n",
647
+ " <td>10077.22</td>\n",
648
+ " <td>10307.00</td>\n",
450
649
  " <td>14824.373</td>\n",
451
650
  " <td>NaN</td>\n",
452
651
  " </tr>\n",
@@ -502,7 +701,7 @@
502
701
  "2019-09-12 BTC 0.0003 "
503
702
  ]
504
703
  },
505
- "execution_count": 15,
704
+ "execution_count": 24,
506
705
  "metadata": {},
507
706
  "output_type": "execute_result"
508
707
  }
@@ -521,7 +720,7 @@
521
720
  },
522
721
  {
523
722
  "cell_type": "code",
524
- "execution_count": 16,
723
+ "execution_count": 25,
525
724
  "id": "83e9e466",
526
725
  "metadata": {},
527
726
  "outputs": [],
@@ -535,17 +734,33 @@
535
734
  },
536
735
  {
537
736
  "cell_type": "code",
538
- "execution_count": 17,
737
+ "execution_count": 26,
539
738
  "id": "82d4bbc7",
540
739
  "metadata": {},
541
- "outputs": [],
740
+ "outputs": [
741
+ {
742
+ "name": "stderr",
743
+ "output_type": "stream",
744
+ "text": [
745
+ "WARNING:root:Missing recent OHLCV data for XMR/USDT.\n",
746
+ "WARNING:root:Missing recent OHLCV data for OMG/USDT.\n",
747
+ "WARNING:root:Missing recent OHLCV data for WAVES/USDT.\n",
748
+ "WARNING:root:Missing recent OHLCV data for OCEAN/USDT.\n",
749
+ "WARNING:root:Missing recent OHLCV data for XEM/USDT.\n",
750
+ "WARNING:root:Missing recent OHLCV data for BTCST/USDT.\n",
751
+ "WARNING:root:Missing recent OHLCV data for AGIX/USDT.\n",
752
+ "WARNING:root:Missing recent OHLCV data for BOND/USDT.\n",
753
+ "WARNING:root:Missing recent OHLCV data for BSV/USDT.\n"
754
+ ]
755
+ }
756
+ ],
542
757
  "source": [
543
- "# df2 = GetData(data_req).get_series()"
758
+ "df2 = GetData(data_req).get_series()"
544
759
  ]
545
760
  },
546
761
  {
547
762
  "cell_type": "code",
548
- "execution_count": 18,
763
+ "execution_count": 27,
549
764
  "id": "4f63eb21",
550
765
  "metadata": {},
551
766
  "outputs": [],
@@ -556,7 +771,7 @@
556
771
  },
557
772
  {
558
773
  "cell_type": "code",
559
- "execution_count": 19,
774
+ "execution_count": 28,
560
775
  "id": "ce8929c1",
561
776
  "metadata": {},
562
777
  "outputs": [
@@ -656,7 +871,7 @@
656
871
  "2017-08-19 BTC 4108.37 4184.69 3850.00 4139.98 381.309763"
657
872
  ]
658
873
  },
659
- "execution_count": 19,
874
+ "execution_count": 28,
660
875
  "metadata": {},
661
876
  "output_type": "execute_result"
662
877
  }
@@ -675,7 +890,7 @@
675
890
  },
676
891
  {
677
892
  "cell_type": "code",
678
- "execution_count": 20,
893
+ "execution_count": 29,
679
894
  "id": "7f14d874",
680
895
  "metadata": {},
681
896
  "outputs": [],
@@ -689,17 +904,17 @@
689
904
  },
690
905
  {
691
906
  "cell_type": "code",
692
- "execution_count": 21,
907
+ "execution_count": 30,
693
908
  "id": "3a8708d3",
694
909
  "metadata": {},
695
910
  "outputs": [],
696
911
  "source": [
697
- "# df3 = GetData(data_req).get_series()"
912
+ "df3 = GetData(data_req).get_series()"
698
913
  ]
699
914
  },
700
915
  {
701
916
  "cell_type": "code",
702
- "execution_count": 22,
917
+ "execution_count": 31,
703
918
  "id": "aa265538",
704
919
  "metadata": {},
705
920
  "outputs": [],
@@ -718,7 +933,7 @@
718
933
  },
719
934
  {
720
935
  "cell_type": "code",
721
- "execution_count": 61,
936
+ "execution_count": 32,
722
937
  "id": "f5ee4f6d",
723
938
  "metadata": {},
724
939
  "outputs": [],
@@ -729,7 +944,7 @@
729
944
  },
730
945
  {
731
946
  "cell_type": "code",
732
- "execution_count": 62,
947
+ "execution_count": 33,
733
948
  "id": "cbe07c91",
734
949
  "metadata": {},
735
950
  "outputs": [
@@ -838,7 +1053,7 @@
838
1053
  "2010-07-21 BTC 0.07474 0.07921 0.06634 0.07921 575.00 0.0"
839
1054
  ]
840
1055
  },
841
- "execution_count": 62,
1056
+ "execution_count": 33,
842
1057
  "metadata": {},
843
1058
  "output_type": "execute_result"
844
1059
  }
@@ -849,24 +1064,534 @@
849
1064
  },
850
1065
  {
851
1066
  "cell_type": "code",
852
- "execution_count": 63,
853
- "id": "bcbd3394",
1067
+ "execution_count": 34,
1068
+ "id": "cef46007",
1069
+ "metadata": {},
1070
+ "outputs": [
1071
+ {
1072
+ "data": {
1073
+ "text/plain": [
1074
+ "Index(['BTC', 'LTC', 'DOGE', 'DASH', 'XLM', 'XMR', 'XRP', 'KEY', 'DGB', 'XEM',\n",
1075
+ " ...\n",
1076
+ " 'TNSR', 'SAGA', 'REZ', 'BB', 'NOT', 'IO', 'ZK', 'LISTA', 'ZRO',\n",
1077
+ " 'RENDER'],\n",
1078
+ " dtype='object', name='ticker', length=256)"
1079
+ ]
1080
+ },
1081
+ "execution_count": 34,
1082
+ "metadata": {},
1083
+ "output_type": "execute_result"
1084
+ }
1085
+ ],
1086
+ "source": [
1087
+ "df.index.get_level_values(1).unique()"
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "execution_count": null,
1093
+ "id": "d4c497d1",
1094
+ "metadata": {},
1095
+ "outputs": [],
1096
+ "source": [
1097
+ "delisted_tickers = ['AGIX', 'CTK', 'CVC', 'CVX', 'DGB', 'FTT', 'GLMR', 'IDEX', 'MDT',\n",
1098
+ " 'OCEAN', 'RAD', 'RAY', 'SC', 'SLP', 'SNT', 'STPT', 'STRAX', 'WAVES']"
1099
+ ]
1100
+ },
1101
+ {
1102
+ "cell_type": "code",
1103
+ "execution_count": 35,
1104
+ "id": "9f8a899f",
854
1105
  "metadata": {},
855
1106
  "outputs": [],
856
1107
  "source": [
857
- "# Filter data\n",
858
- "clean_df = CleanData(df).filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
859
- " repair_outliers(imp_method='fcst').\\\n",
860
- " filter_avg_trading_val(thresh_val=1000000).\\\n",
861
- " filter_missing_vals_gaps().\\\n",
862
- " filter_min_nobs(ts_obs=1000, cs_obs=3).\\\n",
863
- " get(attr='df').dropna(how='all')"
1108
+ "clean = CleanData(df)"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "execution_count": 36,
1114
+ "id": "29e1b955",
1115
+ "metadata": {},
1116
+ "outputs": [
1117
+ {
1118
+ "data": {
1119
+ "text/plain": [
1120
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1121
+ ]
1122
+ },
1123
+ "execution_count": 36,
1124
+ "metadata": {},
1125
+ "output_type": "execute_result"
1126
+ }
1127
+ ],
1128
+ "source": [
1129
+ "clean.filter_delisted_tickers()"
1130
+ ]
1131
+ },
1132
+ {
1133
+ "cell_type": "code",
1134
+ "execution_count": 37,
1135
+ "id": "9315f178",
1136
+ "metadata": {},
1137
+ "outputs": [
1138
+ {
1139
+ "data": {
1140
+ "text/plain": [
1141
+ "['CVC',\n",
1142
+ " 'CTK',\n",
1143
+ " 'STRAX',\n",
1144
+ " 'STPT',\n",
1145
+ " 'DGB',\n",
1146
+ " 'CVX',\n",
1147
+ " 'FTT',\n",
1148
+ " 'GLMR',\n",
1149
+ " 'OCEAN',\n",
1150
+ " 'SLP',\n",
1151
+ " 'AGIX',\n",
1152
+ " 'RAY',\n",
1153
+ " 'MDT',\n",
1154
+ " 'SC',\n",
1155
+ " 'SNT',\n",
1156
+ " 'IDEX',\n",
1157
+ " 'RAD',\n",
1158
+ " 'WAVES']"
1159
+ ]
1160
+ },
1161
+ "execution_count": 37,
1162
+ "metadata": {},
1163
+ "output_type": "execute_result"
1164
+ }
1165
+ ],
1166
+ "source": [
1167
+ "clean.filtered_tickers"
1168
+ ]
1169
+ },
1170
+ {
1171
+ "cell_type": "code",
1172
+ "execution_count": 38,
1173
+ "id": "e4693e52",
1174
+ "metadata": {},
1175
+ "outputs": [
1176
+ {
1177
+ "data": {
1178
+ "text/plain": [
1179
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1180
+ ]
1181
+ },
1182
+ "execution_count": 38,
1183
+ "metadata": {},
1184
+ "output_type": "execute_result"
1185
+ }
1186
+ ],
1187
+ "source": [
1188
+ "clean.filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10)"
1189
+ ]
1190
+ },
1191
+ {
1192
+ "cell_type": "code",
1193
+ "execution_count": 39,
1194
+ "id": "311ee6ed",
1195
+ "metadata": {},
1196
+ "outputs": [
1197
+ {
1198
+ "data": {
1199
+ "text/html": [
1200
+ "<div>\n",
1201
+ "<style scoped>\n",
1202
+ " .dataframe tbody tr th:only-of-type {\n",
1203
+ " vertical-align: middle;\n",
1204
+ " }\n",
1205
+ "\n",
1206
+ " .dataframe tbody tr th {\n",
1207
+ " vertical-align: top;\n",
1208
+ " }\n",
1209
+ "\n",
1210
+ " .dataframe thead th {\n",
1211
+ " text-align: right;\n",
1212
+ " }\n",
1213
+ "</style>\n",
1214
+ "<table border=\"1\" class=\"dataframe\">\n",
1215
+ " <thead>\n",
1216
+ " <tr style=\"text-align: right;\">\n",
1217
+ " <th></th>\n",
1218
+ " <th></th>\n",
1219
+ " <th>open</th>\n",
1220
+ " <th>high</th>\n",
1221
+ " <th>low</th>\n",
1222
+ " <th>close</th>\n",
1223
+ " </tr>\n",
1224
+ " <tr>\n",
1225
+ " <th>date</th>\n",
1226
+ " <th>ticker</th>\n",
1227
+ " <th></th>\n",
1228
+ " <th></th>\n",
1229
+ " <th></th>\n",
1230
+ " <th></th>\n",
1231
+ " </tr>\n",
1232
+ " </thead>\n",
1233
+ " <tbody>\n",
1234
+ " <tr>\n",
1235
+ " <th>2010-07-17</th>\n",
1236
+ " <th>BTC</th>\n",
1237
+ " <td>&lt;NA&gt;</td>\n",
1238
+ " <td>&lt;NA&gt;</td>\n",
1239
+ " <td>&lt;NA&gt;</td>\n",
1240
+ " <td>&lt;NA&gt;</td>\n",
1241
+ " </tr>\n",
1242
+ " <tr>\n",
1243
+ " <th>2010-07-18</th>\n",
1244
+ " <th>BTC</th>\n",
1245
+ " <td>&lt;NA&gt;</td>\n",
1246
+ " <td>&lt;NA&gt;</td>\n",
1247
+ " <td>&lt;NA&gt;</td>\n",
1248
+ " <td>&lt;NA&gt;</td>\n",
1249
+ " </tr>\n",
1250
+ " <tr>\n",
1251
+ " <th>2010-07-19</th>\n",
1252
+ " <th>BTC</th>\n",
1253
+ " <td>&lt;NA&gt;</td>\n",
1254
+ " <td>&lt;NA&gt;</td>\n",
1255
+ " <td>&lt;NA&gt;</td>\n",
1256
+ " <td>&lt;NA&gt;</td>\n",
1257
+ " </tr>\n",
1258
+ " <tr>\n",
1259
+ " <th>2010-07-20</th>\n",
1260
+ " <th>BTC</th>\n",
1261
+ " <td>&lt;NA&gt;</td>\n",
1262
+ " <td>&lt;NA&gt;</td>\n",
1263
+ " <td>&lt;NA&gt;</td>\n",
1264
+ " <td>&lt;NA&gt;</td>\n",
1265
+ " </tr>\n",
1266
+ " <tr>\n",
1267
+ " <th>2010-07-21</th>\n",
1268
+ " <th>BTC</th>\n",
1269
+ " <td>&lt;NA&gt;</td>\n",
1270
+ " <td>&lt;NA&gt;</td>\n",
1271
+ " <td>&lt;NA&gt;</td>\n",
1272
+ " <td>&lt;NA&gt;</td>\n",
1273
+ " </tr>\n",
1274
+ " <tr>\n",
1275
+ " <th>...</th>\n",
1276
+ " <th>...</th>\n",
1277
+ " <td>...</td>\n",
1278
+ " <td>...</td>\n",
1279
+ " <td>...</td>\n",
1280
+ " <td>...</td>\n",
1281
+ " </tr>\n",
1282
+ " <tr>\n",
1283
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1284
+ " <th>ZEN</th>\n",
1285
+ " <td>&lt;NA&gt;</td>\n",
1286
+ " <td>9.112</td>\n",
1287
+ " <td>8.285</td>\n",
1288
+ " <td>8.462</td>\n",
1289
+ " </tr>\n",
1290
+ " <tr>\n",
1291
+ " <th>ZIL</th>\n",
1292
+ " <td>0.01422</td>\n",
1293
+ " <td>0.01441</td>\n",
1294
+ " <td>0.01392</td>\n",
1295
+ " <td>0.01396</td>\n",
1296
+ " </tr>\n",
1297
+ " <tr>\n",
1298
+ " <th>ZK</th>\n",
1299
+ " <td>0.11451</td>\n",
1300
+ " <td>0.11673</td>\n",
1301
+ " <td>0.10857</td>\n",
1302
+ " <td>0.10874</td>\n",
1303
+ " </tr>\n",
1304
+ " <tr>\n",
1305
+ " <th>ZRO</th>\n",
1306
+ " <td>3.631</td>\n",
1307
+ " <td>3.648</td>\n",
1308
+ " <td>3.362</td>\n",
1309
+ " <td>3.367</td>\n",
1310
+ " </tr>\n",
1311
+ " <tr>\n",
1312
+ " <th>ZRX</th>\n",
1313
+ " <td>0.3055</td>\n",
1314
+ " <td>0.3121</td>\n",
1315
+ " <td>0.2983</td>\n",
1316
+ " <td>0.2988</td>\n",
1317
+ " </tr>\n",
1318
+ " </tbody>\n",
1319
+ "</table>\n",
1320
+ "<p>357696 rows × 4 columns</p>\n",
1321
+ "</div>"
1322
+ ],
1323
+ "text/plain": [
1324
+ " open high low close\n",
1325
+ "date ticker \n",
1326
+ "2010-07-17 BTC <NA> <NA> <NA> <NA>\n",
1327
+ "2010-07-18 BTC <NA> <NA> <NA> <NA>\n",
1328
+ "2010-07-19 BTC <NA> <NA> <NA> <NA>\n",
1329
+ "2010-07-20 BTC <NA> <NA> <NA> <NA>\n",
1330
+ "2010-07-21 BTC <NA> <NA> <NA> <NA>\n",
1331
+ "... ... ... ... ...\n",
1332
+ "2024-08-04 ZEN <NA> 9.112 8.285 8.462\n",
1333
+ " ZIL 0.01422 0.01441 0.01392 0.01396\n",
1334
+ " ZK 0.11451 0.11673 0.10857 0.10874\n",
1335
+ " ZRO 3.631 3.648 3.362 3.367\n",
1336
+ " ZRX 0.3055 0.3121 0.2983 0.2988\n",
1337
+ "\n",
1338
+ "[357696 rows x 4 columns]"
1339
+ ]
1340
+ },
1341
+ "execution_count": 39,
1342
+ "metadata": {},
1343
+ "output_type": "execute_result"
1344
+ }
1345
+ ],
1346
+ "source": [
1347
+ "clean.df"
1348
+ ]
1349
+ },
1350
+ {
1351
+ "cell_type": "code",
1352
+ "execution_count": 40,
1353
+ "id": "83caa2c6",
1354
+ "metadata": {},
1355
+ "outputs": [
1356
+ {
1357
+ "data": {
1358
+ "text/plain": [
1359
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1360
+ ]
1361
+ },
1362
+ "execution_count": 40,
1363
+ "metadata": {},
1364
+ "output_type": "execute_result"
1365
+ }
1366
+ ],
1367
+ "source": [
1368
+ "clean.repair_outliers(imp_method='fcst')"
1369
+ ]
1370
+ },
1371
+ {
1372
+ "cell_type": "code",
1373
+ "execution_count": 41,
1374
+ "id": "f4c21352",
1375
+ "metadata": {},
1376
+ "outputs": [
1377
+ {
1378
+ "data": {
1379
+ "text/html": [
1380
+ "<div>\n",
1381
+ "<style scoped>\n",
1382
+ " .dataframe tbody tr th:only-of-type {\n",
1383
+ " vertical-align: middle;\n",
1384
+ " }\n",
1385
+ "\n",
1386
+ " .dataframe tbody tr th {\n",
1387
+ " vertical-align: top;\n",
1388
+ " }\n",
1389
+ "\n",
1390
+ " .dataframe thead th {\n",
1391
+ " text-align: right;\n",
1392
+ " }\n",
1393
+ "</style>\n",
1394
+ "<table border=\"1\" class=\"dataframe\">\n",
1395
+ " <thead>\n",
1396
+ " <tr style=\"text-align: right;\">\n",
1397
+ " <th></th>\n",
1398
+ " <th></th>\n",
1399
+ " <th>open</th>\n",
1400
+ " <th>high</th>\n",
1401
+ " <th>low</th>\n",
1402
+ " <th>close</th>\n",
1403
+ " <th>volume</th>\n",
1404
+ " <th>funding_rate</th>\n",
1405
+ " </tr>\n",
1406
+ " <tr>\n",
1407
+ " <th>date</th>\n",
1408
+ " <th>ticker</th>\n",
1409
+ " <th></th>\n",
1410
+ " <th></th>\n",
1411
+ " <th></th>\n",
1412
+ " <th></th>\n",
1413
+ " <th></th>\n",
1414
+ " <th></th>\n",
1415
+ " </tr>\n",
1416
+ " </thead>\n",
1417
+ " <tbody>\n",
1418
+ " <tr>\n",
1419
+ " <th>2010-07-17</th>\n",
1420
+ " <th>BTC</th>\n",
1421
+ " <td>42.99</td>\n",
1422
+ " <td>43.76</td>\n",
1423
+ " <td>40.99</td>\n",
1424
+ " <td>41.01</td>\n",
1425
+ " <td>2.000000e+01</td>\n",
1426
+ " <td>0.000000</td>\n",
1427
+ " </tr>\n",
1428
+ " <tr>\n",
1429
+ " <th>2010-07-18</th>\n",
1430
+ " <th>BTC</th>\n",
1431
+ " <td>0.07921</td>\n",
1432
+ " <td>0.08181</td>\n",
1433
+ " <td>0.06634</td>\n",
1434
+ " <td>0.07921</td>\n",
1435
+ " <td>7.501000e+01</td>\n",
1436
+ " <td>0.000000</td>\n",
1437
+ " </tr>\n",
1438
+ " <tr>\n",
1439
+ " <th>2010-07-19</th>\n",
1440
+ " <th>BTC</th>\n",
1441
+ " <td>0.07474</td>\n",
1442
+ " <td>0.07921</td>\n",
1443
+ " <td>0.0505</td>\n",
1444
+ " <td>0.06262</td>\n",
1445
+ " <td>5.740000e+02</td>\n",
1446
+ " <td>0.000000</td>\n",
1447
+ " </tr>\n",
1448
+ " <tr>\n",
1449
+ " <th>2010-07-20</th>\n",
1450
+ " <th>BTC</th>\n",
1451
+ " <td>0.06868</td>\n",
1452
+ " <td>0.07344</td>\n",
1453
+ " <td>0.0505</td>\n",
1454
+ " <td>0.06052</td>\n",
1455
+ " <td>2.620000e+02</td>\n",
1456
+ " <td>0.000000</td>\n",
1457
+ " </tr>\n",
1458
+ " <tr>\n",
1459
+ " <th>2010-07-21</th>\n",
1460
+ " <th>BTC</th>\n",
1461
+ " <td>0.06262</td>\n",
1462
+ " <td>0.06767</td>\n",
1463
+ " <td>0.0505</td>\n",
1464
+ " <td>0.05842</td>\n",
1465
+ " <td>5.750000e+02</td>\n",
1466
+ " <td>0.000000</td>\n",
1467
+ " </tr>\n",
1468
+ " <tr>\n",
1469
+ " <th>...</th>\n",
1470
+ " <th>...</th>\n",
1471
+ " <td>...</td>\n",
1472
+ " <td>...</td>\n",
1473
+ " <td>...</td>\n",
1474
+ " <td>...</td>\n",
1475
+ " <td>...</td>\n",
1476
+ " <td>...</td>\n",
1477
+ " </tr>\n",
1478
+ " <tr>\n",
1479
+ " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1480
+ " <th>ZEN</th>\n",
1481
+ " <td>9.657</td>\n",
1482
+ " <td>9.112</td>\n",
1483
+ " <td>8.285</td>\n",
1484
+ " <td>8.462</td>\n",
1485
+ " <td>2.071124e+06</td>\n",
1486
+ " <td>0.000194</td>\n",
1487
+ " </tr>\n",
1488
+ " <tr>\n",
1489
+ " <th>ZIL</th>\n",
1490
+ " <td>0.01422</td>\n",
1491
+ " <td>0.01441</td>\n",
1492
+ " <td>0.01392</td>\n",
1493
+ " <td>0.01396</td>\n",
1494
+ " <td>2.048626e+08</td>\n",
1495
+ " <td>-0.000031</td>\n",
1496
+ " </tr>\n",
1497
+ " <tr>\n",
1498
+ " <th>ZK</th>\n",
1499
+ " <td>0.11451</td>\n",
1500
+ " <td>0.11673</td>\n",
1501
+ " <td>0.10857</td>\n",
1502
+ " <td>0.10874</td>\n",
1503
+ " <td>3.833253e+08</td>\n",
1504
+ " <td>0.000200</td>\n",
1505
+ " </tr>\n",
1506
+ " <tr>\n",
1507
+ " <th>ZRO</th>\n",
1508
+ " <td>3.631</td>\n",
1509
+ " <td>3.648</td>\n",
1510
+ " <td>3.362</td>\n",
1511
+ " <td>3.367</td>\n",
1512
+ " <td>7.049472e+07</td>\n",
1513
+ " <td>0.000184</td>\n",
1514
+ " </tr>\n",
1515
+ " <tr>\n",
1516
+ " <th>ZRX</th>\n",
1517
+ " <td>0.3055</td>\n",
1518
+ " <td>0.3121</td>\n",
1519
+ " <td>0.2983</td>\n",
1520
+ " <td>0.2988</td>\n",
1521
+ " <td>9.810764e+06</td>\n",
1522
+ " <td>0.000186</td>\n",
1523
+ " </tr>\n",
1524
+ " </tbody>\n",
1525
+ "</table>\n",
1526
+ "<p>357696 rows × 6 columns</p>\n",
1527
+ "</div>"
1528
+ ],
1529
+ "text/plain": [
1530
+ " open high low close volume \\\n",
1531
+ "date ticker \n",
1532
+ "2010-07-17 BTC 42.99 43.76 40.99 41.01 2.000000e+01 \n",
1533
+ "2010-07-18 BTC 0.07921 0.08181 0.06634 0.07921 7.501000e+01 \n",
1534
+ "2010-07-19 BTC 0.07474 0.07921 0.0505 0.06262 5.740000e+02 \n",
1535
+ "2010-07-20 BTC 0.06868 0.07344 0.0505 0.06052 2.620000e+02 \n",
1536
+ "2010-07-21 BTC 0.06262 0.06767 0.0505 0.05842 5.750000e+02 \n",
1537
+ "... ... ... ... ... ... \n",
1538
+ "2024-08-04 ZEN 9.657 9.112 8.285 8.462 2.071124e+06 \n",
1539
+ " ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
1540
+ " ZK 0.11451 0.11673 0.10857 0.10874 3.833253e+08 \n",
1541
+ " ZRO 3.631 3.648 3.362 3.367 7.049472e+07 \n",
1542
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
1543
+ "\n",
1544
+ " funding_rate \n",
1545
+ "date ticker \n",
1546
+ "2010-07-17 BTC 0.000000 \n",
1547
+ "2010-07-18 BTC 0.000000 \n",
1548
+ "2010-07-19 BTC 0.000000 \n",
1549
+ "2010-07-20 BTC 0.000000 \n",
1550
+ "2010-07-21 BTC 0.000000 \n",
1551
+ "... ... \n",
1552
+ "2024-08-04 ZEN 0.000194 \n",
1553
+ " ZIL -0.000031 \n",
1554
+ " ZK 0.000200 \n",
1555
+ " ZRO 0.000184 \n",
1556
+ " ZRX 0.000186 \n",
1557
+ "\n",
1558
+ "[357696 rows x 6 columns]"
1559
+ ]
1560
+ },
1561
+ "execution_count": 41,
1562
+ "metadata": {},
1563
+ "output_type": "execute_result"
1564
+ }
1565
+ ],
1566
+ "source": [
1567
+ "clean.df"
1568
+ ]
1569
+ },
1570
+ {
1571
+ "cell_type": "code",
1572
+ "execution_count": 42,
1573
+ "id": "66b3d8d2",
1574
+ "metadata": {},
1575
+ "outputs": [
1576
+ {
1577
+ "data": {
1578
+ "text/plain": [
1579
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1580
+ ]
1581
+ },
1582
+ "execution_count": 42,
1583
+ "metadata": {},
1584
+ "output_type": "execute_result"
1585
+ }
1586
+ ],
1587
+ "source": [
1588
+ "clean.filter_avg_trading_val(thresh_val=1000000)"
864
1589
  ]
865
1590
  },
866
1591
  {
867
1592
  "cell_type": "code",
868
- "execution_count": 64,
869
- "id": "66c762a3",
1593
+ "execution_count": 43,
1594
+ "id": "b7f23056",
870
1595
  "metadata": {},
871
1596
  "outputs": [
872
1597
  {
@@ -911,51 +1636,54 @@
911
1636
  " </thead>\n",
912
1637
  " <tbody>\n",
913
1638
  " <tr>\n",
914
- " <th rowspan=\"4\" valign=\"top\">2017-03-21</th>\n",
1639
+ " <th>2010-07-17</th>\n",
915
1640
  " <th>BTC</th>\n",
916
- " <td>1047.51</td>\n",
917
- " <td>1125.53</td>\n",
918
- " <td>1043.87</td>\n",
919
- " <td>1121.29</td>\n",
920
- " <td>9.259127e+04</td>\n",
921
- " <td>0.000000</td>\n",
1641
+ " <td>&lt;NA&gt;</td>\n",
1642
+ " <td>&lt;NA&gt;</td>\n",
1643
+ " <td>&lt;NA&gt;</td>\n",
1644
+ " <td>&lt;NA&gt;</td>\n",
1645
+ " <td>NaN</td>\n",
1646
+ " <td>NaN</td>\n",
922
1647
  " </tr>\n",
923
1648
  " <tr>\n",
924
- " <th>ETC</th>\n",
925
- " <td>1.867</td>\n",
926
- " <td>2.39</td>\n",
927
- " <td>1.867</td>\n",
928
- " <td>2.378</td>\n",
929
- " <td>2.151590e+06</td>\n",
930
- " <td>0.000000</td>\n",
1649
+ " <th>2010-07-18</th>\n",
1650
+ " <th>BTC</th>\n",
1651
+ " <td>&lt;NA&gt;</td>\n",
1652
+ " <td>&lt;NA&gt;</td>\n",
1653
+ " <td>&lt;NA&gt;</td>\n",
1654
+ " <td>&lt;NA&gt;</td>\n",
1655
+ " <td>NaN</td>\n",
1656
+ " <td>NaN</td>\n",
931
1657
  " </tr>\n",
932
1658
  " <tr>\n",
933
- " <th>ETH</th>\n",
934
- " <td>42.51</td>\n",
935
- " <td>43.8</td>\n",
936
- " <td>41.68</td>\n",
937
- " <td>42.67</td>\n",
938
- " <td>4.843660e+05</td>\n",
939
- " <td>0.000000</td>\n",
1659
+ " <th>2010-07-19</th>\n",
1660
+ " <th>BTC</th>\n",
1661
+ " <td>&lt;NA&gt;</td>\n",
1662
+ " <td>&lt;NA&gt;</td>\n",
1663
+ " <td>&lt;NA&gt;</td>\n",
1664
+ " <td>&lt;NA&gt;</td>\n",
1665
+ " <td>NaN</td>\n",
1666
+ " <td>NaN</td>\n",
940
1667
  " </tr>\n",
941
1668
  " <tr>\n",
942
- " <th>LTC</th>\n",
943
- " <td>4.121</td>\n",
944
- " <td>4.155</td>\n",
945
- " <td>4.014</td>\n",
946
- " <td>4.09</td>\n",
947
- " <td>1.932581e+05</td>\n",
948
- " <td>0.000000</td>\n",
1669
+ " <th>2010-07-20</th>\n",
1670
+ " <th>BTC</th>\n",
1671
+ " <td>&lt;NA&gt;</td>\n",
1672
+ " <td>&lt;NA&gt;</td>\n",
1673
+ " <td>&lt;NA&gt;</td>\n",
1674
+ " <td>&lt;NA&gt;</td>\n",
1675
+ " <td>NaN</td>\n",
1676
+ " <td>NaN</td>\n",
949
1677
  " </tr>\n",
950
1678
  " <tr>\n",
951
- " <th>2017-03-22</th>\n",
1679
+ " <th>2010-07-21</th>\n",
952
1680
  " <th>BTC</th>\n",
953
- " <td>1121.29</td>\n",
954
- " <td>1121.88</td>\n",
955
- " <td>997.78</td>\n",
956
- " <td>1044.72</td>\n",
957
- " <td>1.152861e+05</td>\n",
958
- " <td>0.000000</td>\n",
1681
+ " <td>&lt;NA&gt;</td>\n",
1682
+ " <td>&lt;NA&gt;</td>\n",
1683
+ " <td>&lt;NA&gt;</td>\n",
1684
+ " <td>&lt;NA&gt;</td>\n",
1685
+ " <td>NaN</td>\n",
1686
+ " <td>NaN</td>\n",
959
1687
  " </tr>\n",
960
1688
  " <tr>\n",
961
1689
  " <th>...</th>\n",
@@ -969,30 +1697,12 @@
969
1697
  " </tr>\n",
970
1698
  " <tr>\n",
971
1699
  " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
972
- " <th>YFI</th>\n",
973
- " <td>5341.0</td>\n",
974
- " <td>5341.0</td>\n",
975
- " <td>5196.0</td>\n",
976
- " <td>5198.0</td>\n",
977
- " <td>4.281050e+02</td>\n",
978
- " <td>0.000169</td>\n",
979
- " </tr>\n",
980
- " <tr>\n",
981
- " <th>ZEC</th>\n",
982
- " <td>31.76</td>\n",
983
- " <td>34.44</td>\n",
984
- " <td>31.28</td>\n",
985
- " <td>31.55</td>\n",
986
- " <td>1.348085e+06</td>\n",
987
- " <td>0.000191</td>\n",
988
- " </tr>\n",
989
- " <tr>\n",
990
1700
  " <th>ZEN</th>\n",
991
1701
  " <td>9.657</td>\n",
992
1702
  " <td>9.112</td>\n",
993
1703
  " <td>8.285</td>\n",
994
1704
  " <td>8.462</td>\n",
995
- " <td>2.071124e+06</td>\n",
1705
+ " <td>2071124.0</td>\n",
996
1706
  " <td>0.000194</td>\n",
997
1707
  " </tr>\n",
998
1708
  " <tr>\n",
@@ -1001,416 +1711,204 @@
1001
1711
  " <td>0.01441</td>\n",
1002
1712
  " <td>0.01392</td>\n",
1003
1713
  " <td>0.01396</td>\n",
1004
- " <td>2.048626e+08</td>\n",
1714
+ " <td>204862627.0</td>\n",
1005
1715
  " <td>-0.000031</td>\n",
1006
1716
  " </tr>\n",
1007
1717
  " <tr>\n",
1718
+ " <th>ZK</th>\n",
1719
+ " <td>0.11451</td>\n",
1720
+ " <td>0.11673</td>\n",
1721
+ " <td>0.10857</td>\n",
1722
+ " <td>0.10874</td>\n",
1723
+ " <td>383325323.0</td>\n",
1724
+ " <td>0.000200</td>\n",
1725
+ " </tr>\n",
1726
+ " <tr>\n",
1727
+ " <th>ZRO</th>\n",
1728
+ " <td>3.631</td>\n",
1729
+ " <td>3.648</td>\n",
1730
+ " <td>3.362</td>\n",
1731
+ " <td>3.367</td>\n",
1732
+ " <td>70494717.0</td>\n",
1733
+ " <td>0.000184</td>\n",
1734
+ " </tr>\n",
1735
+ " <tr>\n",
1008
1736
  " <th>ZRX</th>\n",
1009
1737
  " <td>0.3055</td>\n",
1010
1738
  " <td>0.3121</td>\n",
1011
1739
  " <td>0.2983</td>\n",
1012
1740
  " <td>0.2988</td>\n",
1013
- " <td>9.810764e+06</td>\n",
1741
+ " <td>9810764.1</td>\n",
1014
1742
  " <td>0.000186</td>\n",
1015
1743
  " </tr>\n",
1016
1744
  " </tbody>\n",
1017
1745
  "</table>\n",
1018
- "<p>193002 rows × 6 columns</p>\n",
1746
+ "<p>357696 rows × 6 columns</p>\n",
1019
1747
  "</div>"
1020
1748
  ],
1021
1749
  "text/plain": [
1022
- " open high low close volume \\\n",
1023
- "date ticker \n",
1024
- "2017-03-21 BTC 1047.51 1125.53 1043.87 1121.29 9.259127e+04 \n",
1025
- " ETC 1.867 2.39 1.867 2.378 2.151590e+06 \n",
1026
- " ETH 42.51 43.8 41.68 42.67 4.843660e+05 \n",
1027
- " LTC 4.121 4.155 4.014 4.09 1.932581e+05 \n",
1028
- "2017-03-22 BTC 1121.29 1121.88 997.78 1044.72 1.152861e+05 \n",
1029
- "... ... ... ... ... ... \n",
1030
- "2024-08-04 YFI 5341.0 5341.0 5196.0 5198.0 4.281050e+02 \n",
1031
- " ZEC 31.76 34.44 31.28 31.55 1.348085e+06 \n",
1032
- " ZEN 9.657 9.112 8.285 8.462 2.071124e+06 \n",
1033
- " ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
1034
- " ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
1750
+ " open high low close volume \\\n",
1751
+ "date ticker \n",
1752
+ "2010-07-17 BTC <NA> <NA> <NA> <NA> NaN \n",
1753
+ "2010-07-18 BTC <NA> <NA> <NA> <NA> NaN \n",
1754
+ "2010-07-19 BTC <NA> <NA> <NA> <NA> NaN \n",
1755
+ "2010-07-20 BTC <NA> <NA> <NA> <NA> NaN \n",
1756
+ "2010-07-21 BTC <NA> <NA> <NA> <NA> NaN \n",
1757
+ "... ... ... ... ... ... \n",
1758
+ "2024-08-04 ZEN 9.657 9.112 8.285 8.462 2071124.0 \n",
1759
+ " ZIL 0.01422 0.01441 0.01392 0.01396 204862627.0 \n",
1760
+ " ZK 0.11451 0.11673 0.10857 0.10874 383325323.0 \n",
1761
+ " ZRO 3.631 3.648 3.362 3.367 70494717.0 \n",
1762
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9810764.1 \n",
1035
1763
  "\n",
1036
1764
  " funding_rate \n",
1037
1765
  "date ticker \n",
1038
- "2017-03-21 BTC 0.000000 \n",
1039
- " ETC 0.000000 \n",
1040
- " ETH 0.000000 \n",
1041
- " LTC 0.000000 \n",
1042
- "2017-03-22 BTC 0.000000 \n",
1766
+ "2010-07-17 BTC NaN \n",
1767
+ "2010-07-18 BTC NaN \n",
1768
+ "2010-07-19 BTC NaN \n",
1769
+ "2010-07-20 BTC NaN \n",
1770
+ "2010-07-21 BTC NaN \n",
1043
1771
  "... ... \n",
1044
- "2024-08-04 YFI 0.000169 \n",
1045
- " ZEC 0.000191 \n",
1046
- " ZEN 0.000194 \n",
1772
+ "2024-08-04 ZEN 0.000194 \n",
1047
1773
  " ZIL -0.000031 \n",
1774
+ " ZK 0.000200 \n",
1775
+ " ZRO 0.000184 \n",
1048
1776
  " ZRX 0.000186 \n",
1049
1777
  "\n",
1050
- "[193002 rows x 6 columns]"
1778
+ "[357696 rows x 6 columns]"
1051
1779
  ]
1052
1780
  },
1053
- "execution_count": 64,
1781
+ "execution_count": 43,
1054
1782
  "metadata": {},
1055
1783
  "output_type": "execute_result"
1056
1784
  }
1057
1785
  ],
1058
1786
  "source": [
1059
- "clean_df"
1787
+ "clean.df"
1060
1788
  ]
1061
1789
  },
1062
1790
  {
1063
1791
  "cell_type": "code",
1064
- "execution_count": 65,
1065
- "id": "bec999ba",
1792
+ "execution_count": 44,
1793
+ "id": "b5ac345d",
1066
1794
  "metadata": {},
1067
- "outputs": [],
1795
+ "outputs": [
1796
+ {
1797
+ "data": {
1798
+ "text/plain": [
1799
+ "['CVC',\n",
1800
+ " 'CTK',\n",
1801
+ " 'STRAX',\n",
1802
+ " 'STPT',\n",
1803
+ " 'DGB',\n",
1804
+ " 'CVX',\n",
1805
+ " 'FTT',\n",
1806
+ " 'GLMR',\n",
1807
+ " 'OCEAN',\n",
1808
+ " 'SLP',\n",
1809
+ " 'AGIX',\n",
1810
+ " 'RAY',\n",
1811
+ " 'MDT',\n",
1812
+ " 'SC',\n",
1813
+ " 'SNT',\n",
1814
+ " 'IDEX',\n",
1815
+ " 'RAD',\n",
1816
+ " 'WAVES']"
1817
+ ]
1818
+ },
1819
+ "execution_count": 44,
1820
+ "metadata": {},
1821
+ "output_type": "execute_result"
1822
+ }
1823
+ ],
1068
1824
  "source": [
1069
- "# Filter data\n",
1070
- "clean = CleanData(df)"
1825
+ "clean.filtered_tickers"
1071
1826
  ]
1072
1827
  },
1073
1828
  {
1074
1829
  "cell_type": "code",
1075
- "execution_count": 66,
1076
- "id": "34372e70",
1830
+ "execution_count": 45,
1831
+ "id": "6d36d4ce",
1077
1832
  "metadata": {},
1078
1833
  "outputs": [
1079
1834
  {
1080
1835
  "data": {
1081
1836
  "text/plain": [
1082
- "<cryptodatapy.transform.clean.CleanData at 0x7face8e7eb50>"
1837
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1083
1838
  ]
1084
1839
  },
1085
- "execution_count": 66,
1840
+ "execution_count": 45,
1086
1841
  "metadata": {},
1087
1842
  "output_type": "execute_result"
1088
1843
  }
1089
1844
  ],
1090
1845
  "source": [
1091
- "clean.filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
1092
- " repair_outliers(imp_method='fcst').\\\n",
1093
- " filter_avg_trading_val(thresh_val=1000000).\\\n",
1094
- " filter_missing_vals_gaps().\\\n",
1095
- " filter_min_nobs(ts_obs=1000, cs_obs=3)"
1846
+ "clean.filter_missing_vals_gaps()"
1096
1847
  ]
1097
1848
  },
1098
1849
  {
1099
1850
  "cell_type": "code",
1100
- "execution_count": 79,
1101
- "id": "ba73590b",
1851
+ "execution_count": 47,
1852
+ "id": "7fe432d5",
1102
1853
  "metadata": {},
1103
1854
  "outputs": [
1104
1855
  {
1105
1856
  "data": {
1106
- "text/html": [
1107
- "<div>\n",
1108
- "<style scoped>\n",
1109
- " .dataframe tbody tr th:only-of-type {\n",
1110
- " vertical-align: middle;\n",
1111
- " }\n",
1112
- "\n",
1113
- " .dataframe tbody tr th {\n",
1114
- " vertical-align: top;\n",
1115
- " }\n",
1116
- "\n",
1117
- " .dataframe thead th {\n",
1118
- " text-align: right;\n",
1119
- " }\n",
1120
- "</style>\n",
1121
- "<table border=\"1\" class=\"dataframe\">\n",
1122
- " <thead>\n",
1123
- " <tr style=\"text-align: right;\">\n",
1124
- " <th></th>\n",
1125
- " <th>1000SATS</th>\n",
1126
- " <th>1INCH</th>\n",
1127
- " <th>AAVE</th>\n",
1128
- " <th>ACE</th>\n",
1129
- " <th>ACH</th>\n",
1130
- " <th>ADA</th>\n",
1131
- " <th>AEVO</th>\n",
1132
- " <th>AGIX</th>\n",
1133
- " <th>AGLD</th>\n",
1134
- " <th>AI</th>\n",
1135
- " <th>...</th>\n",
1136
- " <th>XVG</th>\n",
1137
- " <th>XVS</th>\n",
1138
- " <th>YFI</th>\n",
1139
- " <th>YGG</th>\n",
1140
- " <th>ZEC</th>\n",
1141
- " <th>ZEN</th>\n",
1142
- " <th>ZIL</th>\n",
1143
- " <th>ZK</th>\n",
1144
- " <th>ZRO</th>\n",
1145
- " <th>ZRX</th>\n",
1146
- " </tr>\n",
1147
- " </thead>\n",
1148
- " <tbody>\n",
1149
- " <tr>\n",
1150
- " <th>n_obs</th>\n",
1151
- " <td>237.000000</td>\n",
1152
- " <td>1319.000000</td>\n",
1153
- " <td>1395.000000</td>\n",
1154
- " <td>326.000000</td>\n",
1155
- " <td>1419.000000</td>\n",
1156
- " <td>2500.000000</td>\n",
1157
- " <td>278.000000</td>\n",
1158
- " <td>536.000000</td>\n",
1159
- " <td>1040.000000</td>\n",
1160
- " <td>524.000000</td>\n",
1161
- " <td>...</td>\n",
1162
- " <td>3101.000000</td>\n",
1163
- " <td>1399.000000</td>\n",
1164
- " <td>1466.000000</td>\n",
1165
- " <td>1067.000000</td>\n",
1166
- " <td>2838.000000</td>\n",
1167
- " <td>2617.000000</td>\n",
1168
- " <td>2379.000000</td>\n",
1169
- " <td>49.000000</td>\n",
1170
- " <td>46.000000</td>\n",
1171
- " <td>2551.000000</td>\n",
1172
- " </tr>\n",
1173
- " <tr>\n",
1174
- " <th>%_NaN_start</th>\n",
1175
- " <td>95.382817</td>\n",
1176
- " <td>74.303526</td>\n",
1177
- " <td>72.822911</td>\n",
1178
- " <td>93.648938</td>\n",
1179
- " <td>72.355348</td>\n",
1180
- " <td>51.295539</td>\n",
1181
- " <td>94.584064</td>\n",
1182
- " <td>89.557763</td>\n",
1183
- " <td>79.738944</td>\n",
1184
- " <td>89.791545</td>\n",
1185
- " <td>...</td>\n",
1186
- " <td>39.586986</td>\n",
1187
- " <td>72.744983</td>\n",
1188
- " <td>71.439704</td>\n",
1189
- " <td>79.212936</td>\n",
1190
- " <td>44.710695</td>\n",
1191
- " <td>49.016170</td>\n",
1192
- " <td>53.652835</td>\n",
1193
- " <td>99.045393</td>\n",
1194
- " <td>99.103838</td>\n",
1195
- " <td>50.301968</td>\n",
1196
- " </tr>\n",
1197
- " <tr>\n",
1198
- " <th>%_outliers</th>\n",
1199
- " <td>0.038964</td>\n",
1200
- " <td>0.642899</td>\n",
1201
- " <td>0.370154</td>\n",
1202
- " <td>0.253263</td>\n",
1203
- " <td>0.584454</td>\n",
1204
- " <td>0.876680</td>\n",
1205
- " <td>0.331190</td>\n",
1206
- " <td>0.253263</td>\n",
1207
- " <td>0.526008</td>\n",
1208
- " <td>0.253263</td>\n",
1209
- " <td>...</td>\n",
1210
- " <td>2.824859</td>\n",
1211
- " <td>0.642899</td>\n",
1212
- " <td>0.389636</td>\n",
1213
- " <td>0.467563</td>\n",
1214
- " <td>0.857199</td>\n",
1215
- " <td>1.207871</td>\n",
1216
- " <td>0.974089</td>\n",
1217
- " <td>0.000000</td>\n",
1218
- " <td>0.019482</td>\n",
1219
- " <td>0.993571</td>\n",
1220
- " </tr>\n",
1221
- " <tr>\n",
1222
- " <th>%_imputed</th>\n",
1223
- " <td>0.175336</td>\n",
1224
- " <td>1.052016</td>\n",
1225
- " <td>0.584454</td>\n",
1226
- " <td>0.370154</td>\n",
1227
- " <td>0.701344</td>\n",
1228
- " <td>1.246834</td>\n",
1229
- " <td>0.545490</td>\n",
1230
- " <td>1.188389</td>\n",
1231
- " <td>0.779271</td>\n",
1232
- " <td>0.662381</td>\n",
1233
- " <td>...</td>\n",
1234
- " <td>3.779466</td>\n",
1235
- " <td>0.779271</td>\n",
1236
- " <td>0.701344</td>\n",
1237
- " <td>0.740308</td>\n",
1238
- " <td>1.207871</td>\n",
1239
- " <td>1.461134</td>\n",
1240
- " <td>1.422170</td>\n",
1241
- " <td>0.116891</td>\n",
1242
- " <td>0.136372</td>\n",
1243
- " <td>1.363725</td>\n",
1244
- " </tr>\n",
1245
- " <tr>\n",
1246
- " <th>%_below_avg_trading_val</th>\n",
1247
- " <td>0.564972</td>\n",
1248
- " <td>0.564972</td>\n",
1249
- " <td>0.603935</td>\n",
1250
- " <td>1.753361</td>\n",
1251
- " <td>6.409507</td>\n",
1252
- " <td>0.564972</td>\n",
1253
- " <td>2.591077</td>\n",
1254
- " <td>1.110462</td>\n",
1255
- " <td>1.636470</td>\n",
1256
- " <td>6.019871</td>\n",
1257
- " <td>...</td>\n",
1258
- " <td>41.671537</td>\n",
1259
- " <td>4.227547</td>\n",
1260
- " <td>0.564972</td>\n",
1261
- " <td>1.402688</td>\n",
1262
- " <td>4.773037</td>\n",
1263
- " <td>17.786869</td>\n",
1264
- " <td>9.526593</td>\n",
1265
- " <td>0.564972</td>\n",
1266
- " <td>0.564972</td>\n",
1267
- " <td>12.195597</td>\n",
1268
- " </tr>\n",
1269
- " <tr>\n",
1270
- " <th>%_missing_vals_gaps</th>\n",
1271
- " <td>0.000000</td>\n",
1272
- " <td>0.000000</td>\n",
1273
- " <td>0.000000</td>\n",
1274
- " <td>0.019482</td>\n",
1275
- " <td>0.019482</td>\n",
1276
- " <td>0.000000</td>\n",
1277
- " <td>0.000000</td>\n",
1278
- " <td>0.000000</td>\n",
1279
- " <td>0.000000</td>\n",
1280
- " <td>0.000000</td>\n",
1281
- " <td>...</td>\n",
1282
- " <td>10.325346</td>\n",
1283
- " <td>13.598286</td>\n",
1284
- " <td>0.000000</td>\n",
1285
- " <td>11.903370</td>\n",
1286
- " <td>16.540035</td>\n",
1287
- " <td>6.916034</td>\n",
1288
- " <td>6.623807</td>\n",
1289
- " <td>0.000000</td>\n",
1290
- " <td>0.000000</td>\n",
1291
- " <td>7.305669</td>\n",
1292
- " </tr>\n",
1293
- " <tr>\n",
1294
- " <th>n_tickers_below_min_obs</th>\n",
1295
- " <td>129.000000</td>\n",
1296
- " <td>129.000000</td>\n",
1297
- " <td>129.000000</td>\n",
1298
- " <td>129.000000</td>\n",
1299
- " <td>129.000000</td>\n",
1300
- " <td>129.000000</td>\n",
1301
- " <td>129.000000</td>\n",
1302
- " <td>129.000000</td>\n",
1303
- " <td>129.000000</td>\n",
1304
- " <td>129.000000</td>\n",
1305
- " <td>...</td>\n",
1306
- " <td>129.000000</td>\n",
1307
- " <td>129.000000</td>\n",
1308
- " <td>129.000000</td>\n",
1309
- " <td>129.000000</td>\n",
1310
- " <td>129.000000</td>\n",
1311
- " <td>129.000000</td>\n",
1312
- " <td>129.000000</td>\n",
1313
- " <td>129.000000</td>\n",
1314
- " <td>129.000000</td>\n",
1315
- " <td>129.000000</td>\n",
1316
- " </tr>\n",
1317
- " </tbody>\n",
1318
- "</table>\n",
1319
- "<p>7 rows × 256 columns</p>\n",
1320
- "</div>"
1321
- ],
1322
1857
  "text/plain": [
1323
- " 1000SATS 1INCH AAVE ACE \\\n",
1324
- "n_obs 237.000000 1319.000000 1395.000000 326.000000 \n",
1325
- "%_NaN_start 95.382817 74.303526 72.822911 93.648938 \n",
1326
- "%_outliers 0.038964 0.642899 0.370154 0.253263 \n",
1327
- "%_imputed 0.175336 1.052016 0.584454 0.370154 \n",
1328
- "%_below_avg_trading_val 0.564972 0.564972 0.603935 1.753361 \n",
1329
- "%_missing_vals_gaps 0.000000 0.000000 0.000000 0.019482 \n",
1330
- "n_tickers_below_min_obs 129.000000 129.000000 129.000000 129.000000 \n",
1331
- "\n",
1332
- " ACH ADA AEVO AGIX \\\n",
1333
- "n_obs 1419.000000 2500.000000 278.000000 536.000000 \n",
1334
- "%_NaN_start 72.355348 51.295539 94.584064 89.557763 \n",
1335
- "%_outliers 0.584454 0.876680 0.331190 0.253263 \n",
1336
- "%_imputed 0.701344 1.246834 0.545490 1.188389 \n",
1337
- "%_below_avg_trading_val 6.409507 0.564972 2.591077 1.110462 \n",
1338
- "%_missing_vals_gaps 0.019482 0.000000 0.000000 0.000000 \n",
1339
- "n_tickers_below_min_obs 129.000000 129.000000 129.000000 129.000000 \n",
1340
- "\n",
1341
- " AGLD AI ... XVG \\\n",
1342
- "n_obs 1040.000000 524.000000 ... 3101.000000 \n",
1343
- "%_NaN_start 79.738944 89.791545 ... 39.586986 \n",
1344
- "%_outliers 0.526008 0.253263 ... 2.824859 \n",
1345
- "%_imputed 0.779271 0.662381 ... 3.779466 \n",
1346
- "%_below_avg_trading_val 1.636470 6.019871 ... 41.671537 \n",
1347
- "%_missing_vals_gaps 0.000000 0.000000 ... 10.325346 \n",
1348
- "n_tickers_below_min_obs 129.000000 129.000000 ... 129.000000 \n",
1349
- "\n",
1350
- " XVS YFI YGG ZEC \\\n",
1351
- "n_obs 1399.000000 1466.000000 1067.000000 2838.000000 \n",
1352
- "%_NaN_start 72.744983 71.439704 79.212936 44.710695 \n",
1353
- "%_outliers 0.642899 0.389636 0.467563 0.857199 \n",
1354
- "%_imputed 0.779271 0.701344 0.740308 1.207871 \n",
1355
- "%_below_avg_trading_val 4.227547 0.564972 1.402688 4.773037 \n",
1356
- "%_missing_vals_gaps 13.598286 0.000000 11.903370 16.540035 \n",
1357
- "n_tickers_below_min_obs 129.000000 129.000000 129.000000 129.000000 \n",
1358
- "\n",
1359
- " ZEN ZIL ZK ZRO \\\n",
1360
- "n_obs 2617.000000 2379.000000 49.000000 46.000000 \n",
1361
- "%_NaN_start 49.016170 53.652835 99.045393 99.103838 \n",
1362
- "%_outliers 1.207871 0.974089 0.000000 0.019482 \n",
1363
- "%_imputed 1.461134 1.422170 0.116891 0.136372 \n",
1364
- "%_below_avg_trading_val 17.786869 9.526593 0.564972 0.564972 \n",
1365
- "%_missing_vals_gaps 6.916034 6.623807 0.000000 0.000000 \n",
1366
- "n_tickers_below_min_obs 129.000000 129.000000 129.000000 129.000000 \n",
1367
- "\n",
1368
- " ZRX \n",
1369
- "n_obs 2551.000000 \n",
1370
- "%_NaN_start 50.301968 \n",
1371
- "%_outliers 0.993571 \n",
1372
- "%_imputed 1.363725 \n",
1373
- "%_below_avg_trading_val 12.195597 \n",
1374
- "%_missing_vals_gaps 7.305669 \n",
1375
- "n_tickers_below_min_obs 129.000000 \n",
1376
- "\n",
1377
- "[7 rows x 256 columns]"
1858
+ "['CVC',\n",
1859
+ " 'CTK',\n",
1860
+ " 'STRAX',\n",
1861
+ " 'STPT',\n",
1862
+ " 'DGB',\n",
1863
+ " 'CVX',\n",
1864
+ " 'FTT',\n",
1865
+ " 'GLMR',\n",
1866
+ " 'OCEAN',\n",
1867
+ " 'SLP',\n",
1868
+ " 'AGIX',\n",
1869
+ " 'RAY',\n",
1870
+ " 'MDT',\n",
1871
+ " 'SC',\n",
1872
+ " 'SNT',\n",
1873
+ " 'IDEX',\n",
1874
+ " 'RAD',\n",
1875
+ " 'WAVES']"
1378
1876
  ]
1379
1877
  },
1380
- "execution_count": 79,
1878
+ "execution_count": 47,
1381
1879
  "metadata": {},
1382
1880
  "output_type": "execute_result"
1383
1881
  }
1384
1882
  ],
1385
1883
  "source": [
1386
- "clean.summary.close"
1884
+ "clean.filtered_tickers"
1387
1885
  ]
1388
1886
  },
1389
1887
  {
1390
1888
  "cell_type": "code",
1391
- "execution_count": 80,
1392
- "id": "f89c762c",
1889
+ "execution_count": 48,
1890
+ "id": "f7cbdbae",
1393
1891
  "metadata": {},
1394
1892
  "outputs": [
1395
1893
  {
1396
1894
  "data": {
1397
1895
  "text/plain": [
1398
- "<cryptodatapy.transform.clean.CleanData at 0x7face8e7eb50>"
1896
+ "<cryptodatapy.transform.clean.CleanData at 0x7fb888103af0>"
1399
1897
  ]
1400
1898
  },
1401
- "execution_count": 80,
1899
+ "execution_count": 48,
1402
1900
  "metadata": {},
1403
1901
  "output_type": "execute_result"
1404
1902
  }
1405
1903
  ],
1406
1904
  "source": [
1407
- "clean.filter_tickers(['BTC'])"
1905
+ "clean.filter_min_nobs(ts_obs=1400, cs_obs=5)"
1408
1906
  ]
1409
1907
  },
1410
1908
  {
1411
1909
  "cell_type": "code",
1412
- "execution_count": 85,
1413
- "id": "d621e6f3",
1910
+ "execution_count": 49,
1911
+ "id": "f13d9f1f",
1414
1912
  "metadata": {},
1415
1913
  "outputs": [
1416
1914
  {
@@ -1455,53 +1953,49 @@
1455
1953
  " </thead>\n",
1456
1954
  " <tbody>\n",
1457
1955
  " <tr>\n",
1458
- " <th>2013-09-29</th>\n",
1459
- " <th>LTC</th>\n",
1460
- " <td>2.56500</td>\n",
1461
- " <td>2.59000</td>\n",
1462
- " <td>2.59000</td>\n",
1463
- " <td>2.59000</td>\n",
1464
- " <td>5.000000e+00</td>\n",
1465
- " <td>0.000000</td>\n",
1956
+ " <th rowspan=\"5\" valign=\"top\">2017-06-18</th>\n",
1957
+ " <th>BAT</th>\n",
1958
+ " <td>&lt;NA&gt;</td>\n",
1959
+ " <td>&lt;NA&gt;</td>\n",
1960
+ " <td>&lt;NA&gt;</td>\n",
1961
+ " <td>&lt;NA&gt;</td>\n",
1962
+ " <td>NaN</td>\n",
1963
+ " <td>NaN</td>\n",
1466
1964
  " </tr>\n",
1467
1965
  " <tr>\n",
1468
- " <th>2013-09-30</th>\n",
1469
- " <th>LTC</th>\n",
1470
- " <td>2.59000</td>\n",
1471
- " <td>2.55400</td>\n",
1472
- " <td>2.55400</td>\n",
1473
- " <td>2.55400</td>\n",
1474
- " <td>5.623000e+01</td>\n",
1966
+ " <th>BTC</th>\n",
1967
+ " <td>2655.1</td>\n",
1968
+ " <td>2676.04</td>\n",
1969
+ " <td>2488.59</td>\n",
1970
+ " <td>2539.56</td>\n",
1971
+ " <td>9.200422e+04</td>\n",
1475
1972
  " <td>0.000000</td>\n",
1476
1973
  " </tr>\n",
1477
1974
  " <tr>\n",
1478
- " <th>2013-10-01</th>\n",
1479
- " <th>LTC</th>\n",
1480
- " <td>2.55400</td>\n",
1481
- " <td>2.66600</td>\n",
1482
- " <td>2.52500</td>\n",
1483
- " <td>2.66600</td>\n",
1484
- " <td>1.000000e+00</td>\n",
1485
- " <td>0.000000</td>\n",
1975
+ " <th>DASH</th>\n",
1976
+ " <td>&lt;NA&gt;</td>\n",
1977
+ " <td>&lt;NA&gt;</td>\n",
1978
+ " <td>&lt;NA&gt;</td>\n",
1979
+ " <td>&lt;NA&gt;</td>\n",
1980
+ " <td>NaN</td>\n",
1981
+ " <td>NaN</td>\n",
1486
1982
  " </tr>\n",
1487
1983
  " <tr>\n",
1488
- " <th>2013-10-02</th>\n",
1489
- " <th>LTC</th>\n",
1490
- " <td>2.66600</td>\n",
1491
- " <td>2.33700</td>\n",
1492
- " <td>2.33700</td>\n",
1493
- " <td>2.33700</td>\n",
1494
- " <td>1.500000e+00</td>\n",
1495
- " <td>0.000000</td>\n",
1984
+ " <th>DOGE</th>\n",
1985
+ " <td>&lt;NA&gt;</td>\n",
1986
+ " <td>&lt;NA&gt;</td>\n",
1987
+ " <td>&lt;NA&gt;</td>\n",
1988
+ " <td>&lt;NA&gt;</td>\n",
1989
+ " <td>NaN</td>\n",
1990
+ " <td>NaN</td>\n",
1496
1991
  " </tr>\n",
1497
1992
  " <tr>\n",
1498
- " <th>2013-10-03</th>\n",
1499
- " <th>LTC</th>\n",
1500
- " <td>2.33700</td>\n",
1501
- " <td>2.50200</td>\n",
1502
- " <td>2.48900</td>\n",
1503
- " <td>2.50200</td>\n",
1504
- " <td>9.274000e+01</td>\n",
1993
+ " <th>ETC</th>\n",
1994
+ " <td>21.98</td>\n",
1995
+ " <td>23.8</td>\n",
1996
+ " <td>19.5</td>\n",
1997
+ " <td>20.27</td>\n",
1998
+ " <td>1.306320e+06</td>\n",
1505
1999
  " <td>0.000000</td>\n",
1506
2000
  " </tr>\n",
1507
2001
  " <tr>\n",
@@ -1516,13 +2010,31 @@
1516
2010
  " </tr>\n",
1517
2011
  " <tr>\n",
1518
2012
  " <th rowspan=\"5\" valign=\"top\">2024-08-04</th>\n",
1519
- " <th>ZEN</th>\n",
1520
- " <td>8.40600</td>\n",
1521
- " <td>9.11200</td>\n",
1522
- " <td>8.28500</td>\n",
1523
- " <td>8.46200</td>\n",
1524
- " <td>2.071124e+06</td>\n",
1525
- " <td>0.000194</td>\n",
2013
+ " <th>XTZ</th>\n",
2014
+ " <td>0.685</td>\n",
2015
+ " <td>0.693</td>\n",
2016
+ " <td>0.677</td>\n",
2017
+ " <td>0.679</td>\n",
2018
+ " <td>5.373374e+06</td>\n",
2019
+ " <td>0.000200</td>\n",
2020
+ " </tr>\n",
2021
+ " <tr>\n",
2022
+ " <th>YFI</th>\n",
2023
+ " <td>5341.0</td>\n",
2024
+ " <td>5341.0</td>\n",
2025
+ " <td>5196.0</td>\n",
2026
+ " <td>5198.0</td>\n",
2027
+ " <td>4.281050e+02</td>\n",
2028
+ " <td>0.000169</td>\n",
2029
+ " </tr>\n",
2030
+ " <tr>\n",
2031
+ " <th>ZEC</th>\n",
2032
+ " <td>31.76</td>\n",
2033
+ " <td>34.44</td>\n",
2034
+ " <td>31.28</td>\n",
2035
+ " <td>31.55</td>\n",
2036
+ " <td>1.348085e+06</td>\n",
2037
+ " <td>0.000191</td>\n",
1526
2038
  " </tr>\n",
1527
2039
  " <tr>\n",
1528
2040
  " <th>ZIL</th>\n",
@@ -1534,82 +2046,135 @@
1534
2046
  " <td>-0.000031</td>\n",
1535
2047
  " </tr>\n",
1536
2048
  " <tr>\n",
1537
- " <th>ZK</th>\n",
1538
- " <td>0.11451</td>\n",
1539
- " <td>0.11673</td>\n",
1540
- " <td>0.10857</td>\n",
1541
- " <td>0.10874</td>\n",
1542
- " <td>3.833253e+08</td>\n",
1543
- " <td>0.000200</td>\n",
1544
- " </tr>\n",
1545
- " <tr>\n",
1546
- " <th>ZRO</th>\n",
1547
- " <td>3.63100</td>\n",
1548
- " <td>3.64800</td>\n",
1549
- " <td>3.36200</td>\n",
1550
- " <td>3.36700</td>\n",
1551
- " <td>7.049472e+07</td>\n",
1552
- " <td>0.000184</td>\n",
1553
- " </tr>\n",
1554
- " <tr>\n",
1555
2049
  " <th>ZRX</th>\n",
1556
- " <td>0.30550</td>\n",
1557
- " <td>0.31210</td>\n",
1558
- " <td>0.29830</td>\n",
1559
- " <td>0.29880</td>\n",
2050
+ " <td>0.3055</td>\n",
2051
+ " <td>0.3121</td>\n",
2052
+ " <td>0.2983</td>\n",
2053
+ " <td>0.2988</td>\n",
1560
2054
  " <td>9.810764e+06</td>\n",
1561
2055
  " <td>0.000186</td>\n",
1562
2056
  " </tr>\n",
1563
2057
  " </tbody>\n",
1564
2058
  "</table>\n",
1565
- "<p>387494 rows × 6 columns</p>\n",
2059
+ "<p>136630 rows × 6 columns</p>\n",
1566
2060
  "</div>"
1567
2061
  ],
1568
2062
  "text/plain": [
1569
2063
  " open high low close volume \\\n",
1570
2064
  "date ticker \n",
1571
- "2013-09-29 LTC 2.56500 2.59000 2.59000 2.59000 5.000000e+00 \n",
1572
- "2013-09-30 LTC 2.59000 2.55400 2.55400 2.55400 5.623000e+01 \n",
1573
- "2013-10-01 LTC 2.55400 2.66600 2.52500 2.66600 1.000000e+00 \n",
1574
- "2013-10-02 LTC 2.66600 2.33700 2.33700 2.33700 1.500000e+00 \n",
1575
- "2013-10-03 LTC 2.33700 2.50200 2.48900 2.50200 9.274000e+01 \n",
2065
+ "2017-06-18 BAT <NA> <NA> <NA> <NA> NaN \n",
2066
+ " BTC 2655.1 2676.04 2488.59 2539.56 9.200422e+04 \n",
2067
+ " DASH <NA> <NA> <NA> <NA> NaN \n",
2068
+ " DOGE <NA> <NA> <NA> <NA> NaN \n",
2069
+ " ETC 21.98 23.8 19.5 20.27 1.306320e+06 \n",
1576
2070
  "... ... ... ... ... ... \n",
1577
- "2024-08-04 ZEN 8.40600 9.11200 8.28500 8.46200 2.071124e+06 \n",
2071
+ "2024-08-04 XTZ 0.685 0.693 0.677 0.679 5.373374e+06 \n",
2072
+ " YFI 5341.0 5341.0 5196.0 5198.0 4.281050e+02 \n",
2073
+ " ZEC 31.76 34.44 31.28 31.55 1.348085e+06 \n",
1578
2074
  " ZIL 0.01422 0.01441 0.01392 0.01396 2.048626e+08 \n",
1579
- " ZK 0.11451 0.11673 0.10857 0.10874 3.833253e+08 \n",
1580
- " ZRO 3.63100 3.64800 3.36200 3.36700 7.049472e+07 \n",
1581
- " ZRX 0.30550 0.31210 0.29830 0.29880 9.810764e+06 \n",
2075
+ " ZRX 0.3055 0.3121 0.2983 0.2988 9.810764e+06 \n",
1582
2076
  "\n",
1583
2077
  " funding_rate \n",
1584
2078
  "date ticker \n",
1585
- "2013-09-29 LTC 0.000000 \n",
1586
- "2013-09-30 LTC 0.000000 \n",
1587
- "2013-10-01 LTC 0.000000 \n",
1588
- "2013-10-02 LTC 0.000000 \n",
1589
- "2013-10-03 LTC 0.000000 \n",
2079
+ "2017-06-18 BAT NaN \n",
2080
+ " BTC 0.000000 \n",
2081
+ " DASH NaN \n",
2082
+ " DOGE NaN \n",
2083
+ " ETC 0.000000 \n",
1590
2084
  "... ... \n",
1591
- "2024-08-04 ZEN 0.000194 \n",
2085
+ "2024-08-04 XTZ 0.000200 \n",
2086
+ " YFI 0.000169 \n",
2087
+ " ZEC 0.000191 \n",
1592
2088
  " ZIL -0.000031 \n",
1593
- " ZK 0.000200 \n",
1594
- " ZRO 0.000184 \n",
1595
2089
  " ZRX 0.000186 \n",
1596
2090
  "\n",
1597
- "[387494 rows x 6 columns]"
2091
+ "[136630 rows x 6 columns]"
1598
2092
  ]
1599
2093
  },
1600
- "execution_count": 85,
2094
+ "execution_count": 49,
1601
2095
  "metadata": {},
1602
2096
  "output_type": "execute_result"
1603
2097
  }
1604
2098
  ],
1605
2099
  "source": [
1606
- "df.drop(['BTC'], level=1, axis=0)"
2100
+ "clean.df"
2101
+ ]
2102
+ },
2103
+ {
2104
+ "cell_type": "code",
2105
+ "execution_count": 50,
2106
+ "id": "a9b1764c",
2107
+ "metadata": {},
2108
+ "outputs": [],
2109
+ "source": [
2110
+ "# # clean data\n",
2111
+ "# clean_df = clean.filter_delisted_tickers().\\\n",
2112
+ "# filter_outliers(od_method='mad', excl_cols=['volume', 'funding_rate'], thresh_val=10).\\\n",
2113
+ "# repair_outliers(imp_method='fcst').\\\n",
2114
+ "# filter_avg_trading_val(thresh_val=1000000).\\\n",
2115
+ "# filter_missing_vals_gaps().\\\n",
2116
+ "# filter_min_nobs(ts_obs=1500, cs_obs=10).\\\n",
2117
+ "# get(attr='df').dropna(how='all')"
2118
+ ]
2119
+ },
2120
+ {
2121
+ "cell_type": "code",
2122
+ "execution_count": null,
2123
+ "id": "572d7a2e",
2124
+ "metadata": {},
2125
+ "outputs": [],
2126
+ "source": []
2127
+ },
2128
+ {
2129
+ "cell_type": "code",
2130
+ "execution_count": 53,
2131
+ "id": "99857595",
2132
+ "metadata": {},
2133
+ "outputs": [],
2134
+ "source": [
2135
+ "df.to_parquet('s3://factorlab-data/binance_historical_ohlcv_daily.parquet')"
2136
+ ]
2137
+ },
2138
+ {
2139
+ "cell_type": "code",
2140
+ "execution_count": 54,
2141
+ "id": "15e66225",
2142
+ "metadata": {},
2143
+ "outputs": [],
2144
+ "source": [
2145
+ "clean.df.dropna(how='all').to_parquet('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.parquet')"
2146
+ ]
2147
+ },
2148
+ {
2149
+ "cell_type": "code",
2150
+ "execution_count": null,
2151
+ "id": "8a962fa7",
2152
+ "metadata": {},
2153
+ "outputs": [],
2154
+ "source": []
2155
+ },
2156
+ {
2157
+ "cell_type": "code",
2158
+ "execution_count": 55,
2159
+ "id": "54b818cd",
2160
+ "metadata": {},
2161
+ "outputs": [],
2162
+ "source": [
2163
+ "clean.df.dropna(how='all').to_csv('../../../../factorlab/notebooks/binance_historical_ohlcv_daily.csv')"
1607
2164
  ]
1608
2165
  },
1609
2166
  {
1610
2167
  "cell_type": "code",
1611
2168
  "execution_count": null,
1612
- "id": "fabfa152",
2169
+ "id": "b8fa525d",
2170
+ "metadata": {},
2171
+ "outputs": [],
2172
+ "source": []
2173
+ },
2174
+ {
2175
+ "cell_type": "code",
2176
+ "execution_count": null,
2177
+ "id": "da953cbf",
1613
2178
  "metadata": {},
1614
2179
  "outputs": [],
1615
2180
  "source": []