cryptodatapy 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. cryptodatapy/conf/fields.csv +1 -1
  2. cryptodatapy/conf/tickers.csv +0 -1
  3. cryptodatapy/extract/data_vendors/CoinMetrics.ipynb +747 -0
  4. cryptodatapy/extract/data_vendors/coinmetrics_api.py +279 -209
  5. cryptodatapy/extract/data_vendors/cryptocompare_api.py +3 -5
  6. cryptodatapy/extract/data_vendors/datavendor.py +32 -12
  7. cryptodatapy/extract/data_vendors/glassnode_api.py +3 -2
  8. cryptodatapy/extract/data_vendors/tiingo_api.py +3 -2
  9. cryptodatapy/extract/datarequest.py +197 -36
  10. cryptodatapy/extract/libraries/Untitled.ipynb +33 -0
  11. cryptodatapy/extract/libraries/ccxt.ipynb +628 -754
  12. cryptodatapy/extract/libraries/ccxt_api.py +630 -346
  13. cryptodatapy/extract/libraries/pandasdr_api.py +13 -12
  14. cryptodatapy/extract/libraries/yfinance_api.py +511 -0
  15. cryptodatapy/transform/cc_onchain_data.csv +118423 -0
  16. cryptodatapy/transform/clean.py +17 -15
  17. cryptodatapy/transform/clean_onchain_data.ipynb +4750 -0
  18. cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1712 -1097
  19. cryptodatapy/transform/cmdty_data.ipynb +402 -0
  20. cryptodatapy/transform/convertparams.py +139 -181
  21. cryptodatapy/transform/credit_data.ipynb +291 -0
  22. cryptodatapy/transform/eqty_data.ipynb +836 -0
  23. cryptodatapy/transform/filter.py +13 -10
  24. cryptodatapy/transform/global_credit_data_daily.parquet +0 -0
  25. cryptodatapy/transform/od.py +1 -0
  26. cryptodatapy/transform/rates_data.ipynb +465 -0
  27. cryptodatapy/transform/us_rates_daily.csv +227752 -0
  28. cryptodatapy/transform/wrangle.py +109 -20
  29. cryptodatapy/util/datacredentials.py +28 -7
  30. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.7.dist-info}/METADATA +10 -7
  31. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.7.dist-info}/RECORD +33 -31
  32. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.7.dist-info}/WHEEL +1 -1
  33. cryptodatapy/.DS_Store +0 -0
  34. cryptodatapy/.idea/.gitignore +0 -3
  35. cryptodatapy/.idea/cryptodatapy.iml +0 -12
  36. cryptodatapy/.idea/csv-plugin.xml +0 -16
  37. cryptodatapy/.idea/inspectionProfiles/Project_Default.xml +0 -6
  38. cryptodatapy/.idea/inspectionProfiles/profiles_settings.xml +0 -6
  39. cryptodatapy/.idea/misc.xml +0 -4
  40. cryptodatapy/.idea/modules.xml +0 -8
  41. cryptodatapy/.idea/vcs.xml +0 -6
  42. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.7.dist-info}/LICENSE +0 -0
@@ -0,0 +1,836 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "9fea9fae",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "fatal: bad revision 'HEAD'\n",
14
+ "Importing plotly failed. Interactive plots will not work.\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "import pandas as pd\n",
20
+ "import numpy as np\n",
21
+ "\n",
22
+ "from cryptodatapy.extract.datarequest import DataRequest\n",
23
+ "from cryptodatapy.util.datacatalog import DataCatalog \n",
24
+ "from cryptodatapy.extract.getdata import GetData\n",
25
+ "from cryptodatapy.transform.od import OutlierDetection\n",
26
+ "from cryptodatapy.transform.impute import Impute\n",
27
+ "from cryptodatapy.transform.filter import Filter\n",
28
+ "from cryptodatapy.transform.clean import CleanData, stitch_dataframes\n",
29
+ "from cryptodatapy.transform.impute import Impute"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "markdown",
34
+ "id": "fc533553-649e-4479-8a4b-b3db5cb77d76",
35
+ "metadata": {},
36
+ "source": [
37
+ "# Data"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 2,
43
+ "id": "2ad72bc7-5fdd-4ae5-8d9e-e90118efcc26",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "dc = DataCatalog()"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 3,
53
+ "id": "0342bab7-a792-4be3-8d4d-44c4343d0e6a",
54
+ "metadata": {},
55
+ "outputs": [
56
+ {
57
+ "data": {
58
+ "text/plain": [
59
+ "ticker\n",
60
+ "AR_Eqty_Idx ^MERV\n",
61
+ "AU_Eqty_Idx ^AXJO\n",
62
+ "BE_Eqty_Idx ^BFX\n",
63
+ "BR_Eqty_Idx ^BVSP\n",
64
+ "CA_Eqty_Idx ^GSPTSE\n",
65
+ "CL_Eqty_Idx ^IPSA\n",
66
+ "CN_Eqty_Idx 000001.SS\n",
67
+ "DE_Eqty_Idx ^GDAXI\n",
68
+ "EZ_Eqty_Idx ^STOXX50E\n",
69
+ "FR_Eqty_Idx ^FCHI\n",
70
+ "GB_Eqty_Idx ^FTSE\n",
71
+ "HK_Eqty_Idx ^HIS\n",
72
+ "ID_Eqty_Idx ^JKSE\n",
73
+ "IL_Eqty_Idx ^TA125.TA\n",
74
+ "JP_Eqty_Idx ^N225\n",
75
+ "MX_Eqty_Idx ^MXX\n",
76
+ "MY_Eqty_Idx ^KLSE\n",
77
+ "NZ_Eqty_Idx ^NZ50\n",
78
+ "RU_Eqty_Idx IMOEX.ME\n",
79
+ "SG_Eqty_Idx ^STI\n",
80
+ "KR_Eqty_Idx ^KS11\n",
81
+ "TW_Eqty_Idx ^TWII\n",
82
+ "US_Eqty_Idx ^GSPC\n",
83
+ "ZA_Eqty_Idx ^JN0U.JO\n",
84
+ "US_Eqty_Vol_Idx ^VIX\n",
85
+ "Name: yahoo_id, dtype: object"
86
+ ]
87
+ },
88
+ "execution_count": 3,
89
+ "metadata": {},
90
+ "output_type": "execute_result"
91
+ }
92
+ ],
93
+ "source": [
94
+ "dc.get_tickers_metadata(cat='eqty').loc[:, 'yahoo_id'].dropna()"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "id": "12de559e-ddff-45c8-a185-0652e3743733",
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": []
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 4,
108
+ "id": "d875cd96-a29c-4e22-9806-a1b1c2513564",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "msci_etf_tickers = dc.get_tickers_metadata(cat='eqty').loc[:, 'tiingo_id'].dropna().values.tolist()"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 5,
118
+ "id": "1bf0af0d-7ed7-4e07-9da1-5625b1f32bce",
119
+ "metadata": {},
120
+ "outputs": [
121
+ {
122
+ "data": {
123
+ "text/plain": [
124
+ "['SPY',\n",
125
+ " 'ARGT',\n",
126
+ " 'UAE',\n",
127
+ " 'EWA',\n",
128
+ " 'EWO',\n",
129
+ " 'EWK',\n",
130
+ " 'EWZ',\n",
131
+ " 'EWC',\n",
132
+ " 'EWL',\n",
133
+ " 'ECH',\n",
134
+ " 'MCHI',\n",
135
+ " 'ICOL',\n",
136
+ " 'EWG',\n",
137
+ " 'URTH',\n",
138
+ " 'EGPT',\n",
139
+ " 'EEM',\n",
140
+ " 'EWP',\n",
141
+ " 'EZU',\n",
142
+ " 'EWQ',\n",
143
+ " 'EFNL',\n",
144
+ " 'EWU',\n",
145
+ " 'EWH',\n",
146
+ " 'EIDO',\n",
147
+ " 'INDA',\n",
148
+ " 'EIRL',\n",
149
+ " 'EIS',\n",
150
+ " 'EWI',\n",
151
+ " 'EWJ',\n",
152
+ " 'EWW',\n",
153
+ " 'EWM',\n",
154
+ " 'NGE',\n",
155
+ " 'EWN',\n",
156
+ " 'ENOR',\n",
157
+ " 'ENZL',\n",
158
+ " 'PAK',\n",
159
+ " 'EPU',\n",
160
+ " 'EPHE',\n",
161
+ " 'EPOL',\n",
162
+ " 'PGAL',\n",
163
+ " 'QAT',\n",
164
+ " 'ERUS',\n",
165
+ " 'KSA',\n",
166
+ " 'EWS',\n",
167
+ " 'EWD',\n",
168
+ " 'EWY',\n",
169
+ " 'EWT',\n",
170
+ " 'THD',\n",
171
+ " 'TUR',\n",
172
+ " 'EUSA',\n",
173
+ " 'ACWI',\n",
174
+ " 'EZA',\n",
175
+ " 'VXX',\n",
176
+ " 'IYR',\n",
177
+ " 'REET']"
178
+ ]
179
+ },
180
+ "execution_count": 5,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "msci_etf_tickers"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 6,
192
+ "id": "0b1bc395-40dd-44c0-bac1-dd1e00f8a5c4",
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": [
196
+ "data_req = DataRequest(source='yahoo', source_tickers=msci_etf_tickers, cat='eqty')"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": 7,
202
+ "id": "2674260a-56c7-40a4-9708-0eb335fa075d",
203
+ "metadata": {},
204
+ "outputs": [
205
+ {
206
+ "name": "stdout",
207
+ "output_type": "stream",
208
+ "text": [
209
+ "[*********************100%***********************] 54 of 54 completed\n"
210
+ ]
211
+ }
212
+ ],
213
+ "source": [
214
+ "msci_etf_df = GetData(data_req).get_series()"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": 8,
220
+ "id": "ee0523a8-c6ee-42b9-9410-36b26906f2de",
221
+ "metadata": {},
222
+ "outputs": [
223
+ {
224
+ "data": {
225
+ "text/html": [
226
+ "<div>\n",
227
+ "<style scoped>\n",
228
+ " .dataframe tbody tr th:only-of-type {\n",
229
+ " vertical-align: middle;\n",
230
+ " }\n",
231
+ "\n",
232
+ " .dataframe tbody tr th {\n",
233
+ " vertical-align: top;\n",
234
+ " }\n",
235
+ "\n",
236
+ " .dataframe thead th {\n",
237
+ " text-align: right;\n",
238
+ " }\n",
239
+ "</style>\n",
240
+ "<table border=\"1\" class=\"dataframe\">\n",
241
+ " <thead>\n",
242
+ " <tr style=\"text-align: right;\">\n",
243
+ " <th>ticker</th>\n",
244
+ " <th>ACWI</th>\n",
245
+ " <th>ARGT</th>\n",
246
+ " <th>ECH</th>\n",
247
+ " <th>EEM</th>\n",
248
+ " <th>EFNL</th>\n",
249
+ " <th>EGPT</th>\n",
250
+ " <th>EIDO</th>\n",
251
+ " <th>EIRL</th>\n",
252
+ " <th>EIS</th>\n",
253
+ " <th>ENOR</th>\n",
254
+ " <th>...</th>\n",
255
+ " <th>PAK</th>\n",
256
+ " <th>PGAL</th>\n",
257
+ " <th>QAT</th>\n",
258
+ " <th>REET</th>\n",
259
+ " <th>SPY</th>\n",
260
+ " <th>THD</th>\n",
261
+ " <th>TUR</th>\n",
262
+ " <th>UAE</th>\n",
263
+ " <th>URTH</th>\n",
264
+ " <th>VXX</th>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>date</th>\n",
268
+ " <th></th>\n",
269
+ " <th></th>\n",
270
+ " <th></th>\n",
271
+ " <th></th>\n",
272
+ " <th></th>\n",
273
+ " <th></th>\n",
274
+ " <th></th>\n",
275
+ " <th></th>\n",
276
+ " <th></th>\n",
277
+ " <th></th>\n",
278
+ " <th></th>\n",
279
+ " <th></th>\n",
280
+ " <th></th>\n",
281
+ " <th></th>\n",
282
+ " <th></th>\n",
283
+ " <th></th>\n",
284
+ " <th></th>\n",
285
+ " <th></th>\n",
286
+ " <th></th>\n",
287
+ " <th></th>\n",
288
+ " <th></th>\n",
289
+ " </tr>\n",
290
+ " </thead>\n",
291
+ " <tbody>\n",
292
+ " <tr>\n",
293
+ " <th>1993-01-29</th>\n",
294
+ " <td>&lt;NA&gt;</td>\n",
295
+ " <td>&lt;NA&gt;</td>\n",
296
+ " <td>&lt;NA&gt;</td>\n",
297
+ " <td>&lt;NA&gt;</td>\n",
298
+ " <td>&lt;NA&gt;</td>\n",
299
+ " <td>&lt;NA&gt;</td>\n",
300
+ " <td>&lt;NA&gt;</td>\n",
301
+ " <td>&lt;NA&gt;</td>\n",
302
+ " <td>&lt;NA&gt;</td>\n",
303
+ " <td>&lt;NA&gt;</td>\n",
304
+ " <td>...</td>\n",
305
+ " <td>&lt;NA&gt;</td>\n",
306
+ " <td>&lt;NA&gt;</td>\n",
307
+ " <td>&lt;NA&gt;</td>\n",
308
+ " <td>&lt;NA&gt;</td>\n",
309
+ " <td>&lt;NA&gt;</td>\n",
310
+ " <td>&lt;NA&gt;</td>\n",
311
+ " <td>&lt;NA&gt;</td>\n",
312
+ " <td>&lt;NA&gt;</td>\n",
313
+ " <td>&lt;NA&gt;</td>\n",
314
+ " <td>&lt;NA&gt;</td>\n",
315
+ " </tr>\n",
316
+ " <tr>\n",
317
+ " <th>1993-02-01</th>\n",
318
+ " <td>&lt;NA&gt;</td>\n",
319
+ " <td>&lt;NA&gt;</td>\n",
320
+ " <td>&lt;NA&gt;</td>\n",
321
+ " <td>&lt;NA&gt;</td>\n",
322
+ " <td>&lt;NA&gt;</td>\n",
323
+ " <td>&lt;NA&gt;</td>\n",
324
+ " <td>&lt;NA&gt;</td>\n",
325
+ " <td>&lt;NA&gt;</td>\n",
326
+ " <td>&lt;NA&gt;</td>\n",
327
+ " <td>&lt;NA&gt;</td>\n",
328
+ " <td>...</td>\n",
329
+ " <td>&lt;NA&gt;</td>\n",
330
+ " <td>&lt;NA&gt;</td>\n",
331
+ " <td>&lt;NA&gt;</td>\n",
332
+ " <td>&lt;NA&gt;</td>\n",
333
+ " <td>&lt;NA&gt;</td>\n",
334
+ " <td>&lt;NA&gt;</td>\n",
335
+ " <td>&lt;NA&gt;</td>\n",
336
+ " <td>&lt;NA&gt;</td>\n",
337
+ " <td>&lt;NA&gt;</td>\n",
338
+ " <td>&lt;NA&gt;</td>\n",
339
+ " </tr>\n",
340
+ " <tr>\n",
341
+ " <th>1993-02-02</th>\n",
342
+ " <td>&lt;NA&gt;</td>\n",
343
+ " <td>&lt;NA&gt;</td>\n",
344
+ " <td>&lt;NA&gt;</td>\n",
345
+ " <td>&lt;NA&gt;</td>\n",
346
+ " <td>&lt;NA&gt;</td>\n",
347
+ " <td>&lt;NA&gt;</td>\n",
348
+ " <td>&lt;NA&gt;</td>\n",
349
+ " <td>&lt;NA&gt;</td>\n",
350
+ " <td>&lt;NA&gt;</td>\n",
351
+ " <td>&lt;NA&gt;</td>\n",
352
+ " <td>...</td>\n",
353
+ " <td>&lt;NA&gt;</td>\n",
354
+ " <td>&lt;NA&gt;</td>\n",
355
+ " <td>&lt;NA&gt;</td>\n",
356
+ " <td>&lt;NA&gt;</td>\n",
357
+ " <td>&lt;NA&gt;</td>\n",
358
+ " <td>&lt;NA&gt;</td>\n",
359
+ " <td>&lt;NA&gt;</td>\n",
360
+ " <td>&lt;NA&gt;</td>\n",
361
+ " <td>&lt;NA&gt;</td>\n",
362
+ " <td>&lt;NA&gt;</td>\n",
363
+ " </tr>\n",
364
+ " <tr>\n",
365
+ " <th>1993-02-03</th>\n",
366
+ " <td>&lt;NA&gt;</td>\n",
367
+ " <td>&lt;NA&gt;</td>\n",
368
+ " <td>&lt;NA&gt;</td>\n",
369
+ " <td>&lt;NA&gt;</td>\n",
370
+ " <td>&lt;NA&gt;</td>\n",
371
+ " <td>&lt;NA&gt;</td>\n",
372
+ " <td>&lt;NA&gt;</td>\n",
373
+ " <td>&lt;NA&gt;</td>\n",
374
+ " <td>&lt;NA&gt;</td>\n",
375
+ " <td>&lt;NA&gt;</td>\n",
376
+ " <td>...</td>\n",
377
+ " <td>&lt;NA&gt;</td>\n",
378
+ " <td>&lt;NA&gt;</td>\n",
379
+ " <td>&lt;NA&gt;</td>\n",
380
+ " <td>&lt;NA&gt;</td>\n",
381
+ " <td>&lt;NA&gt;</td>\n",
382
+ " <td>&lt;NA&gt;</td>\n",
383
+ " <td>&lt;NA&gt;</td>\n",
384
+ " <td>&lt;NA&gt;</td>\n",
385
+ " <td>&lt;NA&gt;</td>\n",
386
+ " <td>&lt;NA&gt;</td>\n",
387
+ " </tr>\n",
388
+ " <tr>\n",
389
+ " <th>1993-02-04</th>\n",
390
+ " <td>&lt;NA&gt;</td>\n",
391
+ " <td>&lt;NA&gt;</td>\n",
392
+ " <td>&lt;NA&gt;</td>\n",
393
+ " <td>&lt;NA&gt;</td>\n",
394
+ " <td>&lt;NA&gt;</td>\n",
395
+ " <td>&lt;NA&gt;</td>\n",
396
+ " <td>&lt;NA&gt;</td>\n",
397
+ " <td>&lt;NA&gt;</td>\n",
398
+ " <td>&lt;NA&gt;</td>\n",
399
+ " <td>&lt;NA&gt;</td>\n",
400
+ " <td>...</td>\n",
401
+ " <td>&lt;NA&gt;</td>\n",
402
+ " <td>&lt;NA&gt;</td>\n",
403
+ " <td>&lt;NA&gt;</td>\n",
404
+ " <td>&lt;NA&gt;</td>\n",
405
+ " <td>&lt;NA&gt;</td>\n",
406
+ " <td>&lt;NA&gt;</td>\n",
407
+ " <td>&lt;NA&gt;</td>\n",
408
+ " <td>&lt;NA&gt;</td>\n",
409
+ " <td>&lt;NA&gt;</td>\n",
410
+ " <td>&lt;NA&gt;</td>\n",
411
+ " </tr>\n",
412
+ " <tr>\n",
413
+ " <th>...</th>\n",
414
+ " <td>...</td>\n",
415
+ " <td>...</td>\n",
416
+ " <td>...</td>\n",
417
+ " <td>...</td>\n",
418
+ " <td>...</td>\n",
419
+ " <td>...</td>\n",
420
+ " <td>...</td>\n",
421
+ " <td>...</td>\n",
422
+ " <td>...</td>\n",
423
+ " <td>...</td>\n",
424
+ " <td>...</td>\n",
425
+ " <td>...</td>\n",
426
+ " <td>...</td>\n",
427
+ " <td>...</td>\n",
428
+ " <td>...</td>\n",
429
+ " <td>...</td>\n",
430
+ " <td>...</td>\n",
431
+ " <td>...</td>\n",
432
+ " <td>...</td>\n",
433
+ " <td>...</td>\n",
434
+ " <td>...</td>\n",
435
+ " </tr>\n",
436
+ " <tr>\n",
437
+ " <th>2024-09-16</th>\n",
438
+ " <td>66.760002</td>\n",
439
+ " <td>68.5</td>\n",
440
+ " <td>24.48</td>\n",
441
+ " <td>94.160004</td>\n",
442
+ " <td>32.599998</td>\n",
443
+ " <td>8.06</td>\n",
444
+ " <td>70.540001</td>\n",
445
+ " <td>51.580002</td>\n",
446
+ " <td>26.049999</td>\n",
447
+ " <td>37.299999</td>\n",
448
+ " <td>...</td>\n",
449
+ " <td>63.52</td>\n",
450
+ " <td>&lt;NA&gt;</td>\n",
451
+ " <td>58.130001</td>\n",
452
+ " <td>49.240002</td>\n",
453
+ " <td>116.610001</td>\n",
454
+ " <td>18.200001</td>\n",
455
+ " <td>27.17</td>\n",
456
+ " <td>26.07</td>\n",
457
+ " <td>40.66</td>\n",
458
+ " <td>15.09</td>\n",
459
+ " </tr>\n",
460
+ " <tr>\n",
461
+ " <th>2024-09-17</th>\n",
462
+ " <td>66.43</td>\n",
463
+ " <td>68.260002</td>\n",
464
+ " <td>24.15</td>\n",
465
+ " <td>94.330002</td>\n",
466
+ " <td>32.549999</td>\n",
467
+ " <td>8.06</td>\n",
468
+ " <td>69.709999</td>\n",
469
+ " <td>51.400002</td>\n",
470
+ " <td>26.34</td>\n",
471
+ " <td>37.029999</td>\n",
472
+ " <td>...</td>\n",
473
+ " <td>63.220001</td>\n",
474
+ " <td>&lt;NA&gt;</td>\n",
475
+ " <td>57.959999</td>\n",
476
+ " <td>50.299999</td>\n",
477
+ " <td>116.489998</td>\n",
478
+ " <td>18.049999</td>\n",
479
+ " <td>26.969999</td>\n",
480
+ " <td>25.82</td>\n",
481
+ " <td>40.439999</td>\n",
482
+ " <td>15.19</td>\n",
483
+ " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>2024-09-18</th>\n",
486
+ " <td>66.400002</td>\n",
487
+ " <td>68.769997</td>\n",
488
+ " <td>24.27</td>\n",
489
+ " <td>94.199997</td>\n",
490
+ " <td>32.580002</td>\n",
491
+ " <td>8.06</td>\n",
492
+ " <td>69.389999</td>\n",
493
+ " <td>51.16</td>\n",
494
+ " <td>26.42</td>\n",
495
+ " <td>36.990002</td>\n",
496
+ " <td>...</td>\n",
497
+ " <td>63.259998</td>\n",
498
+ " <td>&lt;NA&gt;</td>\n",
499
+ " <td>57.66</td>\n",
500
+ " <td>49.880001</td>\n",
501
+ " <td>116.160004</td>\n",
502
+ " <td>18.049999</td>\n",
503
+ " <td>26.889999</td>\n",
504
+ " <td>25.66</td>\n",
505
+ " <td>40.900002</td>\n",
506
+ " <td>15.07</td>\n",
507
+ " </tr>\n",
508
+ " <tr>\n",
509
+ " <th>2024-09-19</th>\n",
510
+ " <td>67.709999</td>\n",
511
+ " <td>70.150002</td>\n",
512
+ " <td>24.690001</td>\n",
513
+ " <td>95.419998</td>\n",
514
+ " <td>33.09</td>\n",
515
+ " <td>8.06</td>\n",
516
+ " <td>71.230003</td>\n",
517
+ " <td>51.509998</td>\n",
518
+ " <td>26.74</td>\n",
519
+ " <td>37.490002</td>\n",
520
+ " <td>...</td>\n",
521
+ " <td>63.91</td>\n",
522
+ " <td>&lt;NA&gt;</td>\n",
523
+ " <td>58.110001</td>\n",
524
+ " <td>47.720001</td>\n",
525
+ " <td>118.230003</td>\n",
526
+ " <td>18.219999</td>\n",
527
+ " <td>26.98</td>\n",
528
+ " <td>25.799999</td>\n",
529
+ " <td>41.400002</td>\n",
530
+ " <td>15.41</td>\n",
531
+ " </tr>\n",
532
+ " <tr>\n",
533
+ " <th>2024-09-20</th>\n",
534
+ " <td>68.089996</td>\n",
535
+ " <td>69.389999</td>\n",
536
+ " <td>24.549999</td>\n",
537
+ " <td>94.970001</td>\n",
538
+ " <td>32.689999</td>\n",
539
+ " <td>8.06</td>\n",
540
+ " <td>71.269997</td>\n",
541
+ " <td>50.66</td>\n",
542
+ " <td>26.940001</td>\n",
543
+ " <td>37.040001</td>\n",
544
+ " <td>...</td>\n",
545
+ " <td>63.32</td>\n",
546
+ " <td>&lt;NA&gt;</td>\n",
547
+ " <td>58.740002</td>\n",
548
+ " <td>47.43</td>\n",
549
+ " <td>117.760002</td>\n",
550
+ " <td>18.15</td>\n",
551
+ " <td>26.83</td>\n",
552
+ " <td>25.799999</td>\n",
553
+ " <td>41.349998</td>\n",
554
+ " <td>15.4</td>\n",
555
+ " </tr>\n",
556
+ " </tbody>\n",
557
+ "</table>\n",
558
+ "<p>7968 rows × 54 columns</p>\n",
559
+ "</div>"
560
+ ],
561
+ "text/plain": [
562
+ "ticker ACWI ARGT ECH EEM EFNL EGPT \\\n",
563
+ "date \n",
564
+ "1993-01-29 <NA> <NA> <NA> <NA> <NA> <NA> \n",
565
+ "1993-02-01 <NA> <NA> <NA> <NA> <NA> <NA> \n",
566
+ "1993-02-02 <NA> <NA> <NA> <NA> <NA> <NA> \n",
567
+ "1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
568
+ "1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
569
+ "... ... ... ... ... ... ... \n",
570
+ "2024-09-16 66.760002 68.5 24.48 94.160004 32.599998 8.06 \n",
571
+ "2024-09-17 66.43 68.260002 24.15 94.330002 32.549999 8.06 \n",
572
+ "2024-09-18 66.400002 68.769997 24.27 94.199997 32.580002 8.06 \n",
573
+ "2024-09-19 67.709999 70.150002 24.690001 95.419998 33.09 8.06 \n",
574
+ "2024-09-20 68.089996 69.389999 24.549999 94.970001 32.689999 8.06 \n",
575
+ "\n",
576
+ "ticker EIDO EIRL EIS ENOR ... PAK PGAL \\\n",
577
+ "date ... \n",
578
+ "1993-01-29 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
579
+ "1993-02-01 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
580
+ "1993-02-02 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
581
+ "1993-02-03 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
582
+ "1993-02-04 <NA> <NA> <NA> <NA> ... <NA> <NA> \n",
583
+ "... ... ... ... ... ... ... ... \n",
584
+ "2024-09-16 70.540001 51.580002 26.049999 37.299999 ... 63.52 <NA> \n",
585
+ "2024-09-17 69.709999 51.400002 26.34 37.029999 ... 63.220001 <NA> \n",
586
+ "2024-09-18 69.389999 51.16 26.42 36.990002 ... 63.259998 <NA> \n",
587
+ "2024-09-19 71.230003 51.509998 26.74 37.490002 ... 63.91 <NA> \n",
588
+ "2024-09-20 71.269997 50.66 26.940001 37.040001 ... 63.32 <NA> \n",
589
+ "\n",
590
+ "ticker QAT REET SPY THD TUR UAE \\\n",
591
+ "date \n",
592
+ "1993-01-29 <NA> <NA> <NA> <NA> <NA> <NA> \n",
593
+ "1993-02-01 <NA> <NA> <NA> <NA> <NA> <NA> \n",
594
+ "1993-02-02 <NA> <NA> <NA> <NA> <NA> <NA> \n",
595
+ "1993-02-03 <NA> <NA> <NA> <NA> <NA> <NA> \n",
596
+ "1993-02-04 <NA> <NA> <NA> <NA> <NA> <NA> \n",
597
+ "... ... ... ... ... ... ... \n",
598
+ "2024-09-16 58.130001 49.240002 116.610001 18.200001 27.17 26.07 \n",
599
+ "2024-09-17 57.959999 50.299999 116.489998 18.049999 26.969999 25.82 \n",
600
+ "2024-09-18 57.66 49.880001 116.160004 18.049999 26.889999 25.66 \n",
601
+ "2024-09-19 58.110001 47.720001 118.230003 18.219999 26.98 25.799999 \n",
602
+ "2024-09-20 58.740002 47.43 117.760002 18.15 26.83 25.799999 \n",
603
+ "\n",
604
+ "ticker URTH VXX \n",
605
+ "date \n",
606
+ "1993-01-29 <NA> <NA> \n",
607
+ "1993-02-01 <NA> <NA> \n",
608
+ "1993-02-02 <NA> <NA> \n",
609
+ "1993-02-03 <NA> <NA> \n",
610
+ "1993-02-04 <NA> <NA> \n",
611
+ "... ... ... \n",
612
+ "2024-09-16 40.66 15.09 \n",
613
+ "2024-09-17 40.439999 15.19 \n",
614
+ "2024-09-18 40.900002 15.07 \n",
615
+ "2024-09-19 41.400002 15.41 \n",
616
+ "2024-09-20 41.349998 15.4 \n",
617
+ "\n",
618
+ "[7968 rows x 54 columns]"
619
+ ]
620
+ },
621
+ "execution_count": 8,
622
+ "metadata": {},
623
+ "output_type": "execute_result"
624
+ }
625
+ ],
626
+ "source": [
627
+ "msci_etf_df.close.unstack()"
628
+ ]
629
+ },
630
+ {
631
+ "cell_type": "code",
632
+ "execution_count": 10,
633
+ "id": "83f30ee7-b686-4ee8-8c5f-9333d2be31c8",
634
+ "metadata": {},
635
+ "outputs": [],
636
+ "source": [
637
+ "msci_etf_df.to_parquet('s3://factorlab-data/global_msci_eqty_etf_data_daily.parquet')"
638
+ ]
639
+ },
640
+ {
641
+ "cell_type": "code",
642
+ "execution_count": null,
643
+ "id": "fa7fb5fc-e6f3-4a1e-bda5-365d09e2b3e0",
644
+ "metadata": {},
645
+ "outputs": [],
646
+ "source": []
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": null,
651
+ "id": "0ad35f12-5943-4640-96fb-07b7ea738abd",
652
+ "metadata": {},
653
+ "outputs": [],
654
+ "source": []
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "execution_count": 53,
659
+ "id": "9b958699-5d8b-48bb-81e2-9d6baa301be0",
660
+ "metadata": {},
661
+ "outputs": [],
662
+ "source": [
663
+ "msci_etf_df.to_csv('../../../../research_notebooks/msci_etf_df.csv')"
664
+ ]
665
+ },
666
+ {
667
+ "cell_type": "code",
668
+ "execution_count": null,
669
+ "id": "8e25221e-1c63-4939-ba5e-136f607b3b66",
670
+ "metadata": {},
671
+ "outputs": [],
672
+ "source": []
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "execution_count": 42,
677
+ "id": "555d1bcc-2d79-46fd-8297-426a3c9fb3f9",
678
+ "metadata": {},
679
+ "outputs": [],
680
+ "source": [
681
+ "eqty_df.to_csv('../../../../research_notebooks/eqty_df.csv')"
682
+ ]
683
+ },
684
+ {
685
+ "cell_type": "code",
686
+ "execution_count": null,
687
+ "id": "e2e90a05-6b20-4fad-83c2-710e0735a427",
688
+ "metadata": {},
689
+ "outputs": [],
690
+ "source": []
691
+ },
692
+ {
693
+ "cell_type": "code",
694
+ "execution_count": 18,
695
+ "id": "752820be-c23a-4343-b48a-690086327b2c",
696
+ "metadata": {},
697
+ "outputs": [
698
+ {
699
+ "name": "stdout",
700
+ "output_type": "stream",
701
+ "text": [
702
+ "[*********************100%***********************] 1 of 1 completed\n"
703
+ ]
704
+ }
705
+ ],
706
+ "source": [
707
+ "vix_df = GetData(data_req).get_series()"
708
+ ]
709
+ },
710
+ {
711
+ "cell_type": "code",
712
+ "execution_count": 26,
713
+ "id": "1f193d00-704c-4d71-80ae-df1eb16da177",
714
+ "metadata": {},
715
+ "outputs": [],
716
+ "source": [
717
+ "vix_df = vix_df.unstack().close.rename(columns={'^VIX': 'US_Eqty_Vol_Idx'}).stack().to_frame('close')"
718
+ ]
719
+ },
720
+ {
721
+ "cell_type": "code",
722
+ "execution_count": 27,
723
+ "id": "e38b1248-088f-4b4c-8f07-4a5245a5f433",
724
+ "metadata": {},
725
+ "outputs": [],
726
+ "source": [
727
+ "wld_eqty_df = pd.read_parquet('s3://factorlab-data/global_eqty_data_daily.parquet')"
728
+ ]
729
+ },
730
+ {
731
+ "cell_type": "code",
732
+ "execution_count": 31,
733
+ "id": "ffc83529-9d3c-4b09-b0d7-15c7c606acfc",
734
+ "metadata": {},
735
+ "outputs": [],
736
+ "source": [
737
+ "eqty_df = pd.concat([wld_eqty_df, vix_df]).sort_index()"
738
+ ]
739
+ },
740
+ {
741
+ "cell_type": "code",
742
+ "execution_count": 40,
743
+ "id": "1f7e18c1-cf62-4afb-92f1-41c630e84142",
744
+ "metadata": {},
745
+ "outputs": [],
746
+ "source": [
747
+ "eqty_df.to_csv('../../../../research_notebooks/eqty_df.csv')"
748
+ ]
749
+ },
750
+ {
751
+ "cell_type": "code",
752
+ "execution_count": null,
753
+ "id": "d58ea41b-5193-4e34-b5a6-b59b686e53ca",
754
+ "metadata": {},
755
+ "outputs": [],
756
+ "source": []
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": null,
761
+ "id": "243eca1e-c809-4c0d-a256-15ef85218c26",
762
+ "metadata": {},
763
+ "outputs": [],
764
+ "source": []
765
+ },
766
+ {
767
+ "cell_type": "code",
768
+ "execution_count": null,
769
+ "id": "ddd77033-9785-481e-ae32-020ce23a188d",
770
+ "metadata": {},
771
+ "outputs": [],
772
+ "source": [
773
+ "clean = CleanData(fx_df1)"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": null,
779
+ "id": "0dd143fb-a5ca-40c6-860d-4846ad171586",
780
+ "metadata": {},
781
+ "outputs": [],
782
+ "source": [
783
+ "clean.filter_outliers(od_method='mad', thresh_val=10)"
784
+ ]
785
+ },
786
+ {
787
+ "cell_type": "code",
788
+ "execution_count": null,
789
+ "id": "2bde3b51-aac3-488a-95b8-2bd75bf654f5",
790
+ "metadata": {},
791
+ "outputs": [],
792
+ "source": [
793
+ "clean.repair_outliers(imp_method='fcst')"
794
+ ]
795
+ },
796
+ {
797
+ "cell_type": "code",
798
+ "execution_count": null,
799
+ "id": "a61a27b5-c03b-4091-8f1e-4659e301ec7b",
800
+ "metadata": {},
801
+ "outputs": [],
802
+ "source": [
803
+ "clean.df.unstack().close.plot(subplots=True, figsize=(15,100))"
804
+ ]
805
+ },
806
+ {
807
+ "cell_type": "code",
808
+ "execution_count": null,
809
+ "id": "10ecb5cb-83c2-474f-acc1-b6d2f1e08a50",
810
+ "metadata": {},
811
+ "outputs": [],
812
+ "source": []
813
+ }
814
+ ],
815
+ "metadata": {
816
+ "kernelspec": {
817
+ "display_name": "cryptodatapy",
818
+ "language": "python",
819
+ "name": "cryptodatapy"
820
+ },
821
+ "language_info": {
822
+ "codemirror_mode": {
823
+ "name": "ipython",
824
+ "version": 3
825
+ },
826
+ "file_extension": ".py",
827
+ "mimetype": "text/x-python",
828
+ "name": "python",
829
+ "nbconvert_exporter": "python",
830
+ "pygments_lexer": "ipython3",
831
+ "version": "3.9.12"
832
+ }
833
+ },
834
+ "nbformat": 4,
835
+ "nbformat_minor": 5
836
+ }