cryptodatapy 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,465 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 461,
6
- "id": "9fea9fae",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.\n"
14
- ]
15
- }
16
- ],
17
- "source": [
18
- "import pandas as pd\n",
19
- "import numpy as np\n",
20
- "\n",
21
- "from cryptodatapy.extract.datarequest import DataRequest\n",
22
- "from cryptodatapy.util.datacatalog import DataCatalog \n",
23
- "from cryptodatapy.extract.getdata import GetData\n",
24
- "from cryptodatapy.transform.od import OutlierDetection\n",
25
- "from cryptodatapy.transform.impute import Impute\n",
26
- "from cryptodatapy.transform.filter import Filter\n",
27
- "from cryptodatapy.transform.clean import CleanData, stitch_dataframes\n",
28
- "from cryptodatapy.transform.impute import Impute"
29
- ]
30
- },
31
- {
32
- "cell_type": "markdown",
33
- "id": "fc533553-649e-4479-8a4b-b3db5cb77d76",
34
- "metadata": {},
35
- "source": [
36
- "# Data"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 2,
42
- "id": "2ad72bc7-5fdd-4ae5-8d9e-e90118efcc26",
43
- "metadata": {},
44
- "outputs": [],
45
- "source": [
46
- "dc = DataCatalog()"
47
- ]
48
- },
49
- {
50
- "cell_type": "code",
51
- "execution_count": 24,
52
- "id": "e7fe6fda-f0cb-420a-a892-b7f73eba48a8",
53
- "metadata": {},
54
- "outputs": [],
55
- "source": [
56
- "us_rates_tickers = dc.get_tickers_metadata(cat='rates', subcat='yield', mkt_type='spot').loc[:, 'fred_id'].dropna().index.tolist()"
57
- ]
58
- },
59
- {
60
- "cell_type": "code",
61
- "execution_count": 450,
62
- "id": "d92d80db-bf8c-4e8a-87c5-aaebb298bdb5",
63
- "metadata": {},
64
- "outputs": [],
65
- "source": [
66
- "data_req = DataRequest(source='yahoo', source_tickers=['^MOVE'], cat='rates')"
67
- ]
68
- },
69
- {
70
- "cell_type": "code",
71
- "execution_count": 452,
72
- "id": "c36cda5a-6406-49b5-84f2-ce83932f423e",
73
- "metadata": {},
74
- "outputs": [
75
- {
76
- "name": "stdout",
77
- "output_type": "stream",
78
- "text": [
79
- "[*********************100%***********************] 1 of 1 completed\n"
80
- ]
81
- }
82
- ],
83
- "source": [
84
- "move_df = GetData(data_req).get_series()"
85
- ]
86
- },
87
- {
88
- "cell_type": "code",
89
- "execution_count": 456,
90
- "id": "c2080432-892e-44e8-9671-45f0ddebb28b",
91
- "metadata": {},
92
- "outputs": [],
93
- "source": [
94
- "move_df = move_df.rename(index={'^MOVE': 'US_Rates_Vol'})"
95
- ]
96
- },
97
- {
98
- "cell_type": "code",
99
- "execution_count": 27,
100
- "id": "cf537011-9837-46e9-8715-b7cd4c21e226",
101
- "metadata": {},
102
- "outputs": [],
103
- "source": [
104
- "data_req = DataRequest(source='fred', tickers=us_rates_tickers, cat='rates')"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": 28,
110
- "id": "c5e4ca96-302b-4ba7-bd4b-010d891d1804",
111
- "metadata": {},
112
- "outputs": [],
113
- "source": [
114
- "us_rates_df = GetData(data_req).get_series()"
115
- ]
116
- },
117
- {
118
- "cell_type": "code",
119
- "execution_count": 457,
120
- "id": "f3a6829d-ea74-4495-b792-6f5af054d604",
121
- "metadata": {},
122
- "outputs": [
123
- {
124
- "data": {
125
- "text/plain": [
126
- "Index(['US_Rates_10Y', 'US_Rates_1M', 'US_Rates_1Y', 'US_Rates_20Y',\n",
127
- " 'US_Rates_2Y', 'US_Rates_30Y', 'US_Rates_3M', 'US_Rates_3Y',\n",
128
- " 'US_Rates_5Y', 'US_Rates_6M', 'US_Rates_7Y'],\n",
129
- " dtype='object', name='ticker')"
130
- ]
131
- },
132
- "execution_count": 457,
133
- "metadata": {},
134
- "output_type": "execute_result"
135
- }
136
- ],
137
- "source": [
138
- "us_rates_df.unstack().close.columns"
139
- ]
140
- },
141
- {
142
- "cell_type": "code",
143
- "execution_count": 458,
144
- "id": "f6e4298d-a46c-4efd-b888-8ebe947607f9",
145
- "metadata": {},
146
- "outputs": [],
147
- "source": [
148
- "us_rates_df = pd.concat([us_rates_df, move_df]).sort_index()"
149
- ]
150
- },
151
- {
152
- "cell_type": "code",
153
- "execution_count": 31,
154
- "id": "ee827d6c-31b4-40a9-927f-2e27f3d9db1e",
155
- "metadata": {},
156
- "outputs": [],
157
- "source": [
158
- "# us_rates_df.to_csv('us_rates_daily.csv')"
159
- ]
160
- },
161
- {
162
- "cell_type": "code",
163
- "execution_count": 460,
164
- "id": "885955b2-6e25-44d2-8f16-d60b79c00c5b",
165
- "metadata": {},
166
- "outputs": [],
167
- "source": [
168
- "us_rates_df.to_parquet('s3://factorlab-data/us_yields_daily.parquet')"
169
- ]
170
- },
171
- {
172
- "cell_type": "code",
173
- "execution_count": 428,
174
- "id": "ede2c7be-9061-4a9f-866e-6070472e195d",
175
- "metadata": {},
176
- "outputs": [],
177
- "source": [
178
- "er_df = pd.read_csv('../../../../rose_excess_returns_daily.csv', index_col=['date'], parse_dates=['date'])"
179
- ]
180
- },
181
- {
182
- "cell_type": "code",
183
- "execution_count": 429,
184
- "id": "d133fd38-0202-41f5-b034-885ba4e4fa12",
185
- "metadata": {},
186
- "outputs": [],
187
- "source": [
188
- "prices_df = pd.read_csv('../../../../rose_asset_prices_daily.csv', index_col=['date'], parse_dates=['date'])"
189
- ]
190
- },
191
- {
192
- "cell_type": "code",
193
- "execution_count": 430,
194
- "id": "1357a5c6-85ca-4ba0-8952-dd0334f5cf27",
195
- "metadata": {},
196
- "outputs": [],
197
- "source": [
198
- "iso3_to_iso2 = {\n",
199
- " 'arg': 'AR',\n",
200
- " 'aus': 'AU',\n",
201
- " 'aut': 'AT',\n",
202
- " 'bel': 'BE',\n",
203
- " 'bra': 'BR',\n",
204
- " 'can': 'CA',\n",
205
- " 'che': 'CH',\n",
206
- " 'chl': 'CL',\n",
207
- " 'chn': 'CN',\n",
208
- " 'col': 'CO',\n",
209
- " 'cze': 'CZ',\n",
210
- " 'deu': 'DE',\n",
211
- " 'dnk': 'DK', \n",
212
- " 'esp': 'ES',\n",
213
- " 'eur': 'EU',\n",
214
- " 'fin': 'FI',\n",
215
- " 'fra': 'FR',\n",
216
- " 'gbr': 'GB',\n",
217
- " 'grc': 'GR',\n",
218
- " 'hkg': 'HK',\n",
219
- " 'hun': 'HU',\n",
220
- " 'idn': 'ID',\n",
221
- " 'ind': 'IN',\n",
222
- " 'irl': 'IE',\n",
223
- " 'ita': 'IT',\n",
224
- " 'isl': 'IS',\n",
225
- " 'isr': 'IL',\n",
226
- " 'jpn': 'JP',\n",
227
- " 'mex': 'MX',\n",
228
- " 'mys': 'MY',\n",
229
- " 'nld': 'NL',\n",
230
- " 'nor': 'NO',\n",
231
- " 'nzl': 'NZ',\n",
232
- " 'per': 'PE',\n",
233
- " 'phl': 'PH',\n",
234
- " 'pol': 'PL',\n",
235
- " 'prt': 'PT',\n",
236
- " 'rus': 'RU',\n",
237
- " 'sgp': 'SG',\n",
238
- " 'swe': 'SE',\n",
239
- " 'kor': 'KR',\n",
240
- " 'swe': 'SE',\n",
241
- " 'twn': 'TW',\n",
242
- " 'tha': 'TH',\n",
243
- " 'tur': 'TR',\n",
244
- " 'usa': 'US',\n",
245
- " 'zaf': 'ZA'\n",
246
- "}"
247
- ]
248
- },
249
- {
250
- "cell_type": "code",
251
- "execution_count": 431,
252
- "id": "f21d322f-1b3b-4c62-9599-3bdf15b8a1af",
253
- "metadata": {},
254
- "outputs": [],
255
- "source": [
256
- "asset_mapping = {\n",
257
- " 'lr': 'Rates_Long', \n",
258
- " 'sr': 'Rates_Short', \n",
259
- " 'eq': 'Eqty',\n",
260
- " 'fx': 'FX'\n",
261
- "}"
262
- ]
263
- },
264
- {
265
- "cell_type": "code",
266
- "execution_count": 432,
267
- "id": "05a96e73-c6f7-4599-bcac-090700f3e3eb",
268
- "metadata": {},
269
- "outputs": [],
270
- "source": [
271
- "def map_columns(col_name):\n",
272
- " country_code, asset_code = col_name.split('.')\n",
273
- " new_country = iso3_to_iso2.get(country_code, country_code) # Handle missing mappings gracefully\n",
274
- " new_asset = asset_mapping.get(asset_code, asset_code) # Handle missing mappings gracefully\n",
275
- " \n",
276
- " if new_asset: # Only add underscore if the asset name is not empty\n",
277
- " return f'{new_country}_{new_asset}'\n",
278
- " else:\n",
279
- " return new_country"
280
- ]
281
- },
282
- {
283
- "cell_type": "code",
284
- "execution_count": 433,
285
- "id": "cde06474-83d5-4d6d-a321-d0293141a557",
286
- "metadata": {},
287
- "outputs": [],
288
- "source": [
289
- "def map_columns_ret(col_name):\n",
290
- " country_code, asset_code, ret = col_name.split('.')\n",
291
- " new_country = iso3_to_iso2.get(country_code, country_code) # Handle missing mappings gracefully\n",
292
- " new_asset = asset_mapping.get(asset_code, asset_code) # Handle missing mappings gracefully\n",
293
- " \n",
294
- " if new_asset: # Only add underscore if the asset name is not empty\n",
295
- " return f'{new_country}_{new_asset}'\n",
296
- " else:\n",
297
- " return new_country"
298
- ]
299
- },
300
- {
301
- "cell_type": "code",
302
- "execution_count": 434,
303
- "id": "618933c5-252e-4c9e-a020-bcb1207b00be",
304
- "metadata": {},
305
- "outputs": [],
306
- "source": [
307
- "# asset dfs\n",
308
- "eqty_df = prices_df.loc[:, prices_df.columns[:41]]\n",
309
- "eqty_er_df = er_df.loc[:, er_df.columns[:41]]\n",
310
- "rates_df = prices_df.loc[:, prices_df.columns[41:123]]\n",
311
- "rates_er_df = er_df.loc[:, er_df.columns[42:83]]\n",
312
- "fx_df = prices_df.loc[:, prices_df.columns[123:164]]\n",
313
- "fx_er_df = er_df.loc[:, er_df.columns[83:124]]\n",
314
- "cmdty_df = prices_df.loc[:, prices_df.columns[164:]]\n",
315
- "cmdty_er_df = er_df.loc[:, er_df.columns[124:]]"
316
- ]
317
- },
318
- {
319
- "cell_type": "code",
320
- "execution_count": 435,
321
- "id": "a2e94a95-8e9b-4949-b3ed-d20db0cd093b",
322
- "metadata": {},
323
- "outputs": [],
324
- "source": [
325
- "# convert price cols\n",
326
- "eqty_df.columns = [map_columns(col) for col in eqty_df.columns]\n",
327
- "rates_df.columns = [map_columns(col) for col in rates_df.columns]\n",
328
- "fx_df.columns = [map_columns(col) for col in fx_df.columns]\n",
329
- "cmdty_df.columns = [col.title() for col in cmdty_df.columns]\n",
330
- "cmdty_df.columns = ['Gold', 'Silver', 'Platinum', 'Palladium', 'Oil_WTI', 'Oil_Brent', 'Natgas',\n",
331
- " 'Rbob', 'Corn', 'Wheat', 'Soybeans', 'Coffee', 'Sugar', 'Cotton',\n",
332
- " 'Orange_Juice', 'Cocoa', 'Pork', 'Beef', 'Aluminum', 'Copper', 'Lead',\n",
333
- " 'Nickel', 'Tin', 'Zinc']"
334
- ]
335
- },
336
- {
337
- "cell_type": "code",
338
- "execution_count": 436,
339
- "id": "9054c525-7fd2-4ca9-86a3-e2dd01c3ac22",
340
- "metadata": {},
341
- "outputs": [],
342
- "source": [
343
- "# convert ret cols\n",
344
- "eqty_er_df.columns = [map_columns_ret(col) for col in eqty_er_df.columns]\n",
345
- "rates_er_df.columns = [map_columns_ret(col) for col in rates_er_df.columns]\n",
346
- "fx_er_df.columns = [map_columns_ret(col) for col in fx_er_df.columns]\n",
347
- "cmdty_er_df.columns = [col.title().split('.')[0] for col in cmdty_er_df.columns]\n",
348
- "cmdty_er_df.columns = ['Gold', 'Silver', 'Platinum', 'Palladium', 'Oil_WTI', 'Oil_Brent', 'Natgas',\n",
349
- " 'Rbob', 'Corn', 'Wheat', 'Soybeans', 'Coffee', 'Sugar', 'Cotton',\n",
350
- " 'Orange_Juice', 'Cocoa', 'Pork', 'Beef', 'Aluminum', 'Copper', 'Lead',\n",
351
- " 'Nickel', 'Tin', 'Zinc']"
352
- ]
353
- },
354
- {
355
- "cell_type": "code",
356
- "execution_count": 437,
357
- "id": "2dd75dbd-b287-4aca-af65-3f770d7dd20b",
358
- "metadata": {},
359
- "outputs": [],
360
- "source": [
361
- "# stack dfs\n",
362
- "eqty_df = eqty_df.stack().to_frame('close')\n",
363
- "eqty_er_df = eqty_er_df.stack().to_frame('er')\n",
364
- "rates_df = rates_df.stack().to_frame('close')\n",
365
- "rates_er_df = rates_er_df.stack().to_frame('er')\n",
366
- "fx_df = fx_df.stack().to_frame('close')\n",
367
- "fx_er_df = fx_er_df.stack().to_frame('er')\n",
368
- "cmdty_df = cmdty_df.stack().to_frame('close')\n",
369
- "cmdty_er_df = cmdty_er_df.stack().to_frame('er')"
370
- ]
371
- },
372
- {
373
- "cell_type": "code",
374
- "execution_count": 438,
375
- "id": "5fb80d6e-f98d-4143-9449-9c6c2ea68369",
376
- "metadata": {},
377
- "outputs": [],
378
- "source": [
379
- "# set indexes\n",
380
- "eqty_df.index.names = ['date', 'ticker']\n",
381
- "eqty_er_df.index.names = ['date', 'ticker']\n",
382
- "rates_df.index.names = ['date', 'ticker']\n",
383
- "rates_er_df.index.names = ['date', 'ticker']\n",
384
- "fx_df.index.names = ['date', 'ticker']\n",
385
- "fx_er_df.index.names = ['date', 'ticker']\n",
386
- "cmdty_df.index.names = ['date', 'ticker']\n",
387
- "cmdty_er_df.index.names = ['date', 'ticker']"
388
- ]
389
- },
390
- {
391
- "cell_type": "code",
392
- "execution_count": 439,
393
- "id": "cdfceea4-af45-4bd9-be59-1de835196138",
394
- "metadata": {},
395
- "outputs": [],
396
- "source": [
397
- "eqty_df = pd.concat([eqty_df, eqty_er_df], axis=1).sort_index()\n",
398
- "rates_df = pd.concat([rates_df, rates_er_df], axis=1).sort_index()\n",
399
- "fx_df = pd.concat([fx_df, fx_er_df], axis=1).sort_index()\n",
400
- "cmdty_df = pd.concat([cmdty_df, cmdty_er_df], axis=1).sort_index()"
401
- ]
402
- },
403
- {
404
- "cell_type": "code",
405
- "execution_count": 441,
406
- "id": "15a544cf-233b-4ea0-9e4a-9d6dc3e63d3b",
407
- "metadata": {},
408
- "outputs": [],
409
- "source": [
410
- "eqty_df.to_parquet('s3://factorlab-data/global_eqty_data_daily.parquet')"
411
- ]
412
- },
413
- {
414
- "cell_type": "code",
415
- "execution_count": 442,
416
- "id": "a2b5599b-afff-4241-8c18-06486b1968fa",
417
- "metadata": {},
418
- "outputs": [],
419
- "source": [
420
- "rates_df.to_parquet('s3://factorlab-data/global_rates_data_daily.parquet')"
421
- ]
422
- },
423
- {
424
- "cell_type": "code",
425
- "execution_count": 443,
426
- "id": "87b4f132-6f87-4770-9892-97e069d7f3c0",
427
- "metadata": {},
428
- "outputs": [],
429
- "source": [
430
- "fx_df.to_parquet('s3://factorlab-data/global_fx_data_daily.parquet')"
431
- ]
432
- },
433
- {
434
- "cell_type": "code",
435
- "execution_count": 444,
436
- "id": "5f21be89-a92d-4458-bca6-9a79f79b9d65",
437
- "metadata": {},
438
- "outputs": [],
439
- "source": [
440
- "cmdty_df.to_parquet('s3://factorlab-data/commodity_data_daily.parquet')"
441
- ]
442
- }
443
- ],
444
- "metadata": {
445
- "kernelspec": {
446
- "display_name": "cryptodatapy",
447
- "language": "python",
448
- "name": "cryptodatapy"
449
- },
450
- "language_info": {
451
- "codemirror_mode": {
452
- "name": "ipython",
453
- "version": 3
454
- },
455
- "file_extension": ".py",
456
- "mimetype": "text/x-python",
457
- "name": "python",
458
- "nbconvert_exporter": "python",
459
- "pygments_lexer": "ipython3",
460
- "version": "3.9.12"
461
- }
462
- },
463
- "nbformat": 4,
464
- "nbformat_minor": 5
465
- }