cryptodatapy 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,559 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "777df641",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import logging\n",
11
- "from typing import Dict, List, Optional, Union\n",
12
- "\n",
13
- "import pandas as pd\n",
14
- "\n",
15
- "from cryptodatapy.extract.datarequest import DataRequest\n",
16
- "from cryptodatapy.extract.web.web import Web\n",
17
- "from cryptodatapy.transform.convertparams import ConvertParams\n",
18
- "from cryptodatapy.transform.wrangle import WrangleData\n",
19
- "from cryptodatapy.util.datacredentials import DataCredentials\n",
20
- "\n",
21
- "# data credentials\n",
22
- "data_cred = DataCredentials()\n",
23
- "\n",
24
- "\n",
25
- "class AQR(Web):\n",
26
- " \"\"\"\n",
27
- " Retrieves data from AQR data sets.\n",
28
- " \"\"\"\n",
29
- "\n",
30
- " def __init__(\n",
31
- " self,\n",
32
- " categories=None,\n",
33
- " indexes: Optional[Dict[str, List[str]]] = None,\n",
34
- " assets: Optional[Dict[str, List[str]]] = None,\n",
35
- " markets: Optional[Dict[str, List[str]]] = None,\n",
36
- " market_types=None,\n",
37
- " fields: Optional[Dict[str, List[str]]] = None,\n",
38
- " frequencies=None,\n",
39
- " base_url: str = data_cred.aqr_base_url,\n",
40
- " file_formats: Optional[Union[str, List[str]]] = 'xlsx'\n",
41
- " ):\n",
42
- " \"\"\"\n",
43
- " Constructor\n",
44
- "\n",
45
- " Parameters\n",
46
- " ----------\n",
47
- " categories: list or str, {'crypto', 'fx', 'rates', 'eqty', 'cmdty', 'credit', 'macro', 'alt'}\n",
48
- " List or string of available categories, e.g. ['crypto', 'fx', 'alt'].\n",
49
- " indexes: dictionary, optional, default None\n",
50
- " Dictionary of available indexes, by cat-indexes key-value pairs, e.g. {'eqty': ['SPX', 'N225'],\n",
50
- " 'rates': [..., ...], ...}.\n",
52
- " assets: dictionary, optional, default None\n",
53
- " Dictionary of available assets, by cat-assets key-value pairs, e.g. {'rates': ['Germany 2Y', 'Japan 10Y',\n",
54
- " ...], 'eqty': ['SPY', 'TLT', ...], ...}.\n",
55
- " markets: dictionary, optional, default None\n",
56
- " Dictionary of available markets, by cat-markets key-value pairs, e.g. {'fx': ['EUR/USD', 'USD/JPY', ...],\n",
56
- " 'crypto': ['BTC/ETH', 'ETH/USDT', ...], ...}.\n",
58
- " market_types: list\n",
59
- " List of available market types, e.g. ['spot', 'perpetual_future', 'future', 'option'].\n",
60
- " fields: dictionary, optional, default None\n",
61
- " Dictionary of available fields, by cat-fields key-value pairs, e.g. {'cmdty': ['date', 'open', 'high',\n",
62
- " 'low', 'close', 'volume'], 'macro': ['actual', 'previous', 'expected', 'surprise']}\n",
63
- " frequencies: dictionary\n",
64
- " Dictionary of available frequencies, by cat-frequencies key-value pairs, e.g. {'fx':\n",
65
- " ['d', 'w', 'm', 'q', 'y'], 'rates': ['d', 'w', 'm', 'q', 'y'], 'eqty': ['d', 'w', 'm', 'q', 'y'], ...}.\n",
66
- " base_url: str, optional, default data_cred.aqr_base_url\n",
67
- " Base url used for GET requests. If not provided, default is set to base_url stored in DataCredentials.\n",
68
- " file_formats: list or str, {'xlsx', 'xls'}, default 'xlsx'\n",
69
- " List of available file formats.\n",
70
- " \"\"\"\n",
71
- " Web.__init__(\n",
72
- " self,\n",
73
- " categories,\n",
74
- " indexes,\n",
75
- " assets,\n",
76
- " markets,\n",
77
- " market_types,\n",
78
- " fields,\n",
79
- " frequencies,\n",
80
- " base_url,\n",
81
- " file_formats\n",
82
- " )\n",
83
- "\n",
84
- " if categories is None:\n",
85
- " self.categories = [\"fx\", \"rates\", \"eqty\", \"cmdty\", \"credit\"]\n",
86
- " if frequencies is None:\n",
87
- " self.frequencies = {\n",
88
- " \"fx\": [\"m\", \"q\", \"y\"],\n",
89
- " \"rates\": [\"m\", \"q\", \"y\"],\n",
90
- " \"cmdty\": [\"m\", \"q\", \"y\"],\n",
91
- " \"eqty\": [\"d\", \"w\", \"m\", \"q\", \"y\"],\n",
92
- " \"credit\": [\"m\", \"q\", \"y\"],\n",
93
- " }\n",
94
- " if market_types is None:\n",
95
- " self.market_types = [\"spot\", \"future\"]\n",
96
- " if fields is None:\n",
97
- " self.fields = self.get_fields_info()\n",
98
- "\n",
99
- " def get_indexes_info(self) -> None:\n",
100
- " \"\"\"\n",
101
- " Get indexes info.\n",
102
- " \"\"\"\n",
103
- " eqty_idxs = ['AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'DEU', 'DNK', 'ESP', 'FIN', 'FRA', 'GBR', 'GRC', 'HKG', 'IRL',\n",
104
- " 'ISR', 'ITA', 'JPN', 'NLD', 'NOR', 'NZL', 'PRT', 'SGP', 'SWE', 'USA', 'WLD']\n",
105
- " print(\n",
106
- " f\"AQR publishes excess returns data for the following equity market indexes: {eqty_idxs}\"\n",
107
- " )\n",
108
- "\n",
109
- " def get_assets_info(self) -> None:\n",
110
- " \"\"\"\n",
111
- " Get assets info.\n",
112
- " \"\"\"\n",
113
- " print(\n",
114
- " f\"AQR does not publish data for individual assets.\"\n",
115
- " )\n",
116
- "\n",
117
- " def get_markets_info(self) -> None:\n",
118
- " \"\"\"\n",
119
- " Get markets info.\n",
120
- " \"\"\"\n",
121
- " print(\n",
122
- " f\"AQR does not publish data for individual markets.\"\n",
123
- " )\n",
124
- "\n",
125
- " @staticmethod\n",
126
- " def get_fields_info(\n",
127
- " data_type: Optional[str] = \"market\", cat: Optional[str] = None\n",
128
- " ) -> Dict[str, List[str]]:\n",
129
- " \"\"\"\n",
130
- " Get fields info.\n",
131
- "\n",
132
- " Parameters\n",
133
- " ----------\n",
134
- " data_type: str, {'market', 'on-chain', 'off-chain'}, default 'market'\n",
135
- " Type of data.\n",
136
- " cat: str, {'crypto', 'eqty', 'fx', 'rates', 'cmdty', 'macro'}, optional, default None\n",
137
- " Asset class or time series category.\n",
138
- "\n",
139
- " Returns\n",
140
- " -------\n",
141
- " fields: dictionary\n",
142
- " Dictionary with info on available fields, by category.\n",
143
- " \"\"\"\n",
144
- " if data_type == \"on-chain\" or data_type == 'off-chain':\n",
145
- " raise ValueError(\n",
146
- " \"AQR only publishes total and excess return series used in their research papers.\"\n",
147
- " )\n",
148
- "\n",
149
- " # list of fields\n",
150
- " market_fields_list = ['ret', 'tr', 'er']\n",
151
- "\n",
152
- " # fields dict\n",
153
- " fields = {\n",
154
- " \"fx\": market_fields_list,\n",
155
- " \"rates\": market_fields_list,\n",
156
- " \"eqty\": market_fields_list,\n",
157
- " \"cmdty\": market_fields_list,\n",
158
- " \"credit\": market_fields_list,\n",
159
- " }\n",
160
- "\n",
161
- " # fields obj\n",
162
- " if cat is not None:\n",
163
- " fields = fields[cat]\n",
164
- "\n",
165
- " return fields\n",
166
- "\n",
167
- " def set_excel_params(self, data_req: DataRequest, ticker: str) -> Dict[str, Union[str, int]]:\n",
168
- " \"\"\"\n",
169
- " Sets excel parameters for reading excel files.\n",
170
- "\n",
171
- " Parameters\n",
172
- " ----------\n",
173
- " data_req: DataRequest\n",
174
- " Parameters of data request in CryptoDataPy format.\n",
175
- " ticker: str\n",
176
- " Ticker symbol.\n",
177
- "\n",
178
- " Returns\n",
179
- " -------\n",
180
- " dict: dictionary\n",
181
- " Dictionary with params to read excel file.\n",
182
- "\n",
183
- " \"\"\"\n",
184
- " # convert data request parameters to aqr format\n",
185
- " conv_data_req = ConvertParams(data_req).to_aqr()\n",
186
- "\n",
187
- " # param dict\n",
188
- " params = {\n",
189
- " 'file': conv_data_req['tickers'][ticker][0], # file name\n",
190
- " 'freq': conv_data_req['freq'], # freq\n",
191
- " 'format': self.file_formats[0], # file format\n",
192
- " 'sheet': conv_data_req['tickers'][ticker][1], # sheet name\n",
193
- " 'url': None, # url\n",
194
- " 'parse_dates': True, # parsing dates\n",
195
- " 'index_col': None, # index col\n",
196
- " 'header': None # header row\n",
197
- " }\n",
198
- " # set url, index col and header\n",
199
- " params['url'] = self.base_url + params['file'] + params['freq'] + \".\" + params['format']\n",
200
- " if params['file'] == 'Century-of-Factor-Premia-':\n",
201
- " params['index_col'] = 'Unnamed: 0'\n",
202
- " params['header'] = 18\n",
203
- " elif params['file'] == 'Time-Series-Momentum-Factors-':\n",
204
- " params['index_col'] = 'Unnamed: 0'\n",
205
- " params['header'] = 17\n",
206
- " elif params['file'] == 'Commodities-for-the-Long-Run-Index-Level-Data-':\n",
207
- " params['index_col'] = 'Unnamed: 0'\n",
208
- " params['header'] = 10\n",
209
- " elif params['file'] == 'Credit-Risk-Premium-Preliminary-Paper-Data':\n",
210
- " params['url'] = self.base_url + params['file'] + \".\" + params['format']\n",
211
- " params['index_col'] = 'Date'\n",
212
- " params['header'] = 10\n",
213
- " else:\n",
214
- " params['index_col'] = 'DATE'\n",
215
- " params['header'] = 18\n",
216
- "\n",
217
- " return params\n",
218
- "\n",
219
- " def get_series(self, data_req: DataRequest) -> Dict[str, pd.DataFrame]:\n",
220
- " \"\"\"\n",
221
- " Gets series from AQR data file.\n",
222
- "\n",
223
- " Parameters\n",
224
- " ----------\n",
225
- " data_req: DataRequest\n",
226
- " Parameters of data request in CryptoDataPy format.\n",
227
- "\n",
228
- " Returns\n",
229
- " -------\n",
230
- " dfs_dict: dictionary\n",
231
- " Dictionary with ticker-dataframe key-value pairs.\n",
232
- "\n",
233
- " \"\"\"\n",
234
- " # convert data request parameters to aqr format\n",
235
- " conv_data_req = ConvertParams(data_req).to_aqr()\n",
236
- "\n",
237
- " try:\n",
238
- " # fetch data\n",
239
- " df_dicts = {}\n",
240
- "\n",
241
- " for ticker in conv_data_req['tickers']:\n",
242
- " # set excel params\n",
243
- " params = self.set_excel_params(data_req, ticker)\n",
244
- " # fetch excel file\n",
245
- " df1 = pd.read_excel(params['url'], sheet_name=params['sheet'], index_col=params['index_col'],\n",
246
- " parse_dates=params['parse_dates'], header=params['header'])\n",
247
- " # add df to dicts\n",
248
- " df_dicts[ticker] = df1\n",
249
- "\n",
250
- " except Exception as e:\n",
251
- " logging.warning(e)\n",
252
- " logging.warning(f\"Failed to get data for: {conv_data_req['tickers']}.\")\n",
253
- "\n",
254
- " else:\n",
255
- " return df_dicts\n",
256
- "\n",
257
- " @staticmethod\n",
258
- " def wrangle_data_resp(data_req: DataRequest, data_resp: Dict[str, pd.DataFrame]) -> pd.DataFrame:\n",
259
- " \"\"\"\n",
260
- " Wrangle data response.\n",
261
- "\n",
262
- " Parameters\n",
263
- " ----------\n",
264
- " data_req: DataRequest\n",
265
- " Parameters of data request in CryptoDataPy format.\n",
266
- " data_resp: Dictionary\n",
267
- " Dictionary with ticker-dataframe key-value pairs.\n",
268
- "\n",
269
- " Returns\n",
270
- " -------\n",
271
- " df: pd.DataFrame\n",
272
- " Wrangled dataframe with DatetimeIndex (level 0), ticker (level 1), and values for market or macro series\n",
273
- " for selected fields (cols), in tidy format.\n",
274
- " \"\"\"\n",
275
- " # wrangle data resp\n",
276
- " df = WrangleData(data_req, data_resp).aqr()\n",
277
- "\n",
278
- " return df\n",
279
- "\n",
280
- " def get_tidy_data(self, data_req: DataRequest) -> pd.DataFrame:\n",
281
- " \"\"\"\n",
282
- " Gets data from AQR and wrangles the data response into tidy data format.\n",
283
- "\n",
284
- " Parameters\n",
285
- " ----------\n",
286
- " data_req: DataRequest\n",
287
- " Parameters of data request in CryptoDataPy format.\n",
288
- "\n",
289
- " Returns\n",
290
- " -------\n",
291
- " df: pd.DataFrame - MultiIndex\n",
292
- " Dataframe with DatetimeIndex (level 0), tickers (level 1) and actual values (cols),\n",
293
- " in tidy data format.\n",
294
- " \"\"\"\n",
295
- " # change to get series\n",
296
- " data_resp = self.get_series(data_req)\n",
297
- " # wrangle data resp\n",
298
- " df = self.wrangle_data_resp(data_req, data_resp)\n",
299
- "\n",
300
- " return df\n",
301
- "\n",
302
- " def check_params(self, data_req: DataRequest) -> None:\n",
303
- " \"\"\"\n",
304
- " Checks the data request parameters before requesting data to reduce API calls\n",
305
- " and improve efficiency.\n",
306
- "\n",
307
- " \"\"\"\n",
308
- " # check cat\n",
309
- " if data_req.cat not in self.categories:\n",
310
- " raise ValueError(\n",
311
- " f\"Select a valid category. Valid categories are: {self.categories}.\"\n",
312
- " )\n",
313
- " # check freq\n",
314
- " if data_req.freq not in self.frequencies[data_req.cat]:\n",
315
- " raise ValueError(\n",
316
- " f\"Invalid data frequency. Valid data frequencies are: {self.frequencies}.\"\n",
317
- " )\n",
318
- " # check fields\n",
319
- " if not any(field in self.fields[data_req.cat] for field in data_req.fields):\n",
320
- " raise ValueError(f\"Invalid fields. Valid data fields are: {self.fields}.\")\n",
321
- "\n",
322
- " def get_data(self, data_req: DataRequest) -> pd.DataFrame:\n",
323
- " \"\"\"\n",
324
- " Get data.\n",
325
- "\n",
326
- " Parameters\n",
327
- " data_req: DataRequest\n",
328
- " Parameters of data request in CryptoDataPy format.\n",
329
- "\n",
330
- " Returns\n",
331
- " -------\n",
332
- " df: pd.DataFrame - MultiIndex\n",
333
- " DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for selected fields (cols),\n",
334
- " in tidy format.\n",
335
- " \"\"\"\n",
336
- " # check params\n",
337
- " self.check_params(data_req)\n",
338
- "\n",
339
- " # get tidy data\n",
340
- " df = self.get_tidy_data(data_req)\n",
341
- "\n",
342
- " # check if df empty\n",
343
- " if df.empty:\n",
344
- " raise Exception(\n",
345
- " \"No data returned. Check data request parameters and try again.\"\n",
346
- " )\n",
347
- "\n",
348
- " return df.sort_index()\n"
349
- ]
350
- },
351
- {
352
- "cell_type": "code",
353
- "execution_count": 11,
354
- "id": "67d71194",
355
- "metadata": {},
356
- "outputs": [],
357
- "source": [
358
- "aqr = AQR()"
359
- ]
360
- },
361
- {
362
- "cell_type": "code",
363
- "execution_count": 12,
364
- "id": "b2d605c3",
365
- "metadata": {},
366
- "outputs": [
367
- {
368
- "name": "stdout",
369
- "output_type": "stream",
370
- "text": [
371
- "AQR publishes excess returns data for the following equity market indexes: ['AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'DEU', 'DNK', 'ESP', 'FIN', 'FRA', 'GBR', 'GRC', 'HKG', 'IRL', 'ISR', 'ITA', 'JPN', 'NLD', 'NOR', 'NZL', 'PRT', 'SGP', 'SWE', 'USA', 'WLD']\n"
372
- ]
373
- }
374
- ],
375
- "source": [
376
- "aqr.get_indexes_info()"
377
- ]
378
- },
379
- {
380
- "cell_type": "code",
381
- "execution_count": 26,
382
- "id": "b508d7eb",
383
- "metadata": {},
384
- "outputs": [],
385
- "source": [
386
- "dr = DataRequest(source='aqr', tickers=['US_Eqty_Qual', 'US_Eqty_Beta'], freq='m', cat='eqty', fields='tr')"
387
- ]
388
- },
389
- {
390
- "cell_type": "code",
391
- "execution_count": 27,
392
- "id": "ccc66aad",
393
- "metadata": {},
394
- "outputs": [],
395
- "source": [
396
- "df = aqr.get_data(dr)"
397
- ]
398
- },
399
- {
400
- "cell_type": "code",
401
- "execution_count": 28,
402
- "id": "66db393a",
403
- "metadata": {},
404
- "outputs": [
405
- {
406
- "data": {
407
- "text/html": [
408
- "<div>\n",
409
- "<style scoped>\n",
410
- " .dataframe tbody tr th:only-of-type {\n",
411
- " vertical-align: middle;\n",
412
- " }\n",
413
- "\n",
414
- " .dataframe tbody tr th {\n",
415
- " vertical-align: top;\n",
416
- " }\n",
417
- "\n",
418
- " .dataframe thead th {\n",
419
- " text-align: right;\n",
420
- " }\n",
421
- "</style>\n",
422
- "<table border=\"1\" class=\"dataframe\">\n",
423
- " <thead>\n",
424
- " <tr style=\"text-align: right;\">\n",
425
- " <th></th>\n",
426
- " <th></th>\n",
427
- " <th>er</th>\n",
428
- " </tr>\n",
429
- " <tr>\n",
430
- " <th>date</th>\n",
431
- " <th>ticker</th>\n",
432
- " <th></th>\n",
433
- " </tr>\n",
434
- " </thead>\n",
435
- " <tbody>\n",
436
- " <tr>\n",
437
- " <th>1930-12-31</th>\n",
438
- " <th>US_Eqty_Beta</th>\n",
439
- " <td>-0.000558</td>\n",
440
- " </tr>\n",
441
- " <tr>\n",
442
- " <th>1931-01-31</th>\n",
443
- " <th>US_Eqty_Beta</th>\n",
444
- " <td>-0.022446</td>\n",
445
- " </tr>\n",
446
- " <tr>\n",
447
- " <th>1931-02-28</th>\n",
448
- " <th>US_Eqty_Beta</th>\n",
449
- " <td>-0.077423</td>\n",
450
- " </tr>\n",
451
- " <tr>\n",
452
- " <th>1931-03-31</th>\n",
453
- " <th>US_Eqty_Beta</th>\n",
454
- " <td>0.029235</td>\n",
455
- " </tr>\n",
456
- " <tr>\n",
457
- " <th>1931-04-30</th>\n",
458
- " <th>US_Eqty_Beta</th>\n",
459
- " <td>-0.012986</td>\n",
460
- " </tr>\n",
461
- " <tr>\n",
462
- " <th>...</th>\n",
463
- " <th>...</th>\n",
464
- " <td>...</td>\n",
465
- " </tr>\n",
466
- " <tr>\n",
467
- " <th>2023-04-30</th>\n",
468
- " <th>US_Eqty_Qual</th>\n",
469
- " <td>0.010278</td>\n",
470
- " </tr>\n",
471
- " <tr>\n",
472
- " <th rowspan=\"2\" valign=\"top\">2023-05-31</th>\n",
473
- " <th>US_Eqty_Beta</th>\n",
474
- " <td>-0.053566</td>\n",
475
- " </tr>\n",
476
- " <tr>\n",
477
- " <th>US_Eqty_Qual</th>\n",
478
- " <td>-0.039877</td>\n",
479
- " </tr>\n",
480
- " <tr>\n",
481
- " <th rowspan=\"2\" valign=\"top\">2023-06-30</th>\n",
482
- " <th>US_Eqty_Beta</th>\n",
483
- " <td>-0.01</td>\n",
484
- " </tr>\n",
485
- " <tr>\n",
486
- " <th>US_Eqty_Qual</th>\n",
487
- " <td>-0.013823</td>\n",
488
- " </tr>\n",
489
- " </tbody>\n",
490
- "</table>\n",
491
- "<p>1903 rows × 1 columns</p>\n",
492
- "</div>"
493
- ],
494
- "text/plain": [
495
- " er\n",
496
- "date ticker \n",
497
- "1930-12-31 US_Eqty_Beta -0.000558\n",
498
- "1931-01-31 US_Eqty_Beta -0.022446\n",
499
- "1931-02-28 US_Eqty_Beta -0.077423\n",
500
- "1931-03-31 US_Eqty_Beta 0.029235\n",
501
- "1931-04-30 US_Eqty_Beta -0.012986\n",
502
- "... ...\n",
503
- "2023-04-30 US_Eqty_Qual 0.010278\n",
504
- "2023-05-31 US_Eqty_Beta -0.053566\n",
505
- " US_Eqty_Qual -0.039877\n",
506
- "2023-06-30 US_Eqty_Beta -0.01\n",
507
- " US_Eqty_Qual -0.013823\n",
508
- "\n",
509
- "[1903 rows x 1 columns]"
510
- ]
511
- },
512
- "execution_count": 28,
513
- "metadata": {},
514
- "output_type": "execute_result"
515
- }
516
- ],
517
- "source": [
518
- "df"
519
- ]
520
- },
521
- {
522
- "cell_type": "code",
523
- "execution_count": null,
524
- "id": "3bde6a95",
525
- "metadata": {},
526
- "outputs": [],
527
- "source": []
528
- },
529
- {
530
- "cell_type": "code",
531
- "execution_count": null,
532
- "id": "dd8784b5",
533
- "metadata": {},
534
- "outputs": [],
535
- "source": []
536
- }
537
- ],
538
- "metadata": {
539
- "kernelspec": {
540
- "display_name": "cryptodatapy",
541
- "language": "python",
542
- "name": "cryptodatapy"
543
- },
544
- "language_info": {
545
- "codemirror_mode": {
546
- "name": "ipython",
547
- "version": 3
548
- },
549
- "file_extension": ".py",
550
- "mimetype": "text/x-python",
551
- "name": "python",
552
- "nbconvert_exporter": "python",
553
- "pygments_lexer": "ipython3",
554
- "version": "3.9.12"
555
- }
556
- },
557
- "nbformat": 4,
558
- "nbformat_minor": 5
559
- }