PyPI - cryptodatapy - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

cryptodatapy/conf/fields.csv +1 -1
cryptodatapy/extract/datarequest.py +169 -28
cryptodatapy/extract/libraries/Untitled.ipynb +199 -0
cryptodatapy/extract/libraries/ccxt.ipynb +747 -0
cryptodatapy/extract/libraries/ccxt_api.py +631 -358
cryptodatapy/extract/libraries/pandasdr_api.py +153 -138
cryptodatapy/extract/libraries/yfinance_api.py +511 -0
cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +226 -30
cryptodatapy/transform/cmdty_data.ipynb +402 -0
cryptodatapy/transform/convertparams.py +160 -303
cryptodatapy/transform/eqty_data.ipynb +126 -99
cryptodatapy/transform/wrangle.py +152 -43
{cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/METADATA +9 -6
{cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/RECORD +16 -12
{cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/WHEEL +1 -1
{cryptodatapy-0.2.6.dist-info → cryptodatapy-0.2.8.dist-info}/LICENSE +0 -0

cryptodatapy/transform/eqty_data.ipynb CHANGED Viewed

@@ -2,10 +2,19 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 1,
    "id": "9fea9fae",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "fatal: bad revision 'HEAD'\n",
+      "Importing plotly failed. Interactive plots will not work.\n"
+     ]
+    }
+   ],
    "source": [
     "import pandas as pd\n",
     "import numpy as np\n",
@@ -30,7 +39,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 2,
    "id": "2ad72bc7-5fdd-4ae5-8d9e-e90118efcc26",
    "metadata": {},
    "outputs": [],
@@ -40,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 3,
    "id": "0342bab7-a792-4be3-8d4d-44c4343d0e6a",
    "metadata": {},
    "outputs": [
@@ -76,7 +85,7 @@
        "Name: yahoo_id, dtype: object"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -95,7 +104,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 4,
    "id": "d875cd96-a29c-4e22-9806-a1b1c2513564",
    "metadata": {},
    "outputs": [],
@@ -105,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 5,
    "id": "1bf0af0d-7ed7-4e07-9da1-5625b1f32bce",
    "metadata": {},
    "outputs": [
@@ -168,7 +177,7 @@
        " 'REET']"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -179,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 6,
    "id": "0b1bc395-40dd-44c0-bac1-dd1e00f8a5c4",
    "metadata": {},
    "outputs": [],
@@ -189,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 7,
    "id": "2674260a-56c7-40a4-9708-0eb335fa075d",
    "metadata": {},
    "outputs": [
@@ -207,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 8,
    "id": "ee0523a8-c6ee-42b9-9410-36b26906f2de",
    "metadata": {},
    "outputs": [
@@ -425,78 +434,6 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2024-09-11</th>\n",
-       "      <td>64.93</td>\n",
-       "      <td>66.199997</td>\n",
-       "      <td>23.629999</td>\n",
-       "      <td>92.019997</td>\n",
-       "      <td>31.809999</td>\n",
-       "      <td>8.06</td>\n",
-       "      <td>69.559998</td>\n",
-       "      <td>50.709999</td>\n",
-       "      <td>25.52</td>\n",
-       "      <td>36.630001</td>\n",
-       "      <td>...</td>\n",
-       "      <td>61.66</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>57.130001</td>\n",
-       "      <td>49.330002</td>\n",
-       "      <td>114.629997</td>\n",
-       "      <td>17.67</td>\n",
-       "      <td>26.59</td>\n",
-       "      <td>25.309999</td>\n",
-       "      <td>39.07</td>\n",
-       "      <td>15.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-09-12</th>\n",
-       "      <td>65.75</td>\n",
-       "      <td>67.93</td>\n",
-       "      <td>23.870001</td>\n",
-       "      <td>92.519997</td>\n",
-       "      <td>32.130001</td>\n",
-       "      <td>8.06</td>\n",
-       "      <td>70.25</td>\n",
-       "      <td>51.07</td>\n",
-       "      <td>25.450001</td>\n",
-       "      <td>36.790001</td>\n",
-       "      <td>...</td>\n",
-       "      <td>62.509998</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>57.759998</td>\n",
-       "      <td>48.939999</td>\n",
-       "      <td>115.629997</td>\n",
-       "      <td>18.1</td>\n",
-       "      <td>26.780001</td>\n",
-       "      <td>25.85</td>\n",
-       "      <td>39.950001</td>\n",
-       "      <td>14.96</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2024-09-13</th>\n",
-       "      <td>66.139999</td>\n",
-       "      <td>68.529999</td>\n",
-       "      <td>24.120001</td>\n",
-       "      <td>93.5</td>\n",
-       "      <td>32.400002</td>\n",
-       "      <td>8.06</td>\n",
-       "      <td>69.870003</td>\n",
-       "      <td>51.389999</td>\n",
-       "      <td>25.93</td>\n",
-       "      <td>36.900002</td>\n",
-       "      <td>...</td>\n",
-       "      <td>63.299999</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>57.849998</td>\n",
-       "      <td>48.790001</td>\n",
-       "      <td>116.209999</td>\n",
-       "      <td>18.139999</td>\n",
-       "      <td>27.01</td>\n",
-       "      <td>25.950001</td>\n",
-       "      <td>40.549999</td>\n",
-       "      <td>15.11</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>2024-09-16</th>\n",
        "      <td>66.760002</td>\n",
        "      <td>68.5</td>\n",
@@ -544,9 +481,81 @@
        "      <td>40.439999</td>\n",
        "      <td>15.19</td>\n",
        "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2024-09-18</th>\n",
+       "      <td>66.400002</td>\n",
+       "      <td>68.769997</td>\n",
+       "      <td>24.27</td>\n",
+       "      <td>94.199997</td>\n",
+       "      <td>32.580002</td>\n",
+       "      <td>8.06</td>\n",
+       "      <td>69.389999</td>\n",
+       "      <td>51.16</td>\n",
+       "      <td>26.42</td>\n",
+       "      <td>36.990002</td>\n",
+       "      <td>...</td>\n",
+       "      <td>63.259998</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>57.66</td>\n",
+       "      <td>49.880001</td>\n",
+       "      <td>116.160004</td>\n",
+       "      <td>18.049999</td>\n",
+       "      <td>26.889999</td>\n",
+       "      <td>25.66</td>\n",
+       "      <td>40.900002</td>\n",
+       "      <td>15.07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2024-09-19</th>\n",
+       "      <td>67.709999</td>\n",
+       "      <td>70.150002</td>\n",
+       "      <td>24.690001</td>\n",
+       "      <td>95.419998</td>\n",
+       "      <td>33.09</td>\n",
+       "      <td>8.06</td>\n",
+       "      <td>71.230003</td>\n",
+       "      <td>51.509998</td>\n",
+       "      <td>26.74</td>\n",
+       "      <td>37.490002</td>\n",
+       "      <td>...</td>\n",
+       "      <td>63.91</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>58.110001</td>\n",
+       "      <td>47.720001</td>\n",
+       "      <td>118.230003</td>\n",
+       "      <td>18.219999</td>\n",
+       "      <td>26.98</td>\n",
+       "      <td>25.799999</td>\n",
+       "      <td>41.400002</td>\n",
+       "      <td>15.41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2024-09-20</th>\n",
+       "      <td>68.089996</td>\n",
+       "      <td>69.389999</td>\n",
+       "      <td>24.549999</td>\n",
+       "      <td>94.970001</td>\n",
+       "      <td>32.689999</td>\n",
+       "      <td>8.06</td>\n",
+       "      <td>71.269997</td>\n",
+       "      <td>50.66</td>\n",
+       "      <td>26.940001</td>\n",
+       "      <td>37.040001</td>\n",
+       "      <td>...</td>\n",
+       "      <td>63.32</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>58.740002</td>\n",
+       "      <td>47.43</td>\n",
+       "      <td>117.760002</td>\n",
+       "      <td>18.15</td>\n",
+       "      <td>26.83</td>\n",
+       "      <td>25.799999</td>\n",
+       "      <td>41.349998</td>\n",
+       "      <td>15.4</td>\n",
+       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>7965 rows × 54 columns</p>\n",
+       "<p>7968 rows × 54 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -558,11 +567,11 @@
        "1993-02-03       <NA>       <NA>       <NA>       <NA>       <NA>  <NA>   \n",
        "1993-02-04       <NA>       <NA>       <NA>       <NA>       <NA>  <NA>   \n",
        "...               ...        ...        ...        ...        ...   ...   \n",
-       "2024-09-11      64.93  66.199997  23.629999  92.019997  31.809999  8.06   \n",
-       "2024-09-12      65.75      67.93  23.870001  92.519997  32.130001  8.06   \n",
-       "2024-09-13  66.139999  68.529999  24.120001       93.5  32.400002  8.06   \n",
        "2024-09-16  66.760002       68.5      24.48  94.160004  32.599998  8.06   \n",
        "2024-09-17      66.43  68.260002      24.15  94.330002  32.549999  8.06   \n",
+       "2024-09-18  66.400002  68.769997      24.27  94.199997  32.580002  8.06   \n",
+       "2024-09-19  67.709999  70.150002  24.690001  95.419998      33.09  8.06   \n",
+       "2024-09-20  68.089996  69.389999  24.549999  94.970001  32.689999  8.06   \n",
        "\n",
        "ticker           EIDO       EIRL        EIS       ENOR  ...        PAK  PGAL  \\\n",
        "date                                                    ...                    \n",
@@ -572,11 +581,11 @@
        "1993-02-03       <NA>       <NA>       <NA>       <NA>  ...       <NA>  <NA>   \n",
        "1993-02-04       <NA>       <NA>       <NA>       <NA>  ...       <NA>  <NA>   \n",
        "...               ...        ...        ...        ...  ...        ...   ...   \n",
-       "2024-09-11  69.559998  50.709999      25.52  36.630001  ...      61.66  <NA>   \n",
-       "2024-09-12      70.25      51.07  25.450001  36.790001  ...  62.509998  <NA>   \n",
-       "2024-09-13  69.870003  51.389999      25.93  36.900002  ...  63.299999  <NA>   \n",
        "2024-09-16  70.540001  51.580002  26.049999  37.299999  ...      63.52  <NA>   \n",
        "2024-09-17  69.709999  51.400002      26.34  37.029999  ...  63.220001  <NA>   \n",
+       "2024-09-18  69.389999      51.16      26.42  36.990002  ...  63.259998  <NA>   \n",
+       "2024-09-19  71.230003  51.509998      26.74  37.490002  ...      63.91  <NA>   \n",
+       "2024-09-20  71.269997      50.66  26.940001  37.040001  ...      63.32  <NA>   \n",
        "\n",
        "ticker            QAT       REET         SPY        THD        TUR        UAE  \\\n",
        "date                                                                            \n",
@@ -586,11 +595,11 @@
        "1993-02-03       <NA>       <NA>        <NA>       <NA>       <NA>       <NA>   \n",
        "1993-02-04       <NA>       <NA>        <NA>       <NA>       <NA>       <NA>   \n",
        "...               ...        ...         ...        ...        ...        ...   \n",
-       "2024-09-11  57.130001  49.330002  114.629997      17.67      26.59  25.309999   \n",
-       "2024-09-12  57.759998  48.939999  115.629997       18.1  26.780001      25.85   \n",
-       "2024-09-13  57.849998  48.790001  116.209999  18.139999      27.01  25.950001   \n",
        "2024-09-16  58.130001  49.240002  116.610001  18.200001      27.17      26.07   \n",
        "2024-09-17  57.959999  50.299999  116.489998  18.049999  26.969999      25.82   \n",
+       "2024-09-18      57.66  49.880001  116.160004  18.049999  26.889999      25.66   \n",
+       "2024-09-19  58.110001  47.720001  118.230003  18.219999      26.98  25.799999   \n",
+       "2024-09-20  58.740002      47.43  117.760002      18.15      26.83  25.799999   \n",
        "\n",
        "ticker           URTH    VXX  \n",
        "date                          \n",
@@ -600,16 +609,16 @@
        "1993-02-03       <NA>   <NA>  \n",
        "1993-02-04       <NA>   <NA>  \n",
        "...               ...    ...  \n",
-       "2024-09-11      39.07   15.0  \n",
-       "2024-09-12  39.950001  14.96  \n",
-       "2024-09-13  40.549999  15.11  \n",
        "2024-09-16      40.66  15.09  \n",
        "2024-09-17  40.439999  15.19  \n",
+       "2024-09-18  40.900002  15.07  \n",
+       "2024-09-19  41.400002  15.41  \n",
+       "2024-09-20  41.349998   15.4  \n",
        "\n",
-       "[7965 rows x 54 columns]"
+       "[7968 rows x 54 columns]"
       ]
      },
-     "execution_count": 56,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -620,10 +629,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "83f30ee7-b686-4ee8-8c5f-9333d2be31c8",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "msci_etf_df.to_parquet('s3://factorlab-data/global_msci_eqty_etf_data_daily.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fa7fb5fc-e6f3-4a1e-bda5-365d09e2b3e0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ad35f12-5943-4640-96fb-07b7ea738abd",
+   "metadata": {},
+   "outputs": [],
    "source": []
   },
   {

cryptodatapy/transform/wrangle.py CHANGED Viewed

@@ -469,7 +469,6 @@ class WrangleData:
     """
     Wrangles time series data responses from various APIs into tidy data format.
     """
     def __init__(self, data_req: DataRequest, data_resp: Union[Dict[str, pd.DataFrame], pd.DataFrame]):
         """
         Constructor
@@ -484,6 +483,7 @@ class WrangleData:
         """
         self.data_req = data_req
         self.data_resp = data_resp
+        self.tidy_data = pd.DataFrame()
     def cryptocompare(self) -> pd.DataFrame:
         """
@@ -717,16 +717,22 @@ class WrangleData:
         """
         # convert fields to lib
         self.convert_fields_to_lib(data_source='dbnomics')
         # convert to datetime
         self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
         # set index
         self.data_resp = self.data_resp.set_index('date').sort_index()
         # resample
         self.data_resp = self.data_resp.resample(self.data_req.freq).last().ffill()
         # filter dates
         self.filter_dates()
         # type conversion
         self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
         # remove bad data
         self.data_resp = self.data_resp[self.data_resp != 0]  # 0 values
         self.data_resp = self.data_resp[~self.data_resp.index.duplicated()]  # duplicate rows
@@ -734,36 +740,125 @@ class WrangleData:
         return self.data_resp
-    def ccxt(self) -> pd.DataFrame:
+    def ccxt_ohlcv(self) -> pd.DataFrame:
         """
-        Wrangles CCXT data response to dataframe with tidy data format.
+        Wrangles CCXT OHLCV data response to dataframe with tidy data format.
         Returns
         -------
         pd.DataFrame
-            Wrangled dataframe into tidy data format.
+            Dataframe with tidy data format.
+        """
+        # field cols
+        cols = ["date", "open", "high", "low", "close", "volume"]
+        # add tickers
+        for i in range(len(self.data_req.source_markets)):
+            df = pd.DataFrame(self.data_resp[i], columns=cols)
+            df['ticker'] = self.data_req.source_markets[i]
+            self.tidy_data = pd.concat([self.tidy_data, df])
+        # convert to datetime
+        self.tidy_data['date'] = pd.to_datetime(self.tidy_data['date'], unit='ms')
+        # set index
+        self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
+        return self.tidy_data
+    def ccxt_funding_rates(self) -> pd.DataFrame:
         """
-        # convert fields to lib
+        Wrangles CCXT funding rates data response to dataframe with tidy data format.
+        Returns
+        -------
+        pd.DataFrame
+            Dataframe with tidy data format.
+        """
+        # add tickers
+        for i in range(len(self.data_req.source_markets)):
+            df = pd.DataFrame(self.data_resp[i])
+            self.tidy_data = pd.concat([self.tidy_data, df])
+        self.tidy_data = self.tidy_data[['symbol', 'fundingRate', 'datetime']]
+        self.data_resp = self.tidy_data
+        # convert to lib fields
         self.convert_fields_to_lib(data_source='ccxt')
+        self.tidy_data = self.data_resp
         # convert to datetime
-        if 'close' in self.data_resp.columns:
-            self.data_resp['date'] = pd.to_datetime(self.data_resp.date, unit='ms')
-        elif 'funding_rate' in self.data_resp.columns:
-            self.data_resp['date'] = pd.to_datetime(self.data_resp.set_index('date').index).floor('s').tz_localize(None)
+        self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
         # set index
-        self.data_resp = self.data_resp.set_index('date').sort_index()
+        self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
         # resample
-        if 'funding_rate' in self.data_resp.columns and self.data_req.freq in ['d', 'w', 'm', 'q', 'y']:
-            self.data_resp = ((self.data_resp.funding_rate + 1).resample(self.data_req.freq).prod() - 1).to_frame()
+        if self.data_req.freq in ['d', 'w', 'm', 'q', 'y']:
+            self.tidy_data = (
+                    (1 + self.tidy_data.funding_rate)
+                    .groupby('ticker')
+                    .resample('d', level='date')
+                    .prod() - 1
+            ).to_frame().swaplevel('ticker', 'date').sort_index()
+        return self.tidy_data
+    def ccxt_open_interest(self) -> pd.DataFrame:
+        """
+        Wrangles CCXT open interest data response to dataframe with tidy data format.
+        Returns
+        -------
+        pd.DataFrame
+            Dataframe with tidy data format.
+        """
+        # add tickers
+        for i in range(len(self.data_req.source_markets)):
+            df = pd.DataFrame(self.data_resp[i])
+            self.tidy_data = pd.concat([self.tidy_data, df])
+        self.tidy_data = self.tidy_data[['symbol', 'openInterestAmount', 'datetime']]
+        self.data_resp = self.tidy_data
+        # convert to lib fields
+        self.convert_fields_to_lib(data_source='ccxt')
+        self.tidy_data = self.data_resp
+        # convert to datetime
+        self.tidy_data['date'] = pd.to_datetime(self.tidy_data.set_index('date').index).floor('s').tz_localize(None)
+        # set index
+        self.tidy_data = self.tidy_data.set_index(['date', 'ticker']).sort_index()
+        return self.tidy_data
+    def ccxt(self, data_type: str) -> pd.DataFrame:
+        """
+        Wrangles CCXT data response to dataframe with tidy data format.
+        Returns
+        -------
+        pd.DataFrame
+            Wrangled dataframe into tidy data format.
+        """
+        if data_type == 'ohlcv':
+            self.tidy_data = self.ccxt_ohlcv()
+        elif data_type == 'funding_rates':
+            self.tidy_data = self.ccxt_funding_rates()
+        elif data_type == 'open_interest':
+            self.tidy_data = self.ccxt_open_interest()
+        else:
+            raise ValueError(f"Data type {data_type} not supported.")
         # type conversion
-        self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
+        self.tidy_data = self.tidy_data.apply(pd.to_numeric, errors='coerce').convert_dtypes()
         # remove bad data
-        self.data_resp = self.data_resp[self.data_resp != 0]  # 0 values
-        self.data_resp = self.data_resp[~self.data_resp.index.duplicated()]  # duplicate rows
-        self.data_resp = self.data_resp.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs
+        self.tidy_data = self.tidy_data[self.tidy_data != 0]  # 0 values
+        self.tidy_data = self.tidy_data[~self.tidy_data.index.duplicated()]  # duplicate rows
+        self.tidy_data = self.tidy_data.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs
-        return self.data_resp
+        return self.tidy_data
     def fred(self) -> pd.DataFrame:
         """
@@ -773,24 +868,29 @@ class WrangleData:
         -------
         pd.DataFrame
             Wrangled dataframe into tidy data format.
         """
-        # convert tickers to cryptodatapy format
+        # tickers
         self.data_resp.columns = self.data_req.tickers  # convert tickers to cryptodatapy format
         # resample to match end of reporting period, not beginning
         self.data_resp = self.data_resp.resample('d').last().ffill().resample(self.data_req.freq).last().stack(). \
             to_frame().reset_index()
         # convert cols
         if self.data_req.cat == 'macro':
             self.data_resp.columns = ['DATE', 'symbol', 'actual']
         else:
             self.data_resp.columns = ['DATE', 'symbol', 'close']
-        # convert fields to lib
+        # fields
         self.convert_fields_to_lib(data_source='fred')
-        # set index
+        # index
         self.data_resp.set_index(['date', 'ticker'], inplace=True)
         # type conversion
         self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
         # remove bad data
         self.data_resp = self.data_resp[self.data_resp != 0]  # 0 values
         self.data_resp = self.data_resp[~self.data_resp.index.duplicated()]  # duplicate rows
@@ -807,37 +907,41 @@ class WrangleData:
         pd.DataFrame
             Wrangled dataframe into tidy data format.
         """
-        # convert tickers
-        if len(self.data_req.tickers) == 1:  # add ticker
-            if self.data_req.cat == 'eqty' or self.data_req.cat == 'fx':
-                self.data_resp['Ticker'] = self.data_req.tickers[0].upper()
-            else:
-                self.data_resp['Ticker'] = self.data_req.tickers[0]
-        else:   # convert tickers to cryptodatapy format
-            self.data_resp = self.data_resp.stack()  # stack to multi-index
+        # tickers
+        tickers_dict = {source_ticker: ticker for source_ticker, ticker in zip(self.data_req.source_tickers,
+                        self.data_req.tickers)}
+        if len(self.data_req.tickers) == 1:
+            self.data_resp['Ticker'] = self.data_req.tickers[0]
+        else:
+            self.data_resp = self.data_resp.stack()
             self.data_resp.index.names = ['Date', 'Ticker']
-            if self.data_req.cat == 'eqty' or self.data_req.cat == 'fx':
-                self.data_resp.index = self.data_resp.index.set_levels([ticker.upper() for ticker in
-                                                                        self.data_req.tickers], level=1)
-            else:
-                self.data_resp.index = self.data_resp.index.set_levels([ticker for ticker in self.data_req.tickers],
-                                                                       level=1)
+            self.data_resp.index = self.data_resp.index.set_levels(self.data_resp.index.levels[1].map(tickers_dict),
+                                                                   level=1)
         self.data_resp.reset_index(inplace=True)
-        # convert fields
+        #  fields
         self.convert_fields_to_lib(data_source='yahoo')
-        # convert to datetime
+        # index
         self.data_resp['date'] = pd.to_datetime(self.data_resp['date'])
+        self.data_resp.set_index(['date', 'ticker'], inplace=True)
         # resample
-        self.data_resp = self.data_resp.set_index('date').groupby('ticker').resample(self.data_req.freq).last().\
-            droplevel(0).reset_index().set_index(['date', 'ticker'])
+        self.data_resp = self.data_resp.groupby('ticker').\
+            resample(self.data_req.freq, level='date').\
+            last().swaplevel('ticker', 'date').sort_index()
         # re-order cols
         self.data_resp = self.data_resp.loc[:, ['open', 'high', 'low', 'close', 'close_adj', 'volume']]
         # type conversion
         self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
         # remove bad data
         self.data_resp = self.data_resp[self.data_resp != 0]  # 0 values
         self.data_resp = self.data_resp[~self.data_resp.index.duplicated()]  # duplicate rows
         self.data_resp = self.data_resp.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs
         # keep only requested fields and sort index
         self.data_resp = self.data_resp[self.data_req.fields].sort_index()
@@ -853,7 +957,7 @@ class WrangleData:
             Wrangled dataframe into tidy data format.
         """
-        # convert tickers to cryptodatapy format
+        # ticker
         ff_tickers_dict = {'RF': 'US_Rates_1M_RF',
                            'Mkt-RF': 'US_Eqty_CSRP_ER',
                            'HML': 'US_Eqty_Val',
@@ -862,6 +966,7 @@ class WrangleData:
                            'CMA': 'US_Eqty_Inv',
                            'Mom': 'US_Eqty_Mom',
                            'ST_Rev': 'US_Eqty_STRev'}
         # remove white space from cols str
         self.data_resp.columns = [col.strip() for col in self.data_resp.columns]
         # keep cols in data req tickers
@@ -870,14 +975,18 @@ class WrangleData:
         drop_cols = [col for col in self.data_resp.columns if col not in self.data_req.tickers]
         self.data_resp.drop(columns=drop_cols, inplace=True)
         self.data_resp = self.data_resp.loc[:, ~self.data_resp.columns.duplicated()]  # drop dup cols
         # resample freq
         self.data_resp = self.data_resp.resample(self.data_req.freq).sum()
         # format index
         self.data_resp.index.name = 'date'  # rename
         self.data_resp = self.data_resp.stack().to_frame('er')
         self.data_resp.index.names = ['date', 'ticker']
         # type and conversion to decimals
         self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes() / 100
         # remove bad data
         self.data_resp = self.data_resp[self.data_resp != 0]  # 0 values
         self.data_resp = self.data_resp[~self.data_resp.index.duplicated()]  # duplicate rows
@@ -1016,9 +1125,9 @@ class WrangleData:
         # loop through data resp cols
         for col in self.data_resp.columns:
-            if self.data_req.source_fields is not None and col in self.data_req.source_fields:
-                pass
-            elif col in fields_list or col.title() in fields_list or col.lower() in fields_list:
+            # if self.data_req.source_fields is not None and col in self.data_req.source_fields:
+            #     pass
+            if col in fields_list or col.title() in fields_list or col.lower() in fields_list:
                 self.data_resp.rename(columns={col: fields_df[(fields_df[str(data_source) + '_id']
                                       == col.title()) |
                                     (fields_df[str(data_source) + '_id'] == col.lower()) |

cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl