imsciences 0.6.1.2__tar.gz → 0.6.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.2
3
+ Version: 0.6.1.4
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -19,6 +19,7 @@ import requests
19
19
  from geopy.geocoders import Nominatim
20
20
  import subprocess
21
21
  import json
22
+ import xml.etree.ElementTree as ET
22
23
 
23
24
  class dataprocessing:
24
25
 
@@ -1335,9 +1336,9 @@ class datapull:
1335
1336
  print(" - Example: pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')")
1336
1337
 
1337
1338
  print("\n4. pull_oecd")
1338
- print(" - Description: Fetch macroeconomic data from OECD and other sources for a specified country.")
1339
- print(" - Usage: pull_macro(country='GBR', week_commencing='mon')")
1340
- print(" - Example: pull_macro('GBR', 'mon')")
1339
+ print(" - Description: Fetch macroeconomic data from OECD for a specified country.")
1340
+ print(" - Usage: pull_oecd(country='GBR', week_commencing='mon', start_date='1950-01-01')")
1341
+ print(" - Example: pull_oecd('GBR', 'mon', '1950-01-01')")
1341
1342
 
1342
1343
  print("\n5. get_google_mobility_data")
1343
1344
  print(" - Description: Fetch Google Mobility data for the specified country.")
@@ -1353,6 +1354,11 @@ class datapull:
1353
1354
  print(" - Description: Fetch and process historical weather data for the specified country.")
1354
1355
  print(" - Usage: pull_weather(week_commencing, country)")
1355
1356
  print(" - Example: pull_weather('mon', 'GBR')")
1357
+
1358
+ print("\n8. pull_covid_data")
1359
+ print(" - Description: Get covid pandemic data for the country of interest.")
1360
+ print(" - Usage: pull_covid_data(folder_path, country, week_commencing)")
1361
+ print(" - Example: pull_covid_data('C:/Users/--username--/OneDrive/', 'GB', 'mon')")
1356
1362
 
1357
1363
  ############################################################### MACRO ##########################################################################
1358
1364
 
@@ -1579,370 +1585,148 @@ class datapull:
1579
1585
 
1580
1586
  return ons_df_final
1581
1587
 
1582
- def pull_macro(self, country: str = "GBR", week_commencing: str = "mon"):
1583
- # Change country input to list
1584
- print("")
1585
- countries_list = [country]
1588
+ def pull_oecd(self, country: str = "GBR", week_commencing: str = "mon", start_date: str = "1950-01-01") -> pd.DataFrame:
1589
+ """
1590
+ Fetch and process time series data from the OECD API.
1586
1591
 
1587
- # Check if the data wants to be inputted at any other week commencing date
1588
- day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
1592
+ Args:
1593
+ country (str): A string containing a 3-letter code of the country of interest (E.g: "GBR", "FRA", "USA", "DEU")
1594
+ week_commencing (str): The starting day of the week for aggregation.
1595
+ Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".
1596
+ start_date (str): Dataset start date in the format "YYYY-MM-DD"
1589
1597
 
1590
- # Two useful functions for quarterly data
1591
- # Define a function to get quarterly data
1592
- def get_quarter(p_date: datetime.date) -> int:
1593
- return (p_date.month - 1) // 3 + 1
1594
-
1595
- # Define a function to get the last day of the quarter
1596
- def get_last_day_of_the_quarter(p_date: datetime.date):
1597
- quarter = get_quarter(p_date)
1598
- return datetime(p_date.year + 3 * quarter // 12, 3 * quarter % 12 + 1, 1) + pd.Timedelta(days=-1)
1599
-
1600
- # For the monthly data
1601
- data_M, subjects_M, measures_M = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
1602
- subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
1603
- 'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
1604
- 'PRINTO01'],
1605
- measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
1606
- frequency='M', startDate='2015-01')
1607
- data_M = data_M.stack(level=[0, -1, -2]).reset_index()
1608
-
1609
- data_Q, subjects_Q, measures_Q = cif.createDataFrameFromOECD(countries=countries_list, dsname='MEI',
1610
- subject=['LCEAMN01', 'LCEAPR', 'CSCICP03', 'CPALTT01',
1611
- 'LRHUTTTT', 'LORSGPRT', 'IR3TIB01',
1612
- 'PRINTO01'],
1613
- measure=['IXOBSA', 'IXNSA', 'IXNB', 'STSA', 'ST', 'GPSA', 'GY'],
1614
- frequency='Q', startDate='2015-01')
1615
-
1616
- data_Q = data_Q.stack(level=[0, -1, -2]).reset_index()
1617
-
1618
- # Create a data frame dictionary to store your monthly data frames
1619
- DataFrameDict_M = {elem: pd.DataFrame() for elem in countries_list}
1620
- for key in DataFrameDict_M.keys():
1621
- DataFrameDict_M[key] = data_M[:][data_M.country == key]
1622
-
1623
- # Create a data frame dictionary to store your quarterly data frames
1624
- DataFrameDict_Q = {elem: pd.DataFrame() for elem in countries_list}
1625
- for key in DataFrameDict_Q.keys():
1626
- DataFrameDict_Q[key] = data_Q[:][data_Q.country == key]
1627
-
1628
- # Create a monthly list of the dataframes to iterate through
1629
- countries_df_list_M = []
1630
- for i in countries_list:
1631
- df = pd.DataFrame(DataFrameDict_M[i])
1632
- df.rename(columns={0: 'Values'}, inplace=True)
1633
- df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
1634
- countries_df_list_M.append(df)
1635
-
1636
- # Create a quarterly list of the dataframes to iterate through
1637
- countries_df_list_Q = []
1638
- for i in countries_list:
1639
- df = pd.DataFrame(DataFrameDict_Q[i])
1640
- df.rename(columns={0: 'Values'}, inplace=True)
1641
- df = pd.pivot_table(data=df, index='time', values='Values', columns=['subject', 'measure'])
1642
- countries_df_list_Q.append(df)
1643
-
1644
- combined_countries_df_list = list(zip(countries_df_list_M, countries_df_list_Q))
1645
-
1646
- # Loop through and create dataframes for every country
1647
- for index, data in enumerate(combined_countries_df_list):
1648
- # Find country being extracted
1649
- country = countries_list[index]
1650
- print(country)
1651
-
1652
- # For consumer confidence
1653
- # For countries with no data
1654
- if country in ['CAN', 'IND', 'NOR']:
1655
- Consumer_Confidence_Index_df_M = pd.DataFrame()
1656
- Consumer_Confidence_Index_df_Q = pd.DataFrame()
1657
- # For countries with quarterly data
1658
- elif country in []:
1659
- Consumer_Confidence_Index_df_Q = data[1]['CSCICP03']['IXNSA']
1660
- Consumer_Confidence_Index_df_Q.rename('consumer_confidence_index', inplace=True)
1661
- Consumer_Confidence_Index_df_M = pd.DataFrame()
1662
- # For countries with monthly data
1663
- else:
1664
- Consumer_Confidence_Index_df_M = data[0]['CSCICP03']['IXNSA']
1665
- Consumer_Confidence_Index_df_M.rename('consumer_confidence_index', inplace=True)
1666
- Consumer_Confidence_Index_df_Q = pd.DataFrame()
1667
-
1668
- # For consumer prices for COST OF LIVING
1669
- # For countries with no data
1670
- if country in []:
1671
- Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
1672
- Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
1673
- # For countries with quarterly data
1674
- elif country in ['AUS', 'NZL']:
1675
- Consumer_Price_Index_Cost_Of_Living_df_Q = data[1]['CPALTT01']['IXNB']
1676
- Consumer_Price_Index_Cost_Of_Living_df_Q.rename('consumer_price_index_cost_of_living', inplace=True)
1677
- Consumer_Price_Index_Cost_Of_Living_df_M = pd.DataFrame()
1678
- # For countries with monthly data
1679
- else:
1680
- Consumer_Price_Index_Cost_Of_Living_df_M = data[0]['CPALTT01']['IXNB']
1681
- Consumer_Price_Index_Cost_Of_Living_df_M.rename('consumer_price_index_cost_of_living', inplace=True)
1682
- Consumer_Price_Index_Cost_Of_Living_df_Q = pd.DataFrame()
1683
-
1684
- # For consumer prices FOR INFLATION
1685
- # For countries with no data
1686
- if country in []:
1687
- Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
1688
- Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
1689
- # For countries with quarterly data
1690
- elif country in ['AUS', 'NZL']:
1691
- Consumer_Price_Index_Inflation_df_Q = data[1]['CPALTT01']['GY']
1692
- Consumer_Price_Index_Inflation_df_Q.rename('consumer_price_index_inflation', inplace=True)
1693
- Consumer_Price_Index_Inflation_df_M = pd.DataFrame()
1694
- # For countries with monthly data
1695
- else:
1696
- Consumer_Price_Index_Inflation_df_M = data[0]['CPALTT01']['GY']
1697
- Consumer_Price_Index_Inflation_df_M.rename('consumer_price_index_inflation', inplace=True)
1698
- Consumer_Price_Index_Inflation_df_Q = pd.DataFrame()
1699
-
1700
- # For GDP Index Smoothed
1701
- # For countries with no data
1702
- if country in ['NLD', 'CHE', 'NZL', 'SWE', 'NOR']:
1703
- GDP_Index_Smoothed_df_M = pd.DataFrame()
1704
- GDP_Index_Smoothed_df_Q = pd.DataFrame()
1705
- # For countries with quarterly data
1706
- elif country in []:
1707
- GDP_Index_Smoothed_df_Q = data[1]['LORSGPRT']['STSA']
1708
- GDP_Index_Smoothed_df_Q.rename('gdp_index_smoothed', inplace=True)
1709
- GDP_Index_Smoothed_df_M = pd.DataFrame()
1710
- # For countries with monthly data
1711
- else:
1712
- GDP_Index_Smoothed_df_M = data[0]['LORSGPRT']['STSA']
1713
- GDP_Index_Smoothed_df_M.rename('gdp_index_smoothed', inplace=True)
1714
- GDP_Index_Smoothed_df_Q = pd.DataFrame()
1715
-
1716
- # For Harmonised Unemployment Index
1717
- # For countries with no data
1718
- if country in ['IND', 'CHE', 'ZAF', 'CHN']:
1719
- Harmonised_Unemployment_Index_df_M = pd.DataFrame()
1720
- Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
1721
- # For countries with quarterly data
1722
- elif country in ['NZL']:
1723
- Harmonised_Unemployment_Index_df_Q = data[1]['LRHUTTTT']['STSA']
1724
- Harmonised_Unemployment_Index_df_Q.rename('harmonised_unemployment_index', inplace=True)
1725
- Harmonised_Unemployment_Index_df_M = pd.DataFrame()
1726
- # For countries with monthly data
1727
- else:
1728
- Harmonised_Unemployment_Index_df_M = data[0]['LRHUTTTT']['STSA']
1729
- Harmonised_Unemployment_Index_df_M.rename('harmonised_unemployment_index', inplace=True)
1730
- Harmonised_Unemployment_Index_df_Q = pd.DataFrame()
1731
-
1732
- # For hourly earnings index manufacturing
1733
- # For countries with no data
1734
- if country in ['IND', 'CHE', 'ZAF', 'CHN']:
1735
- Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
1736
- Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
1737
- # For countries with quarterly data
1738
- elif country in ['FRA', 'DEU', 'ESP', 'AUS', 'NZL', 'KOR', 'NOR']:
1739
- Hourly_Earnings_Index_Manufacturing_df_Q = data[1]['LCEAMN01']['IXOBSA']
1740
- Hourly_Earnings_Index_Manufacturing_df_Q.rename('hourly_earnings_index_manufacturing', inplace=True)
1741
- Hourly_Earnings_Index_Manufacturing_df_M = pd.DataFrame()
1742
- # For countries with monthly data
1743
- else:
1744
- Hourly_Earnings_Index_Manufacturing_df_M = data[0]['LCEAMN01']['IXOBSA']
1745
- Hourly_Earnings_Index_Manufacturing_df_M.rename('hourly_earnings_index_manufacturing', inplace=True)
1746
- Hourly_Earnings_Index_Manufacturing_df_Q = pd.DataFrame()
1747
-
1748
- # For Short Term Interest Rate
1749
- # For countries with no data
1750
- if country in []:
1751
- Short_Term_Interest_Rate_df_M = pd.DataFrame()
1752
- Short_Term_Interest_Rate_df_Q = pd.DataFrame()
1753
- # For countries with quarterly data
1754
- elif country in []:
1755
- Short_Term_Interest_Rate_df_Q = data[1]['IR3TIB01']['ST']
1756
- Short_Term_Interest_Rate_df_Q.rename('short_term_interest_rate', inplace=True)
1757
- Short_Term_Interest_Rate_df_M = pd.DataFrame()
1758
- # For countries with monthly data
1759
- else:
1760
- Short_Term_Interest_Rate_df_M = data[0]['IR3TIB01']['ST']
1761
- Short_Term_Interest_Rate_df_M.rename('short_term_interest_rate', inplace=True)
1762
- Short_Term_Interest_Rate_df_Q = pd.DataFrame()
1763
-
1764
- # For Industrial Product Growth on Previous Period
1765
- # For countries with no data
1766
- if country in ['ZAF', 'CHN']:
1767
- Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
1768
- Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
1769
- # For countries with quarterly data
1770
- elif country in ['AUS', 'NZL']:
1771
- Industrial_Product_Growth_on_Previous_Period_df_Q = data[1]['PRINTO01']['GPSA']
1772
- Industrial_Product_Growth_on_Previous_Period_df_Q.rename('industrial_product_growth_on_previous_period', inplace=True)
1773
- Industrial_Product_Growth_on_Previous_Period_df_M = pd.DataFrame()
1774
- # For countries with monthly data
1775
- else:
1776
- Industrial_Product_Growth_on_Previous_Period_df_M = data[0]['PRINTO01']['GPSA']
1777
- Industrial_Product_Growth_on_Previous_Period_df_M.rename('industrial_product_growth_on_previous_period', inplace=True)
1778
- Industrial_Product_Growth_on_Previous_Period_df_Q = pd.DataFrame()
1779
-
1780
- # For Industrial Production Index
1781
- # For countries with no data
1782
- if country in ['ZAF', 'CHN']:
1783
- Industrial_Production_Index_df_M = pd.DataFrame()
1784
- Industrial_Production_Index_df_Q = pd.DataFrame()
1785
- # For countries with quarterly data
1786
- elif country in ['AUS', 'NZL']:
1787
- Industrial_Production_Index_df_Q = data[1]['PRINTO01']['IXOBSA']
1788
- Industrial_Production_Index_df_Q.rename('industrial_production_index', inplace=True)
1789
- Industrial_Production_Index_df_M = pd.DataFrame()
1790
- # For countries with monthly data
1791
- else:
1792
- Industrial_Production_Index_df_M = data[0]['PRINTO01']['IXOBSA']
1793
- Industrial_Production_Index_df_M.rename('industrial_production_index', inplace=True)
1794
- Industrial_Production_Index_df_Q = pd.DataFrame()
1795
-
1796
- # Create monthly macroeconomic dataframe
1797
- all_dfs_list_M = [Consumer_Confidence_Index_df_M,
1798
- Consumer_Price_Index_Cost_Of_Living_df_M,
1799
- Consumer_Price_Index_Inflation_df_M,
1800
- GDP_Index_Smoothed_df_M,
1801
- Harmonised_Unemployment_Index_df_M,
1802
- Hourly_Earnings_Index_Manufacturing_df_M,
1803
- Short_Term_Interest_Rate_df_M,
1804
- Industrial_Product_Growth_on_Previous_Period_df_M,
1805
- Industrial_Production_Index_df_M]
1806
-
1807
- # Check if any dataframes are empty and if there are remove them
1808
- all_dfs_list_M = [df for df in all_dfs_list_M if not df.empty]
1809
- cif_Macroeconomic_df_M = pd.concat(all_dfs_list_M, axis=1)
1810
-
1811
- # Create quarterly macroeconomic dataframe
1812
- all_dfs_list_Q = [Consumer_Confidence_Index_df_Q,
1813
- Consumer_Price_Index_Cost_Of_Living_df_Q,
1814
- Consumer_Price_Index_Inflation_df_Q,
1815
- GDP_Index_Smoothed_df_Q,
1816
- Harmonised_Unemployment_Index_df_Q,
1817
- Hourly_Earnings_Index_Manufacturing_df_Q,
1818
- Short_Term_Interest_Rate_df_Q,
1819
- Industrial_Product_Growth_on_Previous_Period_df_Q,
1820
- Industrial_Production_Index_df_Q]
1821
-
1822
- # Check if any dataframes are empty and if there are remove them
1823
- all_dfs_list_Q = [df for df in all_dfs_list_Q if not df.empty]
1824
- if all_dfs_list_Q != []:
1825
- macroeconomic_monthly_df_Q = pd.concat(all_dfs_list_Q, axis=1)
1826
- else:
1827
- macroeconomic_monthly_df_Q = pd.DataFrame()
1828
-
1829
- # For USD GBP Exchange Rate
1830
- # If it's the UK add this series else don't
1831
- if countries_list[index] == 'GBR':
1832
- USD_GBP_Exchange_Rate_df = pd.read_csv(
1833
- 'https://stats.oecd.org/SDMX-JSON/data/MEI_FIN/CCUS.' + countries_list[index] + '.M/OECD?contentType=csv')
1834
- USD_GBP_Exchange_Rate_df.head()
1835
- USD_GBP_Exchange_Rate_df_pivot = pd.pivot_table(USD_GBP_Exchange_Rate_df, values='Value', index='TIME',
1836
- columns='Subject')
1837
- USD_GBP_Exchange_Rate_df_pivot_final = USD_GBP_Exchange_Rate_df_pivot.loc["2015-01":]
1838
- USD_GBP_Exchange_Rate_df_pivot_final.rename(
1839
- columns={'Currency exchange rates, monthly average': 'usd_gbp_exchange_rate'}, inplace=True)
1840
-
1841
- # Create final monthly dataframe
1842
- macroeconomic_monthly_df_M = pd.concat([cif_Macroeconomic_df_M, USD_GBP_Exchange_Rate_df_pivot_final], axis=1)
1843
- else:
1844
- # Create final monthly dataframe
1845
- macroeconomic_monthly_df_M = cif_Macroeconomic_df_M
1846
-
1847
- # Create the final W/C Sunday dataframe
1848
- # For monthly data
1849
- macroeconomic_monthly_df_M['Date'] = macroeconomic_monthly_df_M.index
1850
- df_M = macroeconomic_monthly_df_M.set_index(pd.to_datetime(macroeconomic_monthly_df_M['Date'])).drop(columns='Date')
1851
- df_M.fillna(method="ffill", inplace=True)
1852
- df_M.reset_index(inplace=True)
1853
-
1854
- daily_records = []
1855
- # Iterate over each row in the DataFrame
1856
- for _, row in df_M.iterrows():
1857
- # Calculate the number of days in the month
1858
- num_days = calendar.monthrange(row["Date"].year, row["Date"].month)[1]
1859
- # Create a new record for each day of the month
1860
- for day in range(1, num_days + 1):
1861
- daily_row = row.copy()
1862
- daily_row["Date"] = row["Date"].replace(day=day)
1863
- daily_records.append(daily_row)
1864
-
1865
- # Convert the list of daily records into a DataFrame
1866
- daily_df = pd.DataFrame(daily_records)
1867
-
1868
- # Extend dataframe to include the current data if needed
1869
- datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
1870
- extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
1871
- q = pd.Series(datelist, name="Date")
1872
- s = pd.DataFrame(extended_data, columns=list(df_M.columns[1:]))
1873
- extended_daily_df = pd.concat([q, s], axis=1)
1874
- extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
1875
-
1876
- # Create a week commencing column
1877
- extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
1878
- extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
1879
- lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1880
- extended_daily_df.drop("Date", axis=1, inplace=True)
1881
- extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
1882
-
1883
- # Take a weekly average
1884
- macroeconomic_weekly_df_M = extended_daily_df.groupby('Date').mean()
1885
-
1886
- # For quarterly data
1887
- # If there are quarterly datasets
1888
- if all_dfs_list_Q != []:
1889
- macroeconomic_monthly_df_Q['Date'] = macroeconomic_monthly_df_Q.index
1890
- df_Q = macroeconomic_monthly_df_Q.set_index(pd.to_datetime(macroeconomic_monthly_df_Q['Date'])).drop(
1891
- columns='Date')
1892
- df_Q.fillna(method="ffill", inplace=True)
1893
- df_Q.reset_index(inplace=True)
1894
-
1895
- daily_records = []
1896
- for _, row in df_Q.iterrows():
1897
- year = row["Date"].year
1898
- month = row["Date"].month
1899
- day = row["Date"].day
1900
- last_date = get_last_day_of_the_quarter(datetime(year, month, day).date())
1901
- all_days = pd.date_range(row["Date"], last_date, freq="D")
1902
-
1903
- # Create a new record for each day of the quarter
1904
- for day in all_days:
1905
- daily_row = row.copy()
1906
- daily_row["Date"] = row["Date"].replace(day=day.day, month=day.month)
1907
- daily_records.append(daily_row)
1908
-
1909
- # Convert the list of daily records into a DataFrame
1910
- daily_df = pd.DataFrame(daily_records)
1911
-
1912
- # Extend dataframe to include data up to today
1913
- datelist = pd.date_range(daily_df["Date"].iloc[-1] + pd.Timedelta(days=1), datetime.today()).tolist()
1914
- extended_data = np.repeat([list(daily_df.iloc[-1, 1:].values)], len(datelist), axis=0)
1915
- q = pd.Series(datelist, name="Date")
1916
- s = pd.DataFrame(extended_data, columns=list(df_Q.columns[1:]))
1917
- extended_daily_df = pd.concat([q, s], axis=1)
1918
- extended_daily_df = pd.concat([daily_df, extended_daily_df], ignore_index=False)
1919
-
1920
- # Create a week commencing column
1921
- extended_daily_df["Date"] = pd.to_datetime(extended_daily_df["Date"], format='%d %b %Y')
1922
- extended_daily_df['week_start'] = extended_daily_df["Date"].apply(
1923
- lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
1924
- extended_daily_df.drop("Date", axis=1, inplace=True)
1925
- extended_daily_df.rename(columns={'week_start': "Date"}, inplace=True)
1926
-
1927
- # Take a weekly average
1928
- macroeconomic_weekly_df_Q = extended_daily_df.groupby('Date').mean()
1929
-
1930
- # Merge the two datasets together
1931
- if all_dfs_list_Q != []:
1932
- macroeconomic_weekly_df = macroeconomic_weekly_df_M.merge(macroeconomic_weekly_df_Q, left_index=True,
1933
- right_index=True)
1934
- # If there are no quarterly datasets
1598
+ Returns:
1599
+ pd.DataFrame: A DataFrame with weekly aggregated OECD data. The 'OBS' column contains the week
1600
+ commencing dates, and other columns contain the aggregated time series values.
1601
+ """
1602
+
1603
+ def parse_quarter(date_str):
1604
+ """Parses a string in 'YYYY-Q#' format into a datetime object."""
1605
+ year, quarter = date_str.split('-')
1606
+ quarter_number = int(quarter[1])
1607
+ month = (quarter_number - 1) * 3 + 1
1608
+ return pd.Timestamp(f"{year}-{month:02d}-01")
1609
+
1610
+ # Generate a date range from 1950-01-01 to today
1611
+ date_range = pd.date_range(start=start_date, end=datetime.today(), freq='D')
1612
+
1613
+ url_details = [
1614
+ ["BCICP", "SDD.STES,DSD_STES@DF_CLI,", ".....", "macro_business_confidence_index"],
1615
+ ["CCICP", "SDD.STES,DSD_STES@DF_CLI,", ".....", "macro_consumer_confidence_index"],
1616
+ ["N.CPI", "SDD.TPS,DSD_PRICES@DF_PRICES_ALL,", "PA._T.N.GY", "macro_cpi_total"],
1617
+ ["N.CPI", "SDD.TPS,DSD_PRICES@DF_PRICES_ALL,", "PA.CP041T043.N.GY", "macro_cpi_housing"],
1618
+ ["N.CPI", "SDD.TPS,DSD_PRICES@DF_PRICES_ALL,", "PA.CP01.N.GY", "macro_cpi_food"],
1619
+ ["N.CPI", "SDD.TPS,DSD_PRICES@DF_PRICES_ALL,", "PA.CP045_0722.N.GY", "macro_cpi_energy"],
1620
+ ["UNE_LF_M", "SDD.TPS,DSD_LFS@DF_IALFS_UNE_M,", "._Z.Y._T.Y_GE15.", "macro_unemployment_rate"],
1621
+ ["EAR", "SDD.TPS,DSD_EAR@DF_HOU_EAR,", ".Y..S1D", "macro_private_hourly_earnings"],
1622
+ ["RHP", "ECO.MPD,DSD_AN_HOUSE_PRICES@DF_HOUSE_PRICES,1.0", "", "macro_real_house_prices"],
1623
+ ["PRVM", "SDD.STES,DSD_KEI@DF_KEI,4.0", "IX.C..", "macro_manufacturing_production_volume"],
1624
+ ["TOVM", "SDD.STES,DSD_KEI@DF_KEI,4.0", "IX...", "macro_retail_trade_volume"],
1625
+ ["IRSTCI", "SDD.STES,DSD_KEI@DF_KEI,4.0", "PA...", "macro_interbank_rate"],
1626
+ ["IRLT", "SDD.STES,DSD_KEI@DF_KEI,4.0", "PA...", "macro_long_term_interest_rate"],
1627
+ ["B1GQ", "SDD.NAD,DSD_NAMAIN1@DF_QNA,1.1", "._Z....GY.T0102", "macro_gdp_growth_yoy"]
1628
+ ]
1629
+
1630
+ # Create empty final dataframe
1631
+ oecd_df_final = pd.DataFrame()
1632
+
1633
+ daily_df = pd.DataFrame({'OBS': date_range})
1634
+ value_columns = []
1635
+
1636
+ # Iterate for each variable of interest
1637
+ for series_details in url_details:
1638
+ series = series_details[0]
1639
+ dataset_id = series_details[1]
1640
+ filter = series_details[2]
1641
+ col_name = series_details[3]
1642
+
1643
+ # check if request was successful and determine the most granular data available
1644
+ for freq in ['M', 'Q', 'A']:
1645
+
1646
+ if series in ["UNE_LF_M", "EAR"]:
1647
+ data_url = f"https://sdmx.oecd.org/public/rest/data/OECD.{dataset_id}/{country}.{series}.{filter}.{freq}?startPeriod=1950-01"
1648
+ elif series in ["B1GQ"]:
1649
+ data_url = f"https://sdmx.oecd.org/public/rest/data/OECD.{dataset_id}/{freq}..{country}...{series}.{filter}?startPeriod=1950-01"
1650
+ else:
1651
+ data_url = f"https://sdmx.oecd.org/public/rest/data/OECD.{dataset_id}/{country}.{freq}.{series}.{filter}?startPeriod=1950-01"
1652
+
1653
+ # Make the request to the OECD API for data
1654
+ data_response = requests.get(data_url)
1655
+
1656
+ # Check if the request was successful
1657
+ if data_response.status_code != 200:
1658
+ print(f"Failed to fetch data for series {series} with frequency '{freq}' for {country}: {data_response.status_code} {data_response.text}")
1659
+ url_test = False
1660
+ continue
1661
+ else:
1662
+ url_test = True
1663
+ break
1664
+
1665
+ # get data for the next variable if url doesn't exist
1666
+ if url_test == False:
1667
+ continue
1668
+
1669
+ root = ET.fromstring(data_response.content)
1670
+
1671
+ # Define namespaces if necessary (the namespace is included in the tags)
1672
+ namespaces = {'generic': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic'}
1673
+
1674
+ # Lists to store the data
1675
+ dates = []
1676
+ values = []
1677
+
1678
+ # Iterate over all <Obs> elements and extract date and value
1679
+ for obs in root.findall('.//generic:Obs', namespaces):
1680
+
1681
+ # Extracting the time period (date)
1682
+ time_period = obs.find('.//generic:ObsDimension', namespaces).get('value')
1683
+
1684
+ # Extracting the observation value
1685
+ value = obs.find('.//generic:ObsValue', namespaces).get('value')
1686
+
1687
+ # Storing the data
1688
+ if time_period and value:
1689
+ dates.append(time_period)
1690
+ values.append(float(value)) # Convert value to float
1691
+
1692
+ # Add variable names that were found to a list
1693
+ value_columns.append(col_name)
1694
+
1695
+ # Creating a DataFrame
1696
+ data = pd.DataFrame({'OBS': dates, col_name: values})
1697
+
1698
+ # Convert date strings into datetime format
1699
+ if freq == 'Q':
1700
+ data['OBS'] = data['OBS'].apply(parse_quarter)
1935
1701
  else:
1936
- macroeconomic_weekly_df = macroeconomic_weekly_df_M
1702
+ # Display the DataFrame
1703
+ data['OBS'] = data['OBS'].apply(lambda x: datetime.strptime(x, '%Y-%m'))
1704
+
1705
+ # Sort data by chronological order
1706
+ data.sort_values(by='OBS', inplace=True)
1707
+
1708
+ # Merge the data based on the observation date
1709
+ daily_df = pd.merge_asof(daily_df, data[['OBS', col_name]], on='OBS', direction='backward')
1937
1710
 
1938
- # Change datetime format
1939
- macroeconomic_weekly_df.index = macroeconomic_weekly_df.index.strftime('%d/%m/%Y')
1940
1711
 
1941
- macroeconomic_weekly_df.reset_index()
1942
- macroeconomic_weekly_df.reset_index(drop=False, inplace=True)
1943
- macroeconomic_weekly_df.rename(columns={'Date': 'OBS'}, inplace=True)
1712
+ # Ensure columns are numeric
1713
+ for col in value_columns:
1714
+ if col in daily_df.columns:
1715
+ daily_df[col] = pd.to_numeric(daily_df[col], errors='coerce').fillna(0)
1716
+ else:
1717
+ print(f"Column {col} not found in daily_df")
1718
+
1719
+ # Aggregate results by week
1720
+ country_df = ims_proc.aggregate_daily_to_wc_wide(df=daily_df,
1721
+ date_column="OBS",
1722
+ group_columns=[],
1723
+ sum_columns=value_columns,
1724
+ wc=week_commencing,
1725
+ aggregation="average")
1726
+
1727
+ oecd_df_final = pd.concat([oecd_df_final, country_df], axis=0, ignore_index=True)
1944
1728
 
1945
- return macroeconomic_weekly_df
1729
+ return oecd_df_final
1946
1730
 
1947
1731
  def get_google_mobility_data(self, country: str, wc: str) -> pd.DataFrame:
1948
1732
  """
@@ -2709,4 +2493,37 @@ class datapull:
2709
2493
 
2710
2494
  final_weather = ims_proc.rename_cols(merged_df, 'seas_')
2711
2495
 
2712
- return final_weather
2496
+ return final_weather
2497
+
2498
+ def pull_covid_data(self, folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
2499
+ """
2500
+ Get covid pandemic data for the country of interest.
2501
+
2502
+ Args:
2503
+ folder_path (str): A string containing the local location of the OneDrive folder.
2504
+ Example: "C:/Users/-- username --/OneDrive - im-sciences.com"
2505
+ The file location within the MasterDrive of the worldwide covid data is:
2506
+ MasterDrive/Central Database/Covid/oxford-government-response.csv
2507
+ country (str): A string containing the country of interest (E.g: "GB", "FR")
2508
+ week_commencing (str): The starting day of the week for aggregation.
2509
+ Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".
2510
+
2511
+ Returns:
2512
+ pd.DataFrame: A DataFrame containing weekly aggregated COVID government response data for the country of interest.
2513
+ The 'OBS' column contains the week commencing dates.
2514
+ """
2515
+
2516
+ df = pd.read_csv(f'{folder_path}/MasterDrive/Central Database/Covid/oxford-government-response.csv')
2517
+
2518
+ country_df = df[df['location_key']==country]
2519
+ country_df.rename(columns={'date': 'OBS'}, inplace=True)
2520
+ country_df.drop('location_key', axis=1, inplace=True)
2521
+
2522
+ agg_df = ims_proc.aggregate_daily_to_wc_wide(country_df, 'OBS', [], country_df.columns.to_list(), week_commencing, 'average')
2523
+
2524
+ covid_df = ims_proc.rename_cols(agg_df, 'covid_')
2525
+
2526
+ covid_df['OBS'] = covid_df['OBS'].apply(lambda x: x[0].date())
2527
+
2528
+ return covid_df
2529
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.2
3
+ Version: 0.6.1.4
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -8,7 +8,7 @@ def read_md(file_name):
8
8
  return f.read()
9
9
  return ''
10
10
 
11
- VERSION = '0.6.1.2'
11
+ VERSION = '0.6.1.4'
12
12
  DESCRIPTION = 'IMS Data Processing Package'
13
13
  LONG_DESCRIPTION = read_md('README.md') # Reading from README.md
14
14
 
File without changes
File without changes