imsciences 0.6.3.2__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imsciences/__init__.py CHANGED
@@ -1,3 +1,2 @@
1
1
  from .datafunctions import dataprocessing
2
2
  from .datafunctions import datapull
3
- from .unittesting import TestDataProcessor
@@ -4,21 +4,18 @@ import os
4
4
  import plotly.express as px
5
5
  import plotly.graph_objs as go
6
6
  import numpy as np
7
- import datetime
8
7
  import re
9
8
  from fredapi import Fred
10
9
  import time
11
- from datetime import datetime, timedelta # noqa: F811
10
+ from datetime import datetime, timedelta
12
11
  from io import StringIO
13
- import urllib
14
- import requests_cache # noqa: F401
15
- import urllib.request # noqa: F401
16
12
  import requests
17
- from geopy.geocoders import Nominatim # noqa: F401
18
13
  import subprocess
19
14
  import json
20
15
  import xml.etree.ElementTree as ET
21
16
  from bs4 import BeautifulSoup
17
+ import yfinance as yf
18
+ import holidays
22
19
 
23
20
  class dataprocessing:
24
21
 
@@ -1767,17 +1764,6 @@ class dataprocessing:
1767
1764
  ########################################################################################################################################
1768
1765
  ########################################################################################################################################
1769
1766
 
1770
-
1771
-
1772
-
1773
-
1774
-
1775
-
1776
-
1777
-
1778
-
1779
-
1780
-
1781
1767
  ims_proc = dataprocessing()
1782
1768
 
1783
1769
  class datapull:
@@ -1788,38 +1774,43 @@ class datapull:
1788
1774
  print("\n1. pull_fred_data")
1789
1775
  print(" - Description: Get data from FRED by using series id tokens.")
1790
1776
  print(" - Usage: pull_fred_data(week_commencing, series_id_list)")
1791
- print(" - Example: pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])")
1777
+ print(" - Example: pull_fred_data('mon', ['GPDIC1'])")
1792
1778
 
1793
1779
  print("\n2. pull_boe_data")
1794
1780
  print(" - Description: Fetch and process Bank of England interest rate data.")
1795
1781
  print(" - Usage: pull_boe_data(week_commencing)")
1796
1782
  print(" - Example: pull_boe_data('mon')")
1797
1783
 
1798
- print("\n3. pull_ons_data")
1799
- print(" - Description: Fetch and process time series data from the ONS API.")
1800
- print(" - Usage: pull_ons_data(series_list, week_commencing)")
1801
- print(" - Example: pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')")
1802
-
1803
- print("\n4. pull_oecd")
1784
+ print("\n3. pull_oecd")
1804
1785
  print(" - Description: Fetch macroeconomic data from OECD for a specified country.")
1805
- print(" - Usage: pull_oecd(country='GBR', week_commencing='mon', start_date: '1950-01-01')")
1806
- print(" - Example: pull_oecd('GBR', 'mon', '1950-01-01')")
1786
+ print(" - Usage: pull_oecd(country='GBR', week_commencing='mon', start_date: '2020-01-01')")
1787
+ print(" - Example: pull_oecd('GBR', 'mon', '2000-01-01')")
1807
1788
 
1808
- print("\n5. get_google_mobility_data")
1789
+ print("\n4. get_google_mobility_data")
1809
1790
  print(" - Description: Fetch Google Mobility data for the specified country.")
1810
1791
  print(" - Usage: get_google_mobility_data(country, wc)")
1811
1792
  print(" - Example: get_google_mobility_data('United Kingdom', 'mon')")
1812
1793
 
1813
- print("\n6. pull_combined_dummies")
1794
+ print("\n5. pull_seasonality")
1814
1795
  print(" - Description: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.")
1815
- print(" - Usage: pull_combined_dummies(week_commencing)")
1816
- print(" - Example: pull_combined_dummies('mon')")
1796
+ print(" - Usage: pull_seasonality(week_commencing, start_date, countries)")
1797
+ print(" - Example: pull_seasonality('mon', '2020-01-01', ['US', 'GB'])")
1817
1798
 
1818
- print("\n7. pull_weather")
1799
+ print("\n6. pull_weather")
1819
1800
  print(" - Description: Fetch and process historical weather data for the specified country.")
1820
1801
  print(" - Usage: pull_weather(week_commencing, country)")
1821
1802
  print(" - Example: pull_weather('mon', 'GBR')")
1822
-
1803
+
1804
+ print("\n7. pull_macro_ons_uk")
1805
+ print(" - Description: Fetch and process time series data from the Beta ONS API.")
1806
+ print(" - Usage: pull_macro_ons_uk(aditional_list, week_commencing, sector)")
1807
+ print(" - Example: pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')")
1808
+
1809
+ print("\n8. pull_yfinance")
1810
+ print(" - Description: Fetch and process time series data from the Beta ONS API.")
1811
+ print(" - Usage: pull_yfinance(tickers, week_start_day)")
1812
+ print(" - Example: pull_yfinance(['^FTMC', '^IXIC'], 'mon')")
1813
+
1823
1814
  ############################################################### MACRO ##########################################################################
1824
1815
 
1825
1816
  def pull_fred_data(self, week_commencing: str = 'mon', series_id_list: list[str] = ["GPDIC1", "Y057RX1Q020SBEA", "GCEC1"]) -> pd.DataFrame:
@@ -1837,10 +1828,6 @@ class datapull:
1837
1828
  ----------
1838
1829
  pd.DataFrame
1839
1830
  Return a data frame with FRED data according to the series IDs provided
1840
-
1841
- Example
1842
- ----------
1843
- pull_fred_data("mon", ["GCEC1", "SP500"])
1844
1831
  '''
1845
1832
  # Fred API
1846
1833
  fred = Fred(api_key='76f5f8156145fdb8fbaf66f1eb944f8a')
@@ -1958,107 +1945,7 @@ class datapull:
1958
1945
 
1959
1946
  return df_final
1960
1947
 
1961
- def pull_ons_data(self, series_list, week_commencing):
1962
- """
1963
- Fetch and process time series data from the ONS API.
1964
-
1965
- Args:
1966
- series_list (list): A list of dictionaries where each dictionary represents a time series.
1967
- Each dictionary should have the keys 'series_id' and 'dataset_id'.
1968
- week_commencing (str): The starting day of the week for aggregation.
1969
- Options are "mon", "tue", "wed", "thur", "fri", "sat", "sun".
1970
-
1971
- Returns:
1972
- pd.DataFrame: A DataFrame with weekly aggregated ONS data. The 'OBS' column contains the week
1973
- commencing dates and other columns contain the aggregated time series values.
1974
- """
1975
-
1976
- def parse_quarter(date_str):
1977
- """Parses a string in 'YYYY Q#' format into a datetime object."""
1978
- year, quarter = date_str.split(' ')
1979
- quarter_number = int(quarter[1])
1980
- month = (quarter_number - 1) * 3 + 1
1981
- return pd.Timestamp(f"{year}-{month:02d}-01")
1982
-
1983
- # Generate a date range from 1950-01-01 to today
1984
- date_range = pd.date_range(start="1950-01-01", end=datetime.today(), freq='D')
1985
- daily_df = pd.DataFrame(date_range, columns=['OBS'])
1986
-
1987
- # Keep track of the renamed value columns
1988
- value_columns = []
1989
-
1990
- for series in series_list:
1991
- series_id = series['series_id']
1992
- dataset_id = series['dataset_id']
1993
-
1994
- # Construct the URL for data
1995
- data_url = f"https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data"
1996
-
1997
- # Make the request to the ONS API for data
1998
- data_response = requests.get(data_url)
1999
-
2000
- # Check if the request was successful
2001
- if data_response.status_code != 200:
2002
- print(f"Failed to fetch data for series {series_id}: {data_response.status_code} {data_response.text}")
2003
- continue
2004
-
2005
- # Parse the JSON response for data
2006
- data = data_response.json()
2007
-
2008
- # Attempt to extract the name of the time series from the data response
2009
- series_name = data.get('description', {}).get('title', 'Value')
2010
-
2011
- # Determine the most granular time series data available
2012
- if 'months' in data and data['months']:
2013
- time_series_data = data['months']
2014
- elif 'quarters' in data and data['quarters']:
2015
- time_series_data = data['quarters']
2016
- elif 'years' in data and data['years']:
2017
- time_series_data = data['years']
2018
- else:
2019
- print("No time series data found in the response")
2020
- continue
2021
-
2022
- # Create a DataFrame from the time series data
2023
- df = pd.DataFrame(time_series_data)
2024
-
2025
- # Handle different frequencies in the data
2026
- if 'date' in df.columns:
2027
- if any(df['date'].str.contains('Q')):
2028
- df['date'] = df['date'].apply(parse_quarter)
2029
- else:
2030
- df['date'] = pd.to_datetime(df['date'])
2031
-
2032
- df = df.rename(columns={'date': 'OBS', 'value': series_name})
2033
-
2034
- # Rename the value column
2035
- new_col_name = 'macro_' + series_name.lower().replace(':', '').replace(' ', '_').replace('-', '_')
2036
- df = df.rename(columns={series_name: new_col_name})
2037
-
2038
- # Track the renamed value column
2039
- value_columns.append(new_col_name)
2040
-
2041
- # Merge the data based on the observation date
2042
- daily_df = pd.merge_asof(daily_df, df[['OBS', new_col_name]], on='OBS', direction='backward')
2043
-
2044
- # Ensure columns are numeric
2045
- for col in value_columns:
2046
- if col in daily_df.columns:
2047
- daily_df[col] = pd.to_numeric(daily_df[col], errors='coerce').fillna(0)
2048
- else:
2049
- print(f"Column {col} not found in daily_df")
2050
-
2051
- # Aggregate results by week
2052
- ons_df_final = ims_proc.aggregate_daily_to_wc_wide(df=daily_df,
2053
- date_column="OBS",
2054
- group_columns=[],
2055
- sum_columns=value_columns,
2056
- wc=week_commencing,
2057
- aggregation="average")
2058
-
2059
- return ons_df_final
2060
-
2061
- def pull_oecd(self, country: str = "GBR", week_commencing: str = "mon", start_date: str = "1950-01-01") -> pd.DataFrame:
1948
+ def pull_oecd(self, country: str = "GBR", week_commencing: str = "mon", start_date: str = "2020-01-01") -> pd.DataFrame:
2062
1949
  """
2063
1950
  Fetch and process time series data from the OECD API.
2064
1951
 
@@ -2235,135 +2122,160 @@ class datapull:
2235
2122
 
2236
2123
  ############################################################### Seasonality ##########################################################################
2237
2124
 
2238
- def pull_combined_dummies(self, week_commencing):
2125
+ def pull_seasonality(self, week_commencing, start_date, countries):
2239
2126
  # Week commencing dictionary
2240
2127
  day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
2241
2128
 
2242
- # Create daily date range dataframe
2243
- date_range = pd.date_range(datetime(2015, 1, 1), datetime.today(), freq="d")
2129
+ # Create daily date range dataframe starting from start_date
2130
+ date_range = pd.date_range(
2131
+ start=pd.to_datetime(start_date),
2132
+ end=datetime.today(),
2133
+ freq="D"
2134
+ )
2244
2135
  df_daily = pd.DataFrame(date_range, columns=["Date"])
2245
-
2246
- # Create weekly date range dataframe
2247
- df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2136
+
2137
+ # ------------------------------------------------
2138
+ # 1. Identify "week_start" for each daily row
2139
+ # ------------------------------------------------
2140
+ df_daily['week_start'] = df_daily["Date"].apply(
2141
+ lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
2142
+ )
2143
+
2144
+ # ------------------------------------------------
2145
+ # 2. Build a weekly index (df_weekly_start) with dummy columns
2146
+ # ------------------------------------------------
2248
2147
  df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
2249
2148
  df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)
2250
2149
 
2150
+ # Set index to weekly "start of week"
2251
2151
  df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
2252
2152
  df_weekly_start.set_index("Date", inplace=True)
2253
-
2153
+
2254
2154
  # Create individual weekly dummies
2255
2155
  dummy_columns = {}
2256
2156
  for i in range(len(df_weekly_start)):
2257
2157
  col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
2258
2158
  dummy_columns[col_name] = [0] * len(df_weekly_start)
2259
2159
  dummy_columns[col_name][i] = 1
2260
-
2160
+
2261
2161
  df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
2262
2162
  df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)
2263
2163
 
2264
- # Create monthly dummies
2265
- df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
2266
- df_monthly_dummies = pd.get_dummies(df_daily, prefix="seas", columns=["Month"])
2267
- df_monthly_dummies['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2268
- df_monthly_dummies = df_monthly_dummies.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
2269
-
2270
- df_monthly_dummies.set_index("Date", inplace=True)
2271
- df_monthly_dummies = df_monthly_dummies / 7
2272
-
2273
- # Combine weekly and monthly dataframes
2274
- df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
2275
-
2276
- # Create weekly dummies
2277
- df_combined.reset_index(inplace=True)
2278
- df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
2279
- df_combined = pd.get_dummies(df_combined, prefix="wk", columns=["Week"])
2280
-
2281
- # Create yearly dummies
2282
- df_combined["Year"] = df_combined["Date"].dt.year
2283
- df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"])
2284
-
2285
- # Add constant
2286
- df_combined["Constant"] = 1
2287
-
2288
- # Add trend
2289
- df_combined["Trend"] = df_combined.index + 1
2290
-
2291
- # Set date as index
2292
- df_combined.set_index("Date", inplace=True)
2293
-
2294
- # Create COVID lockdown dummies
2295
- lockdown_periods = [
2296
- # Lockdown 1
2297
- ("2020-03-23", "2020-05-24"),
2298
- # Lockdown 2
2299
- ("2020-11-05", "2020-12-02"),
2300
- # Lockdown 3
2301
- ("2021-01-04", "2021-03-08")
2302
- ]
2303
-
2304
- df_covid = pd.DataFrame(date_range, columns=["Date"])
2305
- df_covid["national_lockdown"] = 0
2306
-
2307
- for start, end in lockdown_periods:
2308
- df_covid.loc[(df_covid["Date"] >= start) & (df_covid["Date"] <= end), "national_lockdown"] = 1
2309
-
2310
- df_covid['week_start'] = df_covid["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2311
- df_covid.drop("Date", axis=1, inplace=True)
2312
- df_covid.rename(columns={"week_start": "OBS"}, inplace=True)
2313
- df_national_lockdown_total = df_covid.groupby('OBS').sum(numeric_only=True)
2314
- df_national_lockdown_total.rename(columns={"national_lockdown": "covid_uk_national_lockdown_total"}, inplace=True)
2315
-
2316
- df_national_lockdown_1 = df_national_lockdown_total.copy(deep=True)
2317
- df_national_lockdown_2 = df_national_lockdown_total.copy(deep=True)
2318
- df_national_lockdown_3 = df_national_lockdown_total.copy(deep=True)
2319
-
2320
- df_national_lockdown_1.loc[df_national_lockdown_1.index > "2020-05-24"] = 0
2321
- df_national_lockdown_1.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_1"}, inplace=True)
2322
-
2323
- df_national_lockdown_2.loc[df_national_lockdown_2.index < "2020-11-05"] = 0
2324
- df_national_lockdown_2.loc[df_national_lockdown_2.index > "2020-12-02"] = 0
2325
- df_national_lockdown_2.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_2"}, inplace=True)
2326
-
2327
- df_national_lockdown_3.loc[df_national_lockdown_3.index < "2021-01-04"] = 0
2328
- df_national_lockdown_3.rename(columns={"covid_uk_national_lockdown_total": "covid_uk_national_lockdown_3"}, inplace=True)
2164
+ # ------------------------------------------------
2165
+ # 3. Public holidays (daily) and specific holiday columns
2166
+ # ------------------------------------------------
2167
+ for country in countries:
2168
+ country_holidays = holidays.CountryHoliday(
2169
+ country,
2170
+ years=range(int(start_date[:4]), datetime.today().year + 1)
2171
+ )
2172
+ # Daily indicator: 1 if that date is a holiday
2173
+ df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(
2174
+ lambda x: 1 if x in country_holidays else 0
2175
+ )
2176
+ # Create columns for specific holiday names
2177
+ for date_hol, name in country_holidays.items():
2178
+ col_name = f"seas_{name.replace(' ', '_').lower()}_{country.lower()}"
2179
+ if col_name not in df_daily.columns:
2180
+ df_daily[col_name] = 0
2181
+ df_daily.loc[df_daily["Date"] == pd.Timestamp(date_hol), col_name] = 1
2182
+
2183
+ # ------------------------------------------------
2184
+ # 4. Add daily indicators for last day & last Friday of month
2185
+ # Then aggregate them to weekly level using .max()
2186
+ # ------------------------------------------------
2187
+ # Last day of month (daily)
2188
+ df_daily["seas_last_day_of_month"] = df_daily["Date"].apply(
2189
+ lambda d: 1 if d == d.to_period("M").to_timestamp("M") else 0
2190
+ )
2329
2191
 
2330
- df_final_covid = pd.concat([df_national_lockdown_total, df_national_lockdown_1, df_national_lockdown_2, df_national_lockdown_3], axis=1)
2331
- df_final_covid.reset_index(inplace=True)
2332
- df_final_covid.rename(columns={"index": "OBS"}, inplace=True)
2333
-
2334
- # Create seasonal indicators for the last day and last Friday of the month
2335
- min_date = '2019-12-29'
2336
- max_date = datetime.today().strftime('%Y-%m-%d')
2337
- date_range_seas = pd.date_range(start=min_date, end=max_date)
2338
-
2339
- df_seas = pd.DataFrame(date_range_seas, columns=['Date'])
2340
- df_seas['Last_Day_of_Month'] = df_seas['Date'].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)
2341
-
2192
+ # Last Friday of month (daily)
2342
2193
  def is_last_friday(date):
2343
- last_day_of_month = date.to_period('M').to_timestamp('M')
2194
+ # last day of the month
2195
+ last_day_of_month = date.to_period("M").to_timestamp("M")
2344
2196
  last_day_weekday = last_day_of_month.dayofweek
2197
+ # Determine how many days we go back from the last day to get Friday
2345
2198
  if last_day_weekday >= 4:
2346
2199
  days_to_subtract = last_day_weekday - 4
2347
2200
  else:
2348
2201
  days_to_subtract = last_day_weekday + 3
2349
2202
  last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
2350
2203
  return 1 if date == last_friday else 0
2204
+
2205
+ df_daily["seas_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
2206
+
2207
+ # ------------------------------------------------
2208
+ # 5. Weekly aggregation for HOLIDAYS & monthly dummies
2209
+ # (Using .max() for holiday indicators so they become binary)
2210
+ # ------------------------------------------------
2211
+ # For monthly dummies, create a daily col "Month", then get_dummies
2212
+ df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
2213
+ df_monthly_dummies = pd.get_dummies(
2214
+ df_daily,
2215
+ prefix="seas",
2216
+ columns=["Month"],
2217
+ dtype=int
2218
+ )
2219
+ # Recalculate 'week_start' (already in df_daily, but just to be sure)
2220
+ df_monthly_dummies['week_start'] = df_daily['week_start']
2221
+
2222
+ # Group monthly dummies by .sum() or .mean()—often we average across the week
2223
+ df_monthly_dummies = (
2224
+ df_monthly_dummies
2225
+ .groupby('week_start')
2226
+ .sum(numeric_only=True) # sum the daily flags
2227
+ .reset_index()
2228
+ .rename(columns={'week_start': "Date"})
2229
+ .set_index("Date")
2230
+ )
2231
+ # Divide the monthly dummy columns by 7 to spread them across the week
2232
+ monthly_cols = [
2233
+ c for c in df_monthly_dummies.columns
2234
+ if c.startswith("seas_month_")
2235
+ ]
2236
+ df_monthly_dummies[monthly_cols] = df_monthly_dummies[monthly_cols] / 7
2237
+
2238
+ # Group holiday columns (and last-day-of-month columns) by .max() => binary
2239
+ df_holidays = (
2240
+ df_daily
2241
+ .groupby('week_start')
2242
+ .max(numeric_only=True) # use max => if any day=1, entire week=1
2243
+ .reset_index()
2244
+ .rename(columns={'week_start': "Date"})
2245
+ .set_index("Date")
2246
+ )
2351
2247
 
2352
- df_seas['Last_Friday_of_Month'] = df_seas['Date'].apply(is_last_friday)
2248
+ # ------------------------------------------------
2249
+ # 6. Combine weekly start, monthly dummies, holiday flags
2250
+ # ------------------------------------------------
2251
+ df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
2252
+ df_combined = pd.concat([df_combined, df_holidays], axis=1)
2253
+ df_combined = df_combined.loc[:, ~df_combined.columns.duplicated()]
2254
+
2255
+ # ------------------------------------------------
2256
+ # 7. Create weekly dummies for Week of Year & yearly dummies
2257
+ # ------------------------------------------------
2258
+ df_combined.reset_index(inplace=True)
2259
+ df_combined.rename(columns={"index": "old_index"}, inplace=True) # just in case
2353
2260
 
2354
- df_seas['week_start'] = df_seas["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
2355
- df_seas = df_seas.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
2356
- df_seas.set_index("Date", inplace=True)
2261
+ df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
2262
+ df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Week"], dtype=int)
2357
2263
 
2358
- # Combine all dataframes
2359
- df_combined = df_combined.reset_index().rename(columns={"Date": "OBS"})
2360
- df_final_combined = pd.merge(df_combined, df_final_covid, how='left', left_on='OBS', right_on='OBS')
2361
- df_final_combined = pd.merge(df_final_combined, df_seas, how='left', left_on='OBS', right_on='Date')
2362
-
2363
- # Fill any NaN values with 0
2364
- df_final_combined.fillna(0, inplace=True)
2264
+ df_combined["Year"] = df_combined["Date"].dt.year
2265
+ df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"], dtype=int)
2266
+
2267
+ # ------------------------------------------------
2268
+ # 8. Add constant & trend
2269
+ # ------------------------------------------------
2270
+ df_combined["Constant"] = 1
2271
+ df_combined["Trend"] = df_combined.index + 1
2365
2272
 
2366
- return df_final_combined
2273
+ # ------------------------------------------------
2274
+ # 9. Rename Date -> OBS and return
2275
+ # ------------------------------------------------
2276
+ df_combined.rename(columns={"Date": "OBS"}, inplace=True)
2277
+
2278
+ return df_combined
2367
2279
 
2368
2280
  def pull_weather(self, week_commencing, country) -> pd.DataFrame:
2369
2281
  import pandas as pd
@@ -2966,4 +2878,240 @@ class datapull:
2966
2878
 
2967
2879
  final_weather = ims_proc.rename_cols(merged_df, 'seas_')
2968
2880
 
2969
- return final_weather
2881
+ return final_weather
2882
+
2883
+ def pull_macro_ons_uk(self, cdid_list=None, week_start_day="mon", sector=None):
2884
+ """
2885
+ Fetches time series data for multiple CDIDs from the ONS API, converts it to daily frequency,
2886
+ aggregates it to weekly averages, and renames variables based on specified rules.
2887
+
2888
+ Parameters:
2889
+ cdid_list (list): A list of additional CDIDs to fetch (e.g., ['JP9Z', 'UKPOP']). Defaults to None.
2890
+ week_start_day (str): The day the week starts on (e.g., 'Monday', 'Sunday').
2891
+ sector (str): The sector for which the standard CDIDs are fetched (e.g., 'fast_food', 'retail').
2892
+
2893
+ Returns:
2894
+ pd.DataFrame: A DataFrame with weekly frequency, containing a 'week_commencing' column
2895
+ and all series as renamed columns.
2896
+ """
2897
+ # Define CDIDs for sectors and defaults
2898
+ sector_cdids = {
2899
+ "fast_food": ["L7TD", "L78Q", "DOAD"],
2900
+ "default": ["D7G7", "MGSX", "UKPOP", "IHYQ", "YBEZ", "MS77"],
2901
+ }
2902
+
2903
+ default_cdids = sector_cdids["default"]
2904
+ sector_specific_cdids = sector_cdids.get(sector, [])
2905
+ standard_cdids = list(set(default_cdids + sector_specific_cdids)) # Avoid duplicates
2906
+
2907
+ # Combine standard CDIDs and additional CDIDs
2908
+ if cdid_list is None:
2909
+ cdid_list = []
2910
+ cdid_list = list(set(standard_cdids + cdid_list)) # Avoid duplicates
2911
+
2912
+ base_search_url = "https://api.beta.ons.gov.uk/v1/search?content_type=timeseries&cdids="
2913
+ base_data_url = "https://api.beta.ons.gov.uk/v1/data?uri="
2914
+ combined_df = pd.DataFrame()
2915
+
2916
+ # Map week start day to pandas weekday convention
2917
+ days_map = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
2918
+ if week_start_day not in days_map:
2919
+ raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
2920
+ week_start = days_map[week_start_day]
2921
+
2922
+ for cdid in cdid_list:
2923
+ try:
2924
+ # Search for the series
2925
+ search_url = f"{base_search_url}{cdid}"
2926
+ search_response = requests.get(search_url)
2927
+ search_response.raise_for_status()
2928
+ search_data = search_response.json()
2929
+
2930
+ items = search_data.get("items", [])
2931
+ if not items:
2932
+ print(f"No data found for CDID: {cdid}")
2933
+ continue
2934
+
2935
+ # Extract series name and latest release URI
2936
+ series_name = items[0].get("title", f"Series_{cdid}")
2937
+ latest_date = max(
2938
+ datetime.fromisoformat(item["release_date"].replace("Z", "+00:00"))
2939
+ for item in items if "release_date" in item
2940
+ )
2941
+ latest_uri = next(
2942
+ item["uri"] for item in items
2943
+ if "release_date" in item and datetime.fromisoformat(item["release_date"].replace("Z", "+00:00")) == latest_date
2944
+ )
2945
+
2946
+ # Fetch the dataset
2947
+ data_url = f"{base_data_url}{latest_uri}"
2948
+ data_response = requests.get(data_url)
2949
+ data_response.raise_for_status()
2950
+ data_json = data_response.json()
2951
+
2952
+ # Detect the frequency and process accordingly
2953
+ if "months" in data_json and data_json["months"]:
2954
+ frequency_key = "months"
2955
+ elif "quarters" in data_json and data_json["quarters"]:
2956
+ frequency_key = "quarters"
2957
+ elif "years" in data_json and data_json["years"]:
2958
+ frequency_key = "years"
2959
+ else:
2960
+ print(f"Unsupported frequency or no data for CDID: {cdid}")
2961
+ continue
2962
+
2963
+ # Prepare the DataFrame
2964
+ df = pd.DataFrame(data_json[frequency_key])
2965
+
2966
+ # Parse the 'date' field based on frequency
2967
+ if frequency_key == "months":
2968
+ df["date"] = pd.to_datetime(df["date"], format="%Y %b", errors="coerce")
2969
+ elif frequency_key == "quarters":
2970
+ def parse_quarter(quarter_str):
2971
+ year, qtr = quarter_str.split(" Q")
2972
+ month = {"1": 1, "2": 4, "3": 7, "4": 10}[qtr]
2973
+ return datetime(int(year), month, 1)
2974
+ df["date"] = df["date"].apply(parse_quarter)
2975
+ elif frequency_key == "years":
2976
+ df["date"] = pd.to_datetime(df["date"], format="%Y", errors="coerce")
2977
+
2978
+ df["value"] = pd.to_numeric(df["value"], errors="coerce")
2979
+ df.rename(columns={"value": series_name}, inplace=True)
2980
+
2981
+ # Combine data
2982
+ df = df.loc[:, ["date", series_name]].dropna().reset_index(drop=True)
2983
+ if combined_df.empty:
2984
+ combined_df = df
2985
+ else:
2986
+ combined_df = pd.merge(combined_df, df, on="date", how="outer")
2987
+
2988
+ except requests.exceptions.RequestException as e:
2989
+ print(f"Error fetching data for CDID {cdid}: {e}")
2990
+ except (KeyError, ValueError) as e:
2991
+ print(f"Error processing data for CDID {cdid}: {e}")
2992
+
2993
+ if not combined_df.empty:
2994
+ min_date = combined_df["date"].min()
2995
+ max_date = datetime.today()
2996
+ date_range = pd.date_range(start=min_date, end=max_date, freq='D')
2997
+ daily_df = pd.DataFrame(date_range, columns=['date'])
2998
+ daily_df = pd.merge(daily_df, combined_df, on="date", how="left")
2999
+ daily_df = daily_df.ffill()
3000
+
3001
+ # Aggregate to weekly frequency
3002
+ daily_df["week_commencing"] = daily_df["date"] - pd.to_timedelta((daily_df["date"].dt.weekday - week_start) % 7, unit='D')
3003
+ weekly_df = daily_df.groupby("week_commencing").mean(numeric_only=True).reset_index()
3004
+
3005
+ def clean_column_name(name):
3006
+ name = re.sub(r"\(.*?\)", "", name)
3007
+ name = re.split(r":", name)[0]
3008
+ name = re.sub(r"\d+", "", name)
3009
+ name = re.sub(r"\b(annual|rate)\b", "", name, flags=re.IGNORECASE)
3010
+ name = re.sub(r"[^\w\s]", "", name)
3011
+ name = name.replace(" ", "_")
3012
+ name = re.sub(r"_+", "_", name)
3013
+ name = name.rstrip("_")
3014
+ return f"macro_{name.lower()}_uk"
3015
+
3016
+ weekly_df.columns = [clean_column_name(col) if col != "week_commencing" else col for col in weekly_df.columns]
3017
+ weekly_df.rename(columns={"week_commencing": "OBS"}, inplace=True)
3018
+
3019
+ weekly_df = weekly_df.fillna(0)
3020
+
3021
+ return weekly_df
3022
+ else:
3023
+ print("No data available to process.")
3024
+ return pd.DataFrame()
3025
+
3026
+ def pull_yfinance(self, tickers=None, week_start_day="mon"):
3027
+ """
3028
+ Fetches stock data for multiple tickers from Yahoo Finance, converts it to daily frequency,
3029
+ aggregates it to weekly averages, and renames variables.
3030
+
3031
+ Parameters:
3032
+ tickers (list): A list of additional stock tickers to fetch (e.g., ['AAPL', 'MSFT']). Defaults to None.
3033
+ week_start_day (str): The day the week starts on (e.g., 'Monday', 'Sunday').
3034
+
3035
+ Returns:
3036
+ pd.DataFrame: A DataFrame with weekly frequency, containing an 'OBS' column
3037
+ and aggregated stock data for the specified tickers, with NaN values filled with 0.
3038
+ """
3039
+ # Define default tickers
3040
+ default_tickers = ["^FTSE", "GBPUSD=X", "GBPEUR=X", "^GSPC"]
3041
+
3042
+ # Combine default tickers with additional ones
3043
+ if tickers is None:
3044
+ tickers = []
3045
+ tickers = list(set(default_tickers + tickers)) # Ensure no duplicates
3046
+
3047
+ # Automatically set end_date to today
3048
+ end_date = datetime.today().strftime("%Y-%m-%d")
3049
+
3050
+ # Mapping week start day to pandas weekday convention
3051
+ days_map = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
3052
+ if week_start_day not in days_map:
3053
+ raise ValueError("Invalid week start day. Choose from: " + ", ".join(days_map.keys()))
3054
+ week_start = days_map[week_start_day]
3055
+
3056
+ # Fetch data for all tickers without specifying a start date to get all available data
3057
+ data = yf.download(tickers, end=end_date, group_by="ticker", auto_adjust=True)
3058
+
3059
+ # Process the data
3060
+ combined_df = pd.DataFrame()
3061
+ for ticker in tickers:
3062
+ try:
3063
+ # Extract the ticker's data
3064
+ ticker_data = data[ticker] if len(tickers) > 1 else data
3065
+ ticker_data = ticker_data.reset_index()
3066
+
3067
+ # Ensure necessary columns are present
3068
+ if "Close" not in ticker_data.columns:
3069
+ raise ValueError(f"Ticker {ticker} does not have 'Close' price data.")
3070
+
3071
+ # Keep only relevant columns
3072
+ ticker_data = ticker_data[["Date", "Close"]]
3073
+ ticker_data.rename(columns={"Close": ticker}, inplace=True)
3074
+
3075
+ # Merge data
3076
+ if combined_df.empty:
3077
+ combined_df = ticker_data
3078
+ else:
3079
+ combined_df = pd.merge(combined_df, ticker_data, on="Date", how="outer")
3080
+
3081
+ except KeyError:
3082
+ print(f"Data for ticker {ticker} not available.")
3083
+ except Exception as e:
3084
+ print(f"Error processing ticker {ticker}: {e}")
3085
+
3086
+ if not combined_df.empty:
3087
+ # Convert to daily frequency
3088
+ combined_df["Date"] = pd.to_datetime(combined_df["Date"])
3089
+ combined_df.set_index("Date", inplace=True)
3090
+
3091
+ # Fill missing dates
3092
+ min_date = combined_df.index.min()
3093
+ max_date = combined_df.index.max()
3094
+ daily_index = pd.date_range(start=min_date, end=max_date, freq='D')
3095
+ combined_df = combined_df.reindex(daily_index)
3096
+ combined_df.index.name = "Date"
3097
+ combined_df = combined_df.ffill()
3098
+
3099
+ # Aggregate to weekly frequency
3100
+ combined_df["OBS"] = combined_df.index - pd.to_timedelta((combined_df.index.weekday - week_start) % 7, unit="D")
3101
+ weekly_df = combined_df.groupby("OBS").mean(numeric_only=True).reset_index()
3102
+
3103
+ # Fill NaN values with 0
3104
+ weekly_df = weekly_df.fillna(0)
3105
+
3106
+ # Clean column names
3107
+ def clean_column_name(name):
3108
+ name = re.sub(r"[^\w\s]", "", name)
3109
+ return f"macro_{name.lower()}"
3110
+
3111
+ weekly_df.columns = [clean_column_name(col) if col != "OBS" else col for col in weekly_df.columns]
3112
+
3113
+ return weekly_df
3114
+
3115
+ else:
3116
+ print("No data available to process.")
3117
+ return pd.DataFrame()
@@ -1,10 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.3.2
3
+ Version: 0.8.1
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
7
- License: MIT
8
7
  Keywords: python,data processing,apis
9
8
  Classifier: Development Status :: 3 - Alpha
10
9
  Classifier: Intended Audience :: Developers
@@ -20,93 +19,113 @@ Requires-Dist: fredapi
20
19
  Requires-Dist: requests-cache
21
20
  Requires-Dist: geopy
22
21
  Requires-Dist: bs4
22
+ Requires-Dist: yfinance
23
+ Requires-Dist: holidays
23
24
 
24
25
  # IMS Package Documentation
25
26
 
26
- The IMS package is a python library for processing incoming data into a format that can be used for specifically for econometrics projects that use weekly timeseries data. IMS processing offers a variety of functions to manipulate and analyze data efficiently. Here are the functionalities provided by the package:
27
+ The **IMSciences package** is a Python library designed to process incoming data into a format tailored for econometrics projects, particularly those utilising weekly time series data. This package offers a suite of functions for efficient data manipulation and analysis.
27
28
 
28
- ## Data Processing
29
+ ---
30
+
31
+ ## Key Features
32
+ - Seamless data processing for econometrics workflows.
33
+ - Aggregation, filtering, and transformation of time series data.
34
+ - Integration with external data sources such as FRED, Bank of England, ONS, OECD, and Yahoo Finance.
35
+
36
+ ---
37
+
38
+ Table of Contents
39
+ =================
29
40
 
30
- # Function Descriptions and Usage Examples
41
+ 1. [Data Processing](#data-processing)
42
+ 2. [Data Pulling](#data-pulling)
43
+ 3. [Installation](#installation)
44
+ 4. [Usage](#usage)
45
+ 5. [License](#license)
46
+
47
+ ---
31
48
 
32
- ## 1. `get_wd_levels`
49
+ ## Data Processing
50
+
51
+ ## 1. get_wd_levels
33
52
  - **Description**: Get the working directory with the option of moving up parents.
34
53
  - **Usage**: `get_wd_levels(levels)`
35
54
  - **Example**: `get_wd_levels(0)`
36
55
 
37
56
  ---
38
57
 
39
- ## 2. `remove_rows`
58
+ ## 2. remove_rows
40
59
  - **Description**: Removes a specified number of rows from a pandas DataFrame.
41
60
  - **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
42
61
  - **Example**: `remove_rows(df, 2)`
43
62
 
44
63
  ---
45
64
 
46
- ## 3. `aggregate_daily_to_wc_long`
65
+ ## 3. aggregate_daily_to_wc_long
47
66
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
48
67
  - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
49
68
  - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`
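For a concrete picture of the call above, here is a minimal sketch on a toy daily dataset (the column names and values are hypothetical; the functions are called as methods on a `dataprocessing` instance, as in the Usage section below):

```python
import pandas as pd
from imsciences import dataprocessing

ims = dataprocessing()

# Toy daily data: two platforms, 14 days each of cost and clicks
daily = pd.DataFrame({
    "date": list(pd.date_range("2024-01-01", periods=14, freq="D")) * 2,
    "platform": ["facebook"] * 14 + ["google"] * 14,
    "cost": range(28),
    "clicks": range(28),
})

# Aggregate to Monday-commencing weeks, summing cost and clicks per platform
weekly = ims.aggregate_daily_to_wc_long(daily, "date", ["platform"], ["cost", "clicks"], "mon")
print(weekly.head())
```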
50
69
 
51
70
  ---
52
71
 
53
- ## 4. `convert_monthly_to_daily`
72
+ ## 4. convert_monthly_to_daily
54
73
  - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
55
74
  - **Usage**: `convert_monthly_to_daily(df, date_column, divide)`
56
75
  - **Example**: `convert_monthly_to_daily(df, 'date')`
57
76
 
58
77
  ---
59
78
 
60
- ## 5. `plot_two`
79
+ ## 5. plot_two
61
80
  - **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
62
81
  - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
63
82
  - **Example**: `plot_two(df1, 'cost', df2, 'cost', 'obs', True)`
64
83
 
65
84
  ---
66
85
 
67
- ## 6. `remove_nan_rows`
86
+ ## 6. remove_nan_rows
68
87
  - **Description**: Removes rows from a DataFrame where the specified column has NaN values.
69
88
  - **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
70
89
  - **Example**: `remove_nan_rows(df, 'date')`
71
90
 
72
91
  ---
73
92
 
74
- ## 7. `filter_rows`
93
+ ## 7. filter_rows
75
94
  - **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
76
95
  - **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
77
96
  - **Example**: `filter_rows(df, 'country', ['UK', 'IE'])`
78
97
 
79
98
  ---
80
99
 
81
- ## 8. `plot_one`
100
+ ## 8. plot_one
82
101
  - **Description**: Plots a specified column from a DataFrame.
83
102
  - **Usage**: `plot_one(df1, col1, date_column)`
84
103
  - **Example**: `plot_one(df, 'Spend', 'OBS')`
85
104
 
86
105
  ---
87
106
 
88
- ## 9. `week_of_year_mapping`
107
+ ## 9. week_of_year_mapping
89
108
  - **Description**: Converts a week column in `yyyy-Www` or `yyyy-ww` format to week commencing date.
90
109
  - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
91
110
  - **Example**: `week_of_year_mapping(df, 'week', 'mon')`
92
111
 
93
112
  ---
94
113
 
95
- ## 10. `exclude_rows`
114
+ ## 10. exclude_rows
96
115
  - **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
97
116
  - **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
98
117
  - **Example**: `exclude_rows(df, 'week', ['2022-W20', '2022-W21'])`
99
118
 
100
119
  ---
101
120
 
102
- ## 11. `rename_cols`
121
+ ## 11. rename_cols
103
122
  - **Description**: Renames columns in a pandas DataFrame.
104
123
  - **Usage**: `rename_cols(df, name)`
105
124
  - **Example**: `rename_cols(df, 'ame_facebook')`
106
125
 
107
126
  ---
108
127
 
109
- ## 12. `merge_new_and_old`
128
+ ## 12. merge_new_and_old
110
129
  - **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
111
130
  - Merges numeric values from specified columns in the old and new DataFrames based on a given cutoff date.
112
131
  - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
@@ -114,21 +133,21 @@ The IMS package is a python library for processing incoming data into a format t
114
133
 
115
134
  ---
116
135
 
117
- ## 13. `merge_dataframes_on_date`
136
+ ## 13. merge_dataframes_on_date
118
137
  - **Description**: Merge a list of DataFrames on a common column.
119
138
  - **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
120
139
  - **Example**: `merge_dataframes_on_date([df1, df2, df3], common_column='OBS', merge_how='outer')`
121
140
 
122
141
  ---
123
142
 
124
- ## 14. `merge_and_update_dfs`
143
+ ## 14. merge_and_update_dfs
125
144
  - **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
126
145
  - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
127
146
  - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`
128
147
 
129
148
  ---
130
149
 
131
- ## 15. `convert_us_to_uk_dates`
150
+ ## 15. convert_us_to_uk_dates
132
151
  - **Description**: Convert a DataFrame column with mixed date formats to datetime.
133
152
  - **Usage**: `convert_us_to_uk_dates(df, date_col)`
134
153
  - **Example**: `convert_us_to_uk_dates(df, 'date')`
@@ -142,189 +161,189 @@ The IMS package is a python library for processing incoming data into a format t
142
161
 
143
162
  ---
144
163
 
145
- ## 17. `pivot_table`
164
+ ## 17. pivot_table
146
165
  - **Description**: Dynamically pivots a DataFrame based on specified columns.
147
166
  - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
148
167
  - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1', 'OBS': ' >= datetime(2019,9,9)', 'Metric Short Names': ' == spd'}, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=True, fill_missing_weekly_dates=True, week_commencing='W-MON')`
149
168
 
150
169
  ---
151
170
 
152
- ## 18. `apply_lookup_table_for_columns`
171
+ ## 18. apply_lookup_table_for_columns
153
172
  - **Description**: Equivalent of XLOOKUP in Excel. Allows mapping of a dictionary of substrings within a column.
154
173
  - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
155
174
  - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd', 'clicks': 'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')`
156
175
 
157
176
  ---
158
177
 
159
- ## 19. `aggregate_daily_to_wc_wide`
178
+ ## 19. aggregate_daily_to_wc_wide
160
179
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
161
180
  - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
162
181
  - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average', True)`
163
182
 
164
183
  ---
165
184
 
166
- ## 20. `merge_cols_with_seperator`
185
+ ## 20. merge_cols_with_seperator
167
186
  - **Description**: Merges multiple columns in a DataFrame into one column with a separator `_`. Useful for lookup tables.
168
187
  - **Usage**: `merge_cols_with_seperator(df, col_names, seperator='_', output_column_name='Merged', starting_prefix_str=None, ending_prefix_str=None)`
169
188
  - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], seperator='|', output_column_name='Merged Columns', starting_prefix_str='start_', ending_prefix_str='_end')`
170
189
 
171
190
  ---
172
191
 
173
- ## 21. `check_sum_of_df_cols_are_equal`
192
+ ## 21. check_sum_of_df_cols_are_equal
174
193
  - **Description**: Checks if the sum of two columns in two DataFrames are the same, and provides the sums and differences.
175
194
  - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
176
195
  - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`
177
196
 
178
197
  ---
179
198
 
180
- ## 22. `convert_2_df_cols_to_dict`
199
+ ## 22. convert_2_df_cols_to_dict
181
200
  - **Description**: Creates a dictionary using two columns in a DataFrame.
182
201
  - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
183
202
  - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`
184
203
 
185
204
  ---
186
205
 
187
- ## 23. `create_FY_and_H_columns`
206
+ ## 23. create_FY_and_H_columns
188
207
  - **Description**: Creates financial year, half-year, and financial half-year columns.
189
208
  - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
190
209
  - **Example**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023', short_format='Yes', half_years='Yes', combined_FY_and_H='Yes')`
191
210
 
192
211
  ---
193
212
 
194
- ## 24. `keyword_lookup_replacement`
213
+ ## 24. keyword_lookup_replacement
195
214
  - **Description**: Updates chosen values in a specified column of the DataFrame based on a lookup dictionary.
196
215
  - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
197
216
  - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment', 'product'], qlik_dict_for_channel, output_column_name='Channel New')`
198
217
 
199
218
  ---
200
219
 
201
- ## 25. `create_new_version_of_col_using_LUT`
220
+ ## 25. create_new_version_of_col_using_LUT
202
221
  - **Description**: Creates a new column in a DataFrame by mapping values from an old column using a lookup table.
203
222
  - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
204
223
  - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', search_campaign_name_retag_lut, 'Campaign Name New')`
205
224
 
206
225
  ---
207
226
 
208
- ## 26. `convert_df_wide_2_long`
227
+ ## 26. convert_df_wide_2_long
209
228
  - **Description**: Converts a DataFrame from wide to long format.
210
229
  - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
211
230
  - **Example**: `convert_df_wide_2_long(df, ['Media Cost', 'Impressions', 'Clicks'], variable_col_name='Metric')`
212
231
 
213
232
  ---
214
233
 
215
- ## 27. `manually_edit_data`
234
+ ## 27. manually_edit_data
216
235
  - **Description**: Enables manual updates to DataFrame cells by applying filters and editing a column.
217
236
  - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
218
237
  - **Example**: `manually_edit_data(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'}, 'Master Include', 1, change_in_existing_df_col='Yes', new_col_to_change_name='Master Include', manual_edit_col_name='Manual Changes')`
219
238
 
220
239
  ---
221
240
 
222
- ## 28. `format_numbers_with_commas`
241
+ ## 28. format_numbers_with_commas
223
242
  - **Description**: Formats numeric data into numbers with commas and specified decimal places.
224
243
  - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
225
244
  - **Example**: `format_numbers_with_commas(df, 1)`
226
245
 
227
246
  ---
228
247
 
229
- ## 29. `filter_df_on_multiple_conditions`
248
+ ## 29. filter_df_on_multiple_conditions
230
249
  - **Description**: Filters a DataFrame based on multiple conditions from a dictionary.
231
250
  - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
232
251
  - **Example**: `filter_df_on_multiple_conditions(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'})`
233
252
 
234
253
  ---
235
254
 
236
- ## 30. `read_and_concatenate_files`
255
+ ## 30. read_and_concatenate_files
237
256
  - **Description**: Reads and concatenates all files of a specified type in a folder.
238
257
  - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
239
258
  - **Example**: `read_and_concatenate_files(folder_path, file_type='csv')`
240
259
 
241
260
  ---
242
261
 
243
- ## 31. `remove_zero_values`
262
+ ## 31. remove_zero_values
244
263
  - **Description**: Removes rows with zero values in a specified column.
245
264
  - **Usage**: `remove_zero_values(data_frame, column_to_filter)`
246
265
  - **Example**: `remove_zero_values(df, 'Funeral_Delivery')`
247
266
 
248
267
  ---
249
268
 
250
- ## 32. `upgrade_outdated_packages`
269
+ ## 32. upgrade_outdated_packages
251
270
  - **Description**: Upgrades all outdated packages in the environment.
252
271
  - **Usage**: `upgrade_outdated_packages()`
253
272
  - **Example**: `upgrade_outdated_packages()`
254
273
 
255
274
  ---
256
275
 
257
- ## 33. `convert_mixed_formats_dates`
276
+ ## 33. convert_mixed_formats_dates
258
277
  - **Description**: Converts a mix of US and UK date formats to datetime.
259
278
  - **Usage**: `convert_mixed_formats_dates(df, date_col)`
260
279
  - **Example**: `convert_mixed_formats_dates(df, 'OBS')`
261
280
 
262
281
  ---
263
282
 
264
- ## 34. `fill_weekly_date_range`
283
+ ## 34. fill_weekly_date_range
265
284
  - **Description**: Fills in missing weeks with zero values.
266
285
  - **Usage**: `fill_weekly_date_range(df, date_column, freq)`
267
286
  - **Example**: `fill_weekly_date_range(df, 'OBS', 'W-MON')`
268
287
 
269
288
  ---
270
289
 
271
- ## 35. `add_prefix_and_suffix`
290
+ ## 35. add_prefix_and_suffix
272
291
  - **Description**: Adds prefixes and/or suffixes to column headers.
273
292
  - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
274
293
  - **Example**: `add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')`
275
294
 
276
295
  ---
277
296
 
278
- ## 36. `create_dummies`
297
+ ## 36. create_dummies
279
298
  - **Description**: Converts time series into binary indicators based on a threshold.
280
299
  - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
281
300
  - **Example**: `create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')`
282
301
 
283
302
  ---
284
303
 
285
- ## 37. `replace_substrings`
304
+ ## 37. replace_substrings
286
305
  - **Description**: Replaces substrings in a column of strings using a dictionary and can change column values to lowercase.
287
306
  - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
288
307
  - **Example**: `replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')`
289
308
 
290
309
  ---
291
310
 
292
- ## 38. `add_total_column`
311
+ ## 38. add_total_column
293
312
  - **Description**: Sums all columns (excluding a specified column) to create a total column.
294
313
  - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
295
314
  - **Example**: `add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')`
296
315
 
297
316
  ---
298
317
 
299
- ## 39. `apply_lookup_table_based_on_substring`
318
+ ## 39. apply_lookup_table_based_on_substring
300
319
  - **Description**: Maps substrings in a column to values using a lookup dictionary.
301
320
  - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
302
321
  - **Example**: `apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')`
303
322
 
304
323
  ---
305
324
 
306
- ## 40. `compare_overlap`
325
+ ## 40. compare_overlap
307
326
  - **Description**: Compares matching rows and columns in two DataFrames and outputs the differences.
308
327
  - **Usage**: `compare_overlap(df1, df2, date_col)`
309
328
  - **Example**: `compare_overlap(df_1, df_2, 'obs')`
310
329
 
311
330
  ---
312
331
 
313
- ## 41. `week_commencing_2_week_commencing_conversion`
332
+ ## 41. week_commencing_2_week_commencing_conversion
314
333
  - **Description**: Converts a week commencing column to a different start day.
315
334
  - **Usage**: `week_commencing_2_week_commencing_conversion(df, date_col, week_commencing='sun')`
316
335
  - **Example**: `week_commencing_2_week_commencing_conversion(df, 'obs', week_commencing='mon')`
317
336
 
318
337
  ---
319
338
 
320
- ## 42. `plot_chart`
339
+ ## 42. plot_chart
321
340
  - **Description**: Plots various chart types including line, area, scatter, and bar.
322
341
  - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)`
323
342
  - **Example**: `plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')`
324
343
 
325
344
  ---
326
345
 
327
- ## 43. `plot_two_with_common_cols`
346
+ ## 43. plot_two_with_common_cols
328
347
  - **Description**: Plots charts for two DataFrames based on common column names.
329
348
  - **Usage**: `plot_two_with_common_cols(df1, df2, date_column, same_axis=True)`
330
349
  - **Example**: `plot_two_with_common_cols(df_1, df_2, date_column='obs')`
@@ -333,51 +352,82 @@ The IMS package is a python library for processing incoming data into a format t
333
352
 
334
353
  ## Data Pulling
335
354
 
336
- ## 1. `pull_fred_data`
355
+ ## 1. pull_fred_data
337
356
  - **Description**: Fetch data from FRED using series ID tokens.
338
- - **Usage**: `pull_fred_data(week_commencing, series_id_list)`
339
- - **Example**: `pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])`
357
+ - **Usage**: pull_fred_data(week_commencing, series_id_list)
358
+ - **Example**: pull_fred_data('mon', ['GPDIC1', 'Y057RX1Q020SBEA', 'GCEC1', 'ND000333Q', 'Y006RX1Q020SBEA'])
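As a quick illustration, a minimal sketch of the call above (assuming network access to the FRED API; the series IDs are the ones used in this README):

```python
from imsciences import datapull

pull = datapull()

# Weekly FRED series, aggregated to Monday-commencing weeks
fred_df = pull.pull_fred_data("mon", ["GPDIC1", "GCEC1"])
print(fred_df.head())
```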
340
359
 
341
360
  ---
342
361
 
343
- ## 2. `pull_boe_data`
362
+ ## 2. pull_boe_data
344
363
  - **Description**: Fetch and process Bank of England interest rate data.
345
- - **Usage**: `pull_boe_data(week_commencing)`
346
- - **Example**: `pull_boe_data('mon')`
364
+ - **Usage**: pull_boe_data(week_commencing)
365
+ - **Example**: pull_boe_data('mon')
347
366
 
348
367
  ---
349
368
 
350
- ## 3. `pull_ons_data`
351
- - **Description**: Fetch and process time series data from the ONS API.
352
- - **Usage**: `pull_ons_data(series_list, week_commencing)`
353
- - **Example**: `pull_ons_data([{'series_id': 'LMSBSA', 'dataset_id': 'LMS'}], 'mon')`
354
-
355
- ---
356
-
357
- ## 4. `pull_oecd`
369
+ ## 3. pull_oecd
358
370
  - **Description**: Fetch macroeconomic data from OECD for a specified country.
359
- - **Usage**: `pull_oecd(country='GBR', week_commencing='mon', start_date='1950-01-01')`
360
- - **Example**: `pull_oecd('GBR', 'mon', '1950-01-01')`
371
+ - **Usage**: pull_oecd(country='GBR', week_commencing='mon', start_date='2020-01-01')
372
+ - **Example**: pull_oecd('GBR', 'mon', '2000-01-01')
361
373
 
362
374
  ---
363
375
 
364
- ## 5. `get_google_mobility_data`
376
+ ## 4. get_google_mobility_data
365
377
  - **Description**: Fetch Google Mobility data for the specified country.
366
- - **Usage**: `get_google_mobility_data(country, wc)`
367
- - **Example**: `get_google_mobility_data('United Kingdom', 'mon')`
378
+ - **Usage**: get_google_mobility_data(country, wc)
379
+ - **Example**: get_google_mobility_data('United Kingdom', 'mon')
368
380
 
369
381
  ---
370
382
 
371
- ## 6. `pull_combined_dummies`
383
+ ## 5. pull_seasonality
372
384
  - **Description**: Generate combined dummy variables for seasonality, trends, and COVID lockdowns.
373
- - **Usage**: `pull_combined_dummies(week_commencing)`
374
- - **Example**: `pull_combined_dummies('mon')`
385
+ - **Usage**: pull_seasonality(week_commencing, start_date, countries)
386
+ - **Example**: pull_seasonality('mon', '2020-01-01', ['US', 'GB'])
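A minimal sketch of pulling the seasonality dummies for a UK model (assuming the signature above; country codes follow the `holidays` package conventions, e.g. 'GB' and 'US'):

```python
from imsciences import datapull

pull = datapull()

# Weekly dummies (week-of-year, month, year, public holidays, constant and trend) from 2020 onwards
seas = pull.pull_seasonality("mon", "2020-01-01", ["GB"])
print(seas.filter(like="seas_").head())
```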
375
387
 
376
388
  ---
377
389
 
378
- ## 7. `pull_weather`
390
+ ## 6. pull_weather
379
391
  - **Description**: Fetch and process historical weather data for the specified country.
380
- - **Usage**: `pull_weather(week_commencing, country)`
381
- - **Example**: `pull_weather('mon', 'GBR')`
392
+ - **Usage**: pull_weather(week_commencing, country)
393
+ - **Example**: pull_weather('mon', 'GBR')
394
+
395
+ ---
396
+
397
+ ## 7. pull_macro_ons_uk
398
+ - **Description**: Fetch and process time series data from the Beta ONS API.
399
+ - **Usage**: pull_macro_ons_uk(cdid_list, week_start_day, sector)
400
+ - **Example**: pull_macro_ons_uk(['HBOI'], 'mon', 'fast_food')
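For example, a short sketch (assuming the documented parameters; 'HBOI' is the extra CDID from the example above and 'fast_food' is one of the built-in sector presets):

```python
from imsciences import datapull

pull = datapull()

# Default macro CDIDs plus the fast_food preset and one extra series,
# forward-filled to daily and averaged to Monday-commencing weeks
ons = pull.pull_macro_ons_uk(["HBOI"], "mon", "fast_food")
print(ons.head())
```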
401
+
402
+ ---
403
+
404
+ ## 8. pull_yfinance
405
+ - **Description**: Fetch and process time series data from Yahoo Finance.
406
+ - **Usage**: pull_yfinance(tickers, week_start_day)
407
+ - **Example**: pull_yfinance(['^FTMC', '^IXIC'], 'mon')
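And a corresponding sketch for the Yahoo Finance pull (assuming the documented signature; the listed tickers are added on top of the built-in defaults such as '^FTSE' and 'GBPUSD=X'):

```python
from imsciences import datapull

pull = datapull()

# Closing prices for the default and extra tickers, averaged to Monday-commencing weeks
stocks = pull.pull_yfinance(["^FTMC", "^IXIC"], "mon")
print(stocks.head())
```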
408
+
409
+ ## Installation
410
+
411
+ Install the IMS package via pip:
412
+
413
+ ```bash
414
+ pip install imsciences
415
+ ```
416
+
417
+ ---
418
+
419
+ ## Usage
420
+
421
+ ```python
422
+ from imsciences import *
423
+ ims = dataprocessing()
424
+ ims_pull = datapull()
425
+ ```
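Putting the two classes together, a minimal end-to-end sketch (assuming network access and that each pull returns a week-commencing 'OBS' column, as the seasonality and Yahoo Finance pulls do) could look like this:

```python
from imsciences import dataprocessing, datapull

ims = dataprocessing()
ims_pull = datapull()

# Pull weekly macro, market and seasonality data, all week-commencing Monday
fred = ims_pull.pull_fred_data("mon", ["GPDIC1"])
stocks = ims_pull.pull_yfinance(["^FTMC"], "mon")
seas = ims_pull.pull_seasonality("mon", "2020-01-01", ["GB"])

# Join everything on the shared week-commencing date column
model_base = ims.merge_dataframes_on_date([fred, stocks, seas], common_column="OBS", merge_how="outer")
print(model_base.head())
```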
426
+
427
+ ---
428
+
429
+ ## License
430
+
431
+ This project is licensed under the MIT License.
382
432
 
383
433
  ---
@@ -1,17 +1,17 @@
1
1
  dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
- imsciences/__init__.py,sha256=0IwH7R_2N8vimJJo2DLzIG1hq9ddn8gB6ijlLrQemZs,122
4
+ imsciences/__init__.py,sha256=7CfK2dMjPnBBw6I4st-20MdMlLjZULviFVXF2eMD9NI,80
5
5
  imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
6
- imsciences/datafunctions.py,sha256=lvvodU8dZ9IN_GS7FYMuft9ZsQkD2BMIGQxLiN8GY7c,151557
6
+ imsciences/datafunctions.py,sha256=XrvJWWFh9gdKAoeIHee2nYi0Z0zPxmW3oB6ICnGTxYc,158444
7
7
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
8
8
  imsciences/unittesting.py,sha256=d9H5HN8y7oof59hqN9mGqkjulExqFd93BEW-X8w_Id8,58142
9
9
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
11
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
12
12
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
13
- imsciences-0.6.3.2.dist-info/METADATA,sha256=k22-OJm6rdvDU7mubqDGW1K9Z-inek4VCQ4HdAw51cA,16981
14
- imsciences-0.6.3.2.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
15
- imsciences-0.6.3.2.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
16
- imsciences-0.6.3.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
17
- imsciences-0.6.3.2.dist-info/RECORD,,
13
+ imsciences-0.8.1.dist-info/METADATA,sha256=sJK90uzVkH6KCDVM3hmkbRyGoXNmie8JMoCVLy4J7Fg,17785
14
+ imsciences-0.8.1.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
15
+ imsciences-0.8.1.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
16
+ imsciences-0.8.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
17
+ imsciences-0.8.1.dist-info/RECORD,,