imsciences-0.8-py3-none-any.whl → imsciences-0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ import xml.etree.ElementTree as ET
  from bs4 import BeautifulSoup
  import yfinance as yf
  import holidays
+ from dateutil.easter import easter

  class dataprocessing:

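The only functional change in the import block is `dateutil.easter`, which the new seasonality code below leans on. For reference, a minimal sketch of its behaviour (standard `python-dateutil`, Western/Gregorian calculation by default):

```python
from datetime import timedelta
from dateutil.easter import easter

# easter(year) returns a datetime.date for Easter Sunday (Gregorian method by default)
print(easter(2024))                      # 2024-03-31
print(easter(2024) - timedelta(days=2))  # Good Friday:   2024-03-29
print(easter(2024) + timedelta(days=1))  # Easter Monday: 2024-04-01
```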
@@ -2123,18 +2124,35 @@ class datapull:
  ############################################################### Seasonality ##########################################################################

  def pull_seasonality(self, week_commencing, start_date, countries):
-     # Week commencing dictionary
+     # ---------------------------------------------------------------------
+     # 0. Setup: dictionary for 'week_commencing' to Python weekday() integer
+     # ---------------------------------------------------------------------
      day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
-
-     # Create daily date range dataframe starting from start_date
-     date_range = pd.date_range(start=pd.to_datetime(start_date), end=datetime.today(), freq="d")
+
+     # ---------------------------------------------------------------------
+     # 1. Create daily date range from start_date to today
+     # ---------------------------------------------------------------------
+     date_range = pd.date_range(
+         start=pd.to_datetime(start_date),
+         end=datetime.today(),
+         freq="D"
+     )
      df_daily = pd.DataFrame(date_range, columns=["Date"])

-     # Create weekly date range dataframe
-     df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+     # ---------------------------------------------------------------------
+     # 1.1 Identify "week_start" for each daily row, based on week_commencing
+     # ---------------------------------------------------------------------
+     df_daily['week_start'] = df_daily["Date"].apply(
+         lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7)
+     )
+
+     # ---------------------------------------------------------------------
+     # 2. Build a weekly index (df_weekly_start) with dummy columns
+     # ---------------------------------------------------------------------
      df_weekly_start = df_daily[['week_start']].drop_duplicates().reset_index(drop=True)
      df_weekly_start.rename(columns={'week_start': "Date"}, inplace=True)

+     # Set index to weekly "start of week"
      df_weekly_start.index = np.arange(1, len(df_weekly_start) + 1)
      df_weekly_start.set_index("Date", inplace=True)

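The `(x.weekday() - day_dict[week_commencing]) % 7` expression snaps every date back to the most recent occurrence of the chosen start day, so all dates in the same week share one `week_start`. A standalone sketch of the arithmetic (the `week_start` helper name is hypothetical; the formula is the one above):

```python
import pandas as pd

day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}

def week_start(ts: pd.Timestamp, week_commencing: str) -> pd.Timestamp:
    # Days elapsed since the last occurrence of the chosen weekday (always 0-6)
    return ts - pd.Timedelta(days=(ts.weekday() - day_dict[week_commencing]) % 7)

print(week_start(pd.Timestamp("2024-01-10"), "mon"))  # Wed 10 Jan -> Mon 8 Jan
print(week_start(pd.Timestamp("2024-01-10"), "sun"))  # Wed 10 Jan -> Sun 7 Jan
```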
@@ -2144,79 +2162,236 @@ class datapull:
          col_name = f"dum_{df_weekly_start.index[i].strftime('%Y_%m_%d')}"
          dummy_columns[col_name] = [0] * len(df_weekly_start)
          dummy_columns[col_name][i] = 1
-
+
      df_dummies = pd.DataFrame(dummy_columns, index=df_weekly_start.index)
      df_weekly_start = pd.concat([df_weekly_start, df_dummies], axis=1)

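The loop above (its `for i in range(...)` header sits just outside the hunk) builds one `dum_YYYY_MM_DD` column per week with a single 1 on that week's row, i.e. an identity matrix over the weekly index. An equivalent sketch on a toy index:

```python
import numpy as np
import pandas as pd

weeks = pd.to_datetime(["2024-01-01", "2024-01-08", "2024-01-15"])
# One dum_<week> column per week, 1 only on its own row: an identity matrix
dummies = pd.DataFrame(
    np.eye(len(weeks), dtype=int),
    index=weeks,
    columns=[f"dum_{w.strftime('%Y_%m_%d')}" for w in weeks],
)
print(dummies)
```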
-     # Add public holidays for each country and holiday type
+     # ---------------------------------------------------------------------
+     # 3. Public holidays (daily) from 'holidays' package + each holiday name
+     # ---------------------------------------------------------------------
      for country in countries:
-         country_holidays = holidays.CountryHoliday(country, years=range(int(start_date[:4]), datetime.today().year + 1))
-         df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(lambda x: 1 if x in country_holidays else 0)
-
-         # Extract specific holidays
-         for date, name in country_holidays.items():
+         country_holidays = holidays.CountryHoliday(
+             country,
+             years=range(int(start_date[:4]), datetime.today().year + 1)
+         )
+         # Daily indicator: 1 if that date is a holiday
+         df_daily[f"seas_holiday_{country.lower()}"] = df_daily["Date"].apply(
+             lambda x: 1 if x in country_holidays else 0
+         )
+         # Create columns for specific holiday names
+         for date_hol, name in country_holidays.items():
              col_name = f"seas_{name.replace(' ', '_').lower()}_{country.lower()}"
              if col_name not in df_daily.columns:
                  df_daily[col_name] = 0
-             df_daily.loc[df_daily["Date"] == pd.Timestamp(date), col_name] = 1
+             df_daily.loc[df_daily["Date"] == pd.Timestamp(date_hol), col_name] = 1
+
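This block is behaviourally the same as in 0.8 apart from renaming the loop variable `date` to `date_hol`, which avoids shadowing a common name. For reference, the `holidays` objects involved behave like dicts keyed by `datetime.date` (a quick sketch; the exact holiday names come from the `holidays` package):

```python
import datetime
import holidays

us_holidays = holidays.CountryHoliday("US", years=[2024])

print(datetime.date(2024, 7, 4) in us_holidays)    # True
print(us_holidays.get(datetime.date(2024, 7, 4)))  # 'Independence Day'
for day, name in sorted(us_holidays.items())[:3]:  # (date, name) pairs
    print(day, name)
```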
+     # ---------------------------------------------------------------------
+     # 3.1 Additional Special Days (Father's Day, Mother's Day, etc.)
+     #     We'll add daily columns for each.
+     # ---------------------------------------------------------------------
+     # Initialize columns
+     extra_cols = [
+         "seas_valentines_day",
+         "seas_halloween",
+         "seas_fathers_day_us_uk",
+         "seas_mothers_day_us",
+         "seas_mothers_day_uk",
+         "seas_good_friday",
+         "seas_easter_monday",
+         "seas_black_friday",
+         "seas_cyber_monday",
+     ]
+     for c in extra_cols:
+         df_daily[c] = 0  # default zero

-     # Map daily holidays to weekly aggregation
-     df_daily['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-     df_holidays = df_daily.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
-     df_holidays.set_index("Date", inplace=True)
+     # Helper: nth_weekday_of_month(year, month, weekday, nth=1 => first, 2 => second, etc.)
+     # weekday: Monday=0, Tuesday=1, ... Sunday=6
+     def nth_weekday_of_month(year, month, weekday, nth):
+         """
+         Returns date of the nth <weekday> in <month> of <year>.
+         E.g. nth_weekday_of_month(2023, 6, 6, 3) => 3rd Sunday of June 2023.
+         """
+         # 1st day of the month
+         d = datetime(year, month, 1)
+         # What is the weekday of day #1?
+         w = d.weekday()  # Monday=0, Tuesday=1, ... Sunday=6
+         # If we want, e.g. Sunday=6, we see how many days to add
+         delta = (weekday - w) % 7
+         # This is the first <weekday> in that month
+         first_weekday = d + timedelta(days=delta)
+         # Now add 7*(nth-1) days
+         return first_weekday + timedelta(days=7 * (nth - 1))
+
+     def get_good_friday(year):
+         """Good Friday is 2 days before Easter Sunday."""
+         return easter(year) - timedelta(days=2)
+
+     def get_easter_monday(year):
+         """Easter Monday is 1 day after Easter Sunday."""
+         return easter(year) + timedelta(days=1)
+
+     def get_black_friday(year):
+         """
+         Black Friday = day after US Thanksgiving,
+         and US Thanksgiving is the 4th Thursday in November.
+         """
+         # 4th Thursday in November
+         fourth_thursday = nth_weekday_of_month(year, 11, 3, 4)  # weekday=3 => Thursday
+         return fourth_thursday + timedelta(days=1)
+
+     def get_cyber_monday(year):
+         """Cyber Monday = Monday after US Thanksgiving, i.e. 4 days after 4th Thursday in Nov."""
+         # 4th Thursday in November
+         fourth_thursday = nth_weekday_of_month(year, 11, 3, 4)
+         return fourth_thursday + timedelta(days=4)  # Monday after Thanksgiving
+
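These helpers can be sanity-checked against known dates: Father's Day 2023 fell on 18 June and US Thanksgiving 2023 on 23 November. A standalone copy of the helper for quick verification:

```python
from datetime import datetime, timedelta

def nth_weekday_of_month(year, month, weekday, nth):
    # First day of the month, then step forward to the first <weekday>
    d = datetime(year, month, 1)
    first = d + timedelta(days=(weekday - d.weekday()) % 7)
    return first + timedelta(days=7 * (nth - 1))

print(nth_weekday_of_month(2023, 6, 6, 3).date())   # 2023-06-18: 3rd Sunday of June
thanksgiving = nth_weekday_of_month(2023, 11, 3, 4)
print(thanksgiving.date())                          # 2023-11-23: 4th Thursday of November
print((thanksgiving + timedelta(days=1)).date())    # Black Friday  2023-11-24
print((thanksgiving + timedelta(days=4)).date())    # Cyber Monday  2023-11-27
```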
+     # Loop over each year in range
+     start_yr = int(start_date[:4])
+     end_yr = datetime.today().year
+
+     for yr in range(start_yr, end_yr + 1):
+         # Valentines = Feb 14
+         valentines_day = datetime(yr, 2, 14)
+         # Halloween = Oct 31
+         halloween_day = datetime(yr, 10, 31)
+         # Father's Day (US & UK) = 3rd Sunday in June
+         fathers_day = nth_weekday_of_month(yr, 6, 6, 3)  # Sunday=6
+         # Mother's Day US = 2nd Sunday in May
+         mothers_day_us = nth_weekday_of_month(yr, 5, 6, 2)
+         # Mother's Day UK: 4th Sunday in Lent => "Mothering Sunday".
+         # Officially it falls 3 weeks before Easter Sunday, so Easter - 21 days
+         # should itself be a Sunday; we confirm with a weekday check below.
+         mothering_sunday = easter(yr) - timedelta(days=21)
+         # If for some reason that's not a Sunday (rare corner cases), shift to Sunday:
+         while mothering_sunday.weekday() != 6:  # Sunday=6
+             mothering_sunday -= timedelta(days=1)
+
+         # Good Friday, Easter Monday
+         gf = get_good_friday(yr)
+         em = get_easter_monday(yr)
+
+         # Black Friday, Cyber Monday
+         bf = get_black_friday(yr)
+         cm = get_cyber_monday(yr)
+
+         # Mark them in df_daily if in range
+         for special_date, col in [
+             (valentines_day, "seas_valentines_day"),
+             (halloween_day, "seas_halloween"),
+             (fathers_day, "seas_fathers_day_us_uk"),
+             (mothers_day_us, "seas_mothers_day_us"),
+             (mothering_sunday, "seas_mothers_day_uk"),
+             (gf, "seas_good_friday"),
+             (em, "seas_easter_monday"),
+             (bf, "seas_black_friday"),
+             (cm, "seas_cyber_monday"),
+         ]:
+             # Convert to pd.Timestamp:
+             special_ts = pd.Timestamp(special_date)
+
+             # Only set if it's within your daily range
+             if (special_ts >= df_daily["Date"].min()) and (special_ts <= df_daily["Date"].max()):
+                 df_daily.loc[df_daily["Date"] == special_ts, col] = 1
+
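One observation on the Mothering Sunday logic: `easter(yr)` always returns a Sunday, so `easter(yr) - timedelta(days=21)` is itself always a Sunday and the `while` guard is a defensive no-op. Checking a recent year (Mothering Sunday 2024 fell on 10 March):

```python
from datetime import timedelta
from dateutil.easter import easter

mothering_sunday = easter(2024) - timedelta(days=21)
print(mothering_sunday)            # 2024-03-10 (Easter 2024 was 31 March)
print(mothering_sunday.weekday())  # 6 == Sunday, so the while-loop never fires
```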
+     # ---------------------------------------------------------------------
+     # 4. Add daily indicators for last day & last Friday of month,
+     #    then aggregate them to weekly level using .max()
+     # ---------------------------------------------------------------------
+     # Last day of month (daily)
+     df_daily["seas_last_day_of_month"] = df_daily["Date"].apply(
+         lambda d: 1 if d == d.to_period("M").to_timestamp("M") else 0
+     )

-     # Create monthly dummies (separately from holidays)
-     df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
-     df_monthly_dummies = pd.get_dummies(df_daily, prefix="seas", columns=["Month"], dtype=int)
-     df_monthly_dummies['week_start'] = df_daily["Date"].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
-     df_monthly_dummies = df_monthly_dummies.groupby('week_start').sum(numeric_only=True).reset_index().rename(columns={'week_start': "Date"})
-     df_monthly_dummies.set_index("Date", inplace=True)
+     # Last Friday of month (daily)
+     def is_last_friday(date):
+         # last day of the month
+         last_day_of_month = date.to_period("M").to_timestamp("M")
+         last_day_weekday = last_day_of_month.weekday()  # Monday=0,...Sunday=6
+         # Determine how many days we go back from the last day to get Friday (weekday=4)
+         if last_day_weekday >= 4:
+             days_to_subtract = last_day_weekday - 4
+         else:
+             days_to_subtract = last_day_weekday + 3
+         last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
+         return 1 if date == last_friday else 0

-     # Divide only the monthly dummy columns by 7 (exclude holiday-related columns)
-     monthly_cols = [col for col in df_monthly_dummies.columns if not col.startswith("seas_holiday") and not col.startswith("seas_")]
+     df_daily["seas_last_friday_of_month"] = df_daily["Date"].apply(is_last_friday)
+
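The branch in `is_last_friday` measures the gap from the month's final day back to the preceding (or same) Friday: months ending Friday to Sunday (weekday 4 to 6) step back 0 to 2 days, months ending Monday to Thursday step back 3 to 6. A standalone check, using `pd.offsets.MonthEnd(0)` as an assumed equivalent of the `to_period("M").to_timestamp("M")` month-end idiom:

```python
import pandas as pd

def last_friday_of_month(date: pd.Timestamp) -> pd.Timestamp:
    last_day = date + pd.offsets.MonthEnd(0)  # month end (rolls forward, or stays put if already there)
    wd = last_day.weekday()                   # Mon=0 .. Sun=6
    back = wd - 4 if wd >= 4 else wd + 3      # days back to the preceding Friday
    return last_day - pd.Timedelta(days=back)

print(last_friday_of_month(pd.Timestamp("2024-11-15")))  # 2024-11-29 (Nov 2024 ends on a Saturday)
print(last_friday_of_month(pd.Timestamp("2024-09-15")))  # 2024-09-27 (Sep 2024 ends on a Monday)
```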
+     # ---------------------------------------------------------------------
+     # 5. Weekly aggregation for holiday columns & monthly dummies
+     # ---------------------------------------------------------------------
+     # For monthly dummies, create a daily col "Month", then get_dummies
+     df_daily["Month"] = df_daily["Date"].dt.month_name().str.lower()
+     df_monthly_dummies = pd.get_dummies(
+         df_daily,
+         prefix="seas",
+         columns=["Month"],
+         dtype=int
+     )
+     # Recalculate 'week_start' (already in df_daily, but just to be sure)
+     df_monthly_dummies['week_start'] = df_daily['week_start']
+
+     # Group monthly dummies with .sum(); they are spread across the week below
+     df_monthly_dummies = (
+         df_monthly_dummies
+         .groupby('week_start')
+         .sum(numeric_only=True)   # sum the daily flags
+         .reset_index()
+         .rename(columns={'week_start': "Date"})
+         .set_index("Date")
+     )
+     # Divide monthly dummies by 7 to distribute them across that week
+     monthly_cols = [c for c in df_monthly_dummies.columns if c.startswith("seas_month_")]
      df_monthly_dummies[monthly_cols] = df_monthly_dummies[monthly_cols] / 7

-     # Merge weekly dummies, monthly dummies, and holidays
-     df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)  # Combine weekly and monthly first
-     df_combined = pd.concat([df_combined, df_holidays], axis=1)  # Add holidays separately
+     # Group holiday & special-day columns by .max() => binary at weekly level
+     df_holidays = (
+         df_daily
+         .groupby('week_start')
+         .max(numeric_only=True)   # if any day=1 in that week, entire week=1
+         .reset_index()
+         .rename(columns={'week_start': "Date"})
+         .set_index("Date")
+     )

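The two aggregations differ on purpose: monthly dummies are summed and divided by 7, giving fractional weights for weeks that straddle a month boundary, while holiday and special-day flags use `.max()` and stay binary. One caveat worth flagging: `pd.get_dummies(..., prefix="seas", columns=["Month"])` names its output `seas_january`, `seas_february`, and so on, so the `seas_month_` filter above may not match any column; the prefix is worth verifying against the actual dummy names. A toy illustration of the intended weighting, for a week running Mon 29 Jan to Sun 4 Feb 2024:

```python
import pandas as pd

daily = pd.DataFrame({
    "week_start":      ["2024-01-29"] * 7,
    "seas_january":    [1, 1, 1, 0, 0, 0, 0],  # Mon 29 - Wed 31 Jan
    "seas_february":   [0, 0, 0, 1, 1, 1, 1],  # Thu 1 - Sun 4 Feb
    "seas_holiday_gb": [0, 1, 0, 0, 0, 0, 0],  # one holiday that week
})

print(daily.groupby("week_start")[["seas_january", "seas_february"]].sum() / 7)
# seas_january ~0.43, seas_february ~0.57: the week is split across the two months

print(daily.groupby("week_start")[["seas_holiday_gb"]].max())
# stays 1: any holiday day marks the whole week
```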
-     # Drop duplicate columns if any exist (this ensures holidays are not duplicated)
+     # ---------------------------------------------------------------------
+     # 6. Combine weekly start, monthly dummies, holiday flags
+     # ---------------------------------------------------------------------
+     df_combined = pd.concat([df_weekly_start, df_monthly_dummies], axis=1)
+     df_combined = pd.concat([df_combined, df_holidays], axis=1)
      df_combined = df_combined.loc[:, ~df_combined.columns.duplicated()]

-     # Create weekly dummies
+     # ---------------------------------------------------------------------
+     # 7. Create weekly dummies for Week of Year & yearly dummies
+     # ---------------------------------------------------------------------
      df_combined.reset_index(inplace=True)
+     df_combined.rename(columns={"index": "old_index"}, inplace=True)  # just in case
+
      df_combined["Week"] = df_combined["Date"].dt.isocalendar().week
      df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Week"], dtype=int)

-     # Create yearly dummies
      df_combined["Year"] = df_combined["Date"].dt.year
      df_combined = pd.get_dummies(df_combined, prefix="seas", columns=["Year"], dtype=int)

-     # Add constant
+     # ---------------------------------------------------------------------
+     # 8. Add constant & trend
+     # ---------------------------------------------------------------------
      df_combined["Constant"] = 1
-
-     # Add trend
      df_combined["Trend"] = df_combined.index + 1
-
-     # Create seasonal indicators for the last day and last Friday of the month
-     df_combined['seas_last_day_of_month'] = df_combined["Date"].apply(lambda x: 1 if x == x.to_period('M').to_timestamp('M') else 0)

-     def is_last_friday(date):
-         last_day_of_month = date.to_period('M').to_timestamp('M')
-         last_day_weekday = last_day_of_month.dayofweek
-         if last_day_weekday >= 4:
-             days_to_subtract = last_day_weekday - 4
-         else:
-             days_to_subtract = last_day_weekday + 3
-         last_friday = last_day_of_month - pd.Timedelta(days=days_to_subtract)
-         return 1 if date == last_friday else 0
-
-     df_combined['seas_last_friday_of_month'] = df_combined["Date"].apply(is_last_friday)
-
-     # Rename Date to OBS
+     # ---------------------------------------------------------------------
+     # 9. Rename Date -> OBS and return
+     # ---------------------------------------------------------------------
      df_combined.rename(columns={"Date": "OBS"}, inplace=True)

      return df_combined
+

  def pull_weather(self, week_commencing, country) -> pd.DataFrame:
      import pandas as pd
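To round off the new seasonality pull, a hypothetical call (the import path and country codes are assumptions; `datapull` is the class named in the hunk header). The method returns one row per week, keyed by an `OBS` date column, carrying the weekly dummy, holiday, month, week, year, trend, and constant regressors built above:

```python
from imsciences.datafunctions import datapull  # assumed import path for this sketch

ims = datapull()
seas = ims.pull_seasonality(
    week_commencing="mon",    # weeks start on Monday
    start_date="2022-01-03",  # daily range runs from here to today
    countries=["US", "GB"],   # forwarded to holidays.CountryHoliday
)
print(seas[["OBS", "Constant", "Trend"]].head())
```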
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: imsciences
- Version: 0.8
+ Version: 0.9
  Summary: IMS Data Processing Package
  Author: IMS
  Author-email: cam@im-sciences.com
@@ -35,97 +35,97 @@ The **IMSciences package** is a Python library designed to process incoming data

  ---

- ## Table of Contents
+ Table of Contents
+ =================

- 1. [Data Processing](#data-processing)
- 2. [Data Pulling](#data-pulling)
- 3. [Installation](#installation)
- 4. [Useage](#useage)
- 5. [License](#license)
+ 1. [Data Processing](#Data-Processing)
+ 2. [Data Pulling](#Data-Pulling)
+ 3. [Installation](#Installation)
+ 4. [Useage](#Useage)
+ 5. [License](#License)

  ---

  ## Data Processing

-
- ## 1. `get_wd_levels`
+ ## 1. get_wd_levels
  - **Description**: Get the working directory with the option of moving up parents.
  - **Usage**: `get_wd_levels(levels)`
  - **Example**: `get_wd_levels(0)`

  ---

- ## 2. `remove_rows`
+ ## 2. remove_rows
  - **Description**: Removes a specified number of rows from a pandas DataFrame.
  - **Usage**: `remove_rows(data_frame, num_rows_to_remove)`
  - **Example**: `remove_rows(df, 2)`

  ---

- ## 3. `aggregate_daily_to_wc_long`
+ ## 3. aggregate_daily_to_wc_long
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
  - **Usage**: `aggregate_daily_to_wc_long(df, date_column, group_columns, sum_columns, wc, aggregation='sum')`
  - **Example**: `aggregate_daily_to_wc_long(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average')`

  ---

- ## 4. `convert_monthly_to_daily`
+ ## 4. convert_monthly_to_daily
  - **Description**: Converts monthly data in a DataFrame to daily data by expanding and dividing the numeric values.
  - **Usage**: `convert_monthly_to_daily(df, date_column, divide)`
  - **Example**: `convert_monthly_to_daily(df, 'date')`

  ---

- ## 5. `plot_two`
+ ## 5. plot_two
  - **Description**: Plots specified columns from two different DataFrames using a shared date column. Useful for comparing data.
  - **Usage**: `plot_two(df1, col1, df2, col2, date_column, same_axis=True)`
  - **Example**: `plot_two(df1, 'cost', df2, 'cost', 'obs', True)`

  ---

- ## 6. `remove_nan_rows`
+ ## 6. remove_nan_rows
  - **Description**: Removes rows from a DataFrame where the specified column has NaN values.
  - **Usage**: `remove_nan_rows(df, col_to_remove_rows)`
  - **Example**: `remove_nan_rows(df, 'date')`

  ---

- ## 7. `filter_rows`
+ ## 7. filter_rows
  - **Description**: Filters the DataFrame based on whether the values in a specified column are in a provided list.
  - **Usage**: `filter_rows(df, col_to_filter, list_of_filters)`
  - **Example**: `filter_rows(df, 'country', ['UK', 'IE'])`

  ---

- ## 8. `plot_one`
+ ## 8. plot_one
  - **Description**: Plots a specified column from a DataFrame.
  - **Usage**: `plot_one(df1, col1, date_column)`
  - **Example**: `plot_one(df, 'Spend', 'OBS')`

  ---

- ## 9. `week_of_year_mapping`
+ ## 9. week_of_year_mapping
  - **Description**: Converts a week column in `yyyy-Www` or `yyyy-ww` format to week commencing date.
  - **Usage**: `week_of_year_mapping(df, week_col, start_day_str)`
  - **Example**: `week_of_year_mapping(df, 'week', 'mon')`

  ---

- ## 10. `exclude_rows`
+ ## 10. exclude_rows
  - **Description**: Removes rows from a DataFrame based on whether the values in a specified column are not in a provided list.
  - **Usage**: `exclude_rows(df, col_to_filter, list_of_filters)`
  - **Example**: `exclude_rows(df, 'week', ['2022-W20', '2022-W21'])`

  ---

- ## 11. `rename_cols`
+ ## 11. rename_cols
  - **Description**: Renames columns in a pandas DataFrame.
  - **Usage**: `rename_cols(df, name)`
  - **Example**: `rename_cols(df, 'ame_facebook')`

  ---

- ## 12. `merge_new_and_old`
+ ## 12. merge_new_and_old
  - **Description**: Creates a new DataFrame with two columns: one for dates and one for merged numeric values.
  - Merges numeric values from specified columns in the old and new DataFrames based on a given cutoff date.
  - **Usage**: `merge_new_and_old(old_df, old_col, new_df, new_col, cutoff_date, date_col_name='OBS')`
@@ -133,21 +133,21 @@ The **IMSciences package** is a Python library designed to process incoming data

  ---

- ## 13. `merge_dataframes_on_date`
+ ## 13. merge_dataframes_on_date
  - **Description**: Merge a list of DataFrames on a common column.
  - **Usage**: `merge_dataframes_on_date(dataframes, common_column='OBS', merge_how='outer')`
  - **Example**: `merge_dataframes_on_date([df1, df2, df3], common_column='OBS', merge_how='outer')`

  ---

- ## 14. `merge_and_update_dfs`
+ ## 14. merge_and_update_dfs
  - **Description**: Merges two dataframes on a key column, updates the first dataframe's columns with the second's where available, and returns a dataframe sorted by the key column.
  - **Usage**: `merge_and_update_dfs(df1, df2, key_column)`
  - **Example**: `merge_and_update_dfs(processed_facebook, finalised_meta, 'OBS')`

  ---

- ## 15. `convert_us_to_uk_dates`
+ ## 15. convert_us_to_uk_dates
  - **Description**: Convert a DataFrame column with mixed date formats to datetime.
  - **Usage**: `convert_us_to_uk_dates(df, date_col)`
  - **Example**: `convert_us_to_uk_dates(df, 'date')`
@@ -161,189 +161,189 @@ The **IMSciences package** is a Python library designed to process incoming data

  ---

- ## 17. `pivot_table`
+ ## 17. pivot_table
  - **Description**: Dynamically pivots a DataFrame based on specified columns.
  - **Usage**: `pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=False, fill_missing_weekly_dates=False, week_commencing='W-MON')`
  - **Example**: `pivot_table(df, 'OBS', 'Channel Short Names', 'Value', filters_dict={'Master Include': ' == 1', 'OBS': ' >= datetime(2019,9,9)', 'Metric Short Names': ' == spd'}, fill_value=0, aggfunc='sum', margins=False, margins_name='Total', datetime_trans_needed=True, reverse_header_order=True, fill_missing_weekly_dates=True, week_commencing='W-MON')`

  ---

- ## 18. `apply_lookup_table_for_columns`
+ ## 18. apply_lookup_table_for_columns
  - **Description**: Equivalent of XLOOKUP in Excel. Allows mapping of a dictionary of substrings within a column.
  - **Usage**: `apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')`
  - **Example**: `apply_lookup_table_for_columns(df, col_names, {'spend': 'spd', 'clicks': 'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')`

  ---

- ## 19. `aggregate_daily_to_wc_wide`
+ ## 19. aggregate_daily_to_wc_wide
  - **Description**: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.
  - **Usage**: `aggregate_daily_to_wc_wide(df, date_column, group_columns, sum_columns, wc, aggregation='sum', include_totals=False)`
  - **Example**: `aggregate_daily_to_wc_wide(df, 'date', ['platform'], ['cost', 'impressions', 'clicks'], 'mon', 'average', True)`

  ---

- ## 20. `merge_cols_with_seperator`
+ ## 20. merge_cols_with_seperator
  - **Description**: Merges multiple columns in a DataFrame into one column with a separator `_`. Useful for lookup tables.
  - **Usage**: `merge_cols_with_seperator(df, col_names, seperator='_', output_column_name='Merged', starting_prefix_str=None, ending_prefix_str=None)`
  - **Example**: `merge_cols_with_seperator(df, ['Campaign', 'Product'], seperator='|', output_column_name='Merged Columns', starting_prefix_str='start_', ending_prefix_str='_end')`

  ---

- ## 21. `check_sum_of_df_cols_are_equal`
+ ## 21. check_sum_of_df_cols_are_equal
  - **Description**: Checks if the sum of two columns in two DataFrames are the same, and provides the sums and differences.
  - **Usage**: `check_sum_of_df_cols_are_equal(df_1, df_2, cols_1, cols_2)`
  - **Example**: `check_sum_of_df_cols_are_equal(df_1, df_2, 'Media Cost', 'Spend')`

  ---

- ## 22. `convert_2_df_cols_to_dict`
+ ## 22. convert_2_df_cols_to_dict
  - **Description**: Creates a dictionary using two columns in a DataFrame.
  - **Usage**: `convert_2_df_cols_to_dict(df, key_col, value_col)`
  - **Example**: `convert_2_df_cols_to_dict(df, 'Campaign', 'Channel')`

  ---

- ## 23. `create_FY_and_H_columns`
+ ## 23. create_FY_and_H_columns
  - **Description**: Creates financial year, half-year, and financial half-year columns.
  - **Usage**: `create_FY_and_H_columns(df, index_col, start_date, starting_FY, short_format='No', half_years='No', combined_FY_and_H='No')`
  - **Example**: `create_FY_and_H_columns(df, 'Week (M-S)', '2022-10-03', 'FY2023', short_format='Yes', half_years='Yes', combined_FY_and_H='Yes')`

  ---

- ## 24. `keyword_lookup_replacement`
+ ## 24. keyword_lookup_replacement
  - **Description**: Updates chosen values in a specified column of the DataFrame based on a lookup dictionary.
  - **Usage**: `keyword_lookup_replacement(df, col, replacement_rows, cols_to_merge, replacement_lookup_dict, output_column_name='Updated Column')`
  - **Example**: `keyword_lookup_replacement(df, 'channel', 'Paid Search Generic', ['channel', 'segment', 'product'], qlik_dict_for_channel, output_column_name='Channel New')`

  ---

- ## 25. `create_new_version_of_col_using_LUT`
+ ## 25. create_new_version_of_col_using_LUT
  - **Description**: Creates a new column in a DataFrame by mapping values from an old column using a lookup table.
  - **Usage**: `create_new_version_of_col_using_LUT(df, keys_col, value_col, dict_for_specific_changes, new_col_name='New Version of Old Col')`
  - **Example**: `create_new_version_of_col_using_LUT(df, 'Campaign Name', 'Campaign Type', search_campaign_name_retag_lut, 'Campaign Name New')`

  ---

- ## 26. `convert_df_wide_2_long`
+ ## 26. convert_df_wide_2_long
  - **Description**: Converts a DataFrame from wide to long format.
  - **Usage**: `convert_df_wide_2_long(df, value_cols, variable_col_name='Stacked', value_col_name='Value')`
  - **Example**: `convert_df_wide_2_long(df, ['Media Cost', 'Impressions', 'Clicks'], variable_col_name='Metric')`

  ---

- ## 27. `manually_edit_data`
+ ## 27. manually_edit_data
  - **Description**: Enables manual updates to DataFrame cells by applying filters and editing a column.
  - **Usage**: `manually_edit_data(df, filters_dict, col_to_change, new_value, change_in_existing_df_col='No', new_col_to_change_name='New', manual_edit_col_name=None, add_notes='No', existing_note_col_name=None, note=None)`
  - **Example**: `manually_edit_data(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'}, 'Master Include', 1, change_in_existing_df_col='Yes', new_col_to_change_name='Master Include', manual_edit_col_name='Manual Changes')`

  ---

- ## 28. `format_numbers_with_commas`
+ ## 28. format_numbers_with_commas
  - **Description**: Formats numeric data into numbers with commas and specified decimal places.
  - **Usage**: `format_numbers_with_commas(df, decimal_length_chosen=2)`
  - **Example**: `format_numbers_with_commas(df, 1)`

  ---

- ## 29. `filter_df_on_multiple_conditions`
+ ## 29. filter_df_on_multiple_conditions
  - **Description**: Filters a DataFrame based on multiple conditions from a dictionary.
  - **Usage**: `filter_df_on_multiple_conditions(df, filters_dict)`
  - **Example**: `filter_df_on_multiple_conditions(df, {'OBS': ' <= datetime(2023,1,23)', 'File_Name': ' == France media'})`

  ---

- ## 30. `read_and_concatenate_files`
+ ## 30. read_and_concatenate_files
  - **Description**: Reads and concatenates all files of a specified type in a folder.
  - **Usage**: `read_and_concatenate_files(folder_path, file_type='csv')`
  - **Example**: `read_and_concatenate_files(folder_path, file_type='csv')`

  ---

- ## 31. `remove_zero_values`
+ ## 31. remove_zero_values
  - **Description**: Removes rows with zero values in a specified column.
  - **Usage**: `remove_zero_values(data_frame, column_to_filter)`
  - **Example**: `remove_zero_values(df, 'Funeral_Delivery')`

  ---

- ## 32. `upgrade_outdated_packages`
+ ## 32. upgrade_outdated_packages
  - **Description**: Upgrades all outdated packages in the environment.
  - **Usage**: `upgrade_outdated_packages()`
  - **Example**: `upgrade_outdated_packages()`

  ---

- ## 33. `convert_mixed_formats_dates`
+ ## 33. convert_mixed_formats_dates
  - **Description**: Converts a mix of US and UK date formats to datetime.
  - **Usage**: `convert_mixed_formats_dates(df, date_col)`
  - **Example**: `convert_mixed_formats_dates(df, 'OBS')`

  ---

- ## 34. `fill_weekly_date_range`
+ ## 34. fill_weekly_date_range
  - **Description**: Fills in missing weeks with zero values.
  - **Usage**: `fill_weekly_date_range(df, date_column, freq)`
  - **Example**: `fill_weekly_date_range(df, 'OBS', 'W-MON')`

  ---

- ## 35. `add_prefix_and_suffix`
+ ## 35. add_prefix_and_suffix
  - **Description**: Adds prefixes and/or suffixes to column headers.
  - **Usage**: `add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)`
  - **Example**: `add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')`

  ---

- ## 36. `create_dummies`
+ ## 36. create_dummies
  - **Description**: Converts time series into binary indicators based on a threshold.
  - **Usage**: `create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')`
  - **Example**: `create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')`

  ---

- ## 37. `replace_substrings`
+ ## 37. replace_substrings
  - **Description**: Replaces substrings in a column of strings using a dictionary and can change column values to lowercase.
  - **Usage**: `replace_substrings(df, column, replacements, to_lower=False, new_column=None)`
  - **Example**: `replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')`

  ---

- ## 38. `add_total_column`
+ ## 38. add_total_column
  - **Description**: Sums all columns (excluding a specified column) to create a total column.
  - **Usage**: `add_total_column(df, exclude_col=None, total_col_name='Total')`
  - **Example**: `add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')`

  ---

- ## 39. `apply_lookup_table_based_on_substring`
+ ## 39. apply_lookup_table_based_on_substring
  - **Description**: Maps substrings in a column to values using a lookup dictionary.
  - **Usage**: `apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')`
  - **Example**: `apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')`

  ---

- ## 40. `compare_overlap`
+ ## 40. compare_overlap
  - **Description**: Compares matching rows and columns in two DataFrames and outputs the differences.
  - **Usage**: `compare_overlap(df1, df2, date_col)`
  - **Example**: `compare_overlap(df_1, df_2, 'obs')`

  ---

- ## 41. `week_commencing_2_week_commencing_conversion`
+ ## 41. week_commencing_2_week_commencing_conversion
  - **Description**: Converts a week commencing column to a different start day.
  - **Usage**: `week_commencing_2_week_commencing_conversion(df, date_col, week_commencing='sun')`
  - **Example**: `week_commencing_2_week_commencing_conversion(df, 'obs', week_commencing='mon')`

  ---

- ## 42. `plot_chart`
+ ## 42. plot_chart
  - **Description**: Plots various chart types including line, area, scatter, and bar.
  - **Usage**: `plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs)`
  - **Example**: `plot_chart(df, 'obs', df.cols, chart_type='line', title='Spend Over Time', x_title='Date', y_title='Spend')`

  ---

- ## 43. `plot_two_with_common_cols`
+ ## 43. plot_two_with_common_cols
  - **Description**: Plots charts for two DataFrames based on common column names.
  - **Usage**: `plot_two_with_common_cols(df1, df2, date_column, same_axis=True)`
  - **Example**: `plot_two_with_common_cols(df_1, df_2, date_column='obs')`
@@ -411,7 +411,7 @@ The **IMSciences package** is a Python library designed to process incoming data
  Install the IMS package via pip:

  ```bash
- pip install ims-package
+ pip install imsciences
  ```

  ---
@@ -3,15 +3,15 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
  imsciences/__init__.py,sha256=7CfK2dMjPnBBw6I4st-20MdMlLjZULviFVXF2eMD9NI,80
  imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
- imsciences/datafunctions.py,sha256=KbZuvjJF-1gydPsb2qFlvpbVLwuG6y-lhLKt-wZ5JDI,156389
+ imsciences/datafunctions.py,sha256=NGJ3j1HIXX2G2bE529Tlvq6AXaAxSye6YjobUF_QpL4,164562
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
  imsciences/unittesting.py,sha256=d9H5HN8y7oof59hqN9mGqkjulExqFd93BEW-X8w_Id8,58142
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
- imsciences-0.8.dist-info/METADATA,sha256=moylR64i_w4kk3TPPZMpFmAPc9f0A4xJgjAY-Zy-Tac,17845
- imsciences-0.8.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
- imsciences-0.8.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
- imsciences-0.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
- imsciences-0.8.dist-info/RECORD,,
+ imsciences-0.9.dist-info/METADATA,sha256=KZSjJgbi89Oon07qhMCo9nlP_kE3GIUeRM29vs50tds,17775
+ imsciences-0.9.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+ imsciences-0.9.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+ imsciences-0.9.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+ imsciences-0.9.dist-info/RECORD,,