PyPI - imsciences - Versions diffs - 0.6.1.3__tar.gz → 0.6.1.5__tar.gz - Mend

@@ -194,9 +194,29 @@ class dataprocessing:
         print("\n34. Fill Weekly Missing Dates")
         print("   - Description: Fill in any missing weeks with 0.")
-        print("   - Usage: fill_weekly_date_range(self, df, date_column, freq)")
+        print("   - Usage: fill_weekly_date_range(df, date_column, freq)")
         print("   - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
+        print("\n35. Add Prefix and/or Suffix to Column Headers")
+        print("   - Description: Add Prefix and/or Suffix to Column Headers.")
+        print("   - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
+        print("   - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
+        print("\n36. Change all data to dummies")
+        print("   - Description: Changes time series to 0s and 1s based off threshold")
+        print("   - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
+        print("   - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
+        print("\n37. Replace substrings in column of strings")
+        print("   - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
+        print("   - Usage: replace_substrings(df, column, replacements, to_lower=False)")
+        print("   - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True)")
+        print("\n38. Add totals column")
+        print("   - Description: Sums all columns with the option to exclude an date column to create a total column")
+        print("   - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
+        print("   - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
     def get_wd_levels(self, levels):
         """
         Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
@@ -1291,10 +1311,110 @@ class dataprocessing:
         df_full.fillna(0, inplace=True)
         return df_full
+    def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
+        """
+        Adds a prefix and/or suffix to the column names of a DataFrame, optionally leaving one column untouched.
+        The columns are renamed in place on the DataFrame that is passed in, and that same object is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame whose column names will be modified.
+        prefix (str, optional): Text placed in front of each column name. Default is an empty string.
+        suffix (str, optional): Text placed after each column name. Default is an empty string.
+        date_col (str, optional): The name of a column to leave unrenamed, typically a date column. Default is None (rename every column).
+        Returns:
+        pd.DataFrame: The same DataFrame with updated column names.
+        """
+        def rename(col):
+            # Leave the excluded column exactly as it is
+            if date_col is not None and col == date_col:
+                return col
+            # Convert the label to text first so non-string labels (e.g. integer columns) do not raise a TypeError
+            return prefix + str(col) + suffix
+        df.columns = [rename(col) for col in df.columns]
+        return df
+    def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
+        """
+        Converts every value column of a DataFrame to a 0/1 dummy: 1 where the value is greater than the threshold, otherwise 0.
+        Optionally adds a total dummy column holding the row-wise maximum of the dummies, i.e. 1 if any column in the row exceeded the threshold.
+        When date_col is None a new DataFrame is returned and the input is left untouched; when date_col is given the
+        value columns of the input DataFrame are overwritten and that same DataFrame is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame to process.
+        date_col (str, optional): The column name to exclude from the dummy conversion, typically a date column. Default is None.
+        dummy_threshold (int, optional): The threshold value; values greater than this become 1, others become 0. Default is 0.
+        add_total_dummy_col (str, optional): If set to any value other than 'No', adds the row-wise maximum of the dummies as an extra column. Default is 'No'.
+        total_col_name (str, optional): The name of the total column to add if add_total_dummy_col is not 'No'. Default is 'total'.
+        Returns:
+        pd.DataFrame: The DataFrame with dummies applied and the optional total column.
+        """
+        if date_col is None:
+            # Vectorised comparison instead of the deprecated element-wise applymap; builds a new frame
+            df = (df > dummy_threshold).astype(int)
+            value_cols = list(df.columns)
+        else:
+            # Every column except the date column is converted
+            value_cols = [col for col in df.columns if col != date_col]
+            if value_cols:
+                # Replace whole columns so the dummies are stored as integers regardless of the source dtype
+                df[value_cols] = (df[value_cols] > dummy_threshold).astype(int)
+        if add_total_dummy_col != 'No':
+            # Row-wise max of 0/1 dummies is 1 when at least one column exceeded the threshold
+            df[total_col_name] = df[value_cols].max(axis=1)
+        return df
+    def replace_substrings(self, df, column, replacements, to_lower=False):
+        """
+        Replaces substrings in a column of a DataFrame based on a dictionary of replacements.
+        Optionally converts the column values to lowercase afterwards.
+        The column is overwritten on the DataFrame that is passed in, and that same object is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame containing the column to modify.
+        column (str): The column name where the replacements will be made.
+        replacements (dict): A dictionary where keys are substrings to replace and values are the replacement strings. Keys are matched literally (not as regular expressions) and case-sensitively, in dictionary order.
+        to_lower (bool, optional): If True, the column values are converted to lowercase after the replacements have been applied, so keys must match the original casing. Default is False.
+        Returns:
+        pd.DataFrame: The DataFrame with the specified replacements made, and optionally with lowercase strings.
+        """
+        values = df[column]
+        # Apply the literal substring replacements one after another
+        for old, new in replacements.items():
+            values = values.str.replace(old, new, regex=False)
+        # Lowercase last, so the replacement keys above are matched against the original casing
+        if to_lower:
+            values = values.str.lower()
+        # Write the column back once instead of once per replacement
+        df[column] = values
+        return df
+    def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
+        """
+        Adds a total column to a DataFrame by summing across the columns of each row. Optionally excludes one or more columns.
+        The total column is written onto the DataFrame that is passed in, and that same object is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame to modify.
+        exclude_col (str or list, optional): A column name, or a list of column names, to leave out of the sum (e.g. a date column). Default is None.
+        total_col_name (str, optional): The name of the new total column. Default is 'Total'.
+        Returns:
+        pd.DataFrame: The DataFrame with an added total column.
+        """
+        # Normalise exclude_col to a list of column names to drop before summing
+        if not exclude_col:
+            excluded = []
+        elif isinstance(exclude_col, list):
+            excluded = list(exclude_col)
+        else:
+            excluded = [exclude_col]
+        # If the total column already exists (e.g. the method is re-run), leave the old total out of the sum
+        # so the result is not double counted
+        if total_col_name in df.columns and total_col_name not in excluded:
+            excluded.append(total_col_name)
+        df[total_col_name] = df.drop(columns=excluded).sum(axis=1)
+        return df
@@ -2495,7 +2615,7 @@ class datapull:
         return final_weather
-    def pull_covid_data(folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
+    def pull_covid_data(self, folder_path: str, country: str = "GB", week_commencing: str = "mon") -> pd.DataFrame:
         """
         Get covid pandemic data for the country of interest.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.1.3
+Version: 0.6.1.5
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Description-Content-Type: text/markdown
-Requires-Dist: pandas
 # IMS Package Documentation

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.1.3
+Version: 0.6.1.5
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com
@@ -12,7 +12,6 @@ Classifier: Operating System :: Unix
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Description-Content-Type: text/markdown
-Requires-Dist: pandas
 # IMS Package Documentation

@@ -8,7 +8,7 @@ def read_md(file_name):
             return f.read()
     return ''
-VERSION = '0.6.1.3'
+VERSION = '0.6.1.5'
 DESCRIPTION = 'IMS Data Processing Package'
 LONG_DESCRIPTION = read_md('README.md')  # Reading from README.md

imsciences 0.6.1.3__tar.gz → 0.6.1.5__tar.gz

imsciences 0.6.1.3__tar.gz → 0.6.1.5__tar.gz