PyPI - imsciences - Versions diffs - 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl - Mend

imsciences 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

imsciences/datafunctions.py CHANGED Viewed

@@ -194,9 +194,29 @@ class dataprocessing:
         print("\n34. Fill Weekly Missing Dates")
         print("   - Description: Fill in any missing weeks with 0.")
-        print("   - Usage: fill_weekly_date_range(self, df, date_column, freq)")
+        print("   - Usage: fill_weekly_date_range(df, date_column, freq)")
         print("   - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
+        print("\n35. Add Prefix and/or Suffix to Column Headers")
+        print("   - Description: Add Prefix and/or Suffix to Column Headers.")
+        print("   - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
+        print("   - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
+        print("\n36. Change all data to dummies")
+        print("   - Description: Changes time series to 0s and 1s based off threshold")
+        print("   - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
+        print("   - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
+        print("\n37. Replace substrings in column of strings")
+        print("   - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
+        print("   - Usage: replace_substrings(df, column, replacements, to_lower=False, new_column=None)")
+        print("   - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')")
+        print("\n38. Add totals column")
+        print("   - Description: Sums all columns with the option to exclude an date column to create a total column")
+        print("   - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
+        print("   - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
     def get_wd_levels(self, levels):
         """
         Gets the current wd of whoever is working on it and gives the options to move the number of levels up.
@@ -1291,10 +1311,120 @@ class dataprocessing:
         df_full.fillna(0, inplace=True)
         return df_full
+    def add_prefix_and_suffix(self, df, prefix='', suffix='', date_col=None):
+        """
+        Adds a specified prefix and/or suffix to the column names of a DataFrame. Optionally, a column (e.g., a date column) can be excluded.
+        The column names are reassigned on the DataFrame that is passed in, and that same DataFrame is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame whose column names will be modified.
+        prefix (str, optional): The prefix to add to each column name. Default is an empty string.
+        suffix (str, optional): The suffix to add to each column name. Default is an empty string.
+        date_col (str, optional): The name of the column to exclude from adding prefix and suffix, typically a date column. Default is None.
+        Returns:
+        pd.DataFrame: The DataFrame with updated column names.
+        """
+        def _rename(col):
+            # The excluded (date) column keeps its original label
+            if date_col is not None and col == date_col:
+                return col
+            # str() lets non-string labels (e.g. integer column names) be renamed instead of raising TypeError
+            return prefix + str(col) + suffix
+        df.columns = [_rename(col) for col in df.columns]
+        return df
+    def create_dummies(self, df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total'):
+        """
+        Creates dummy variables for the DataFrame, converting values greater than the threshold to 1 and others to 0.
+        Optionally adds a total dummy column indicating whether any row contains at least one value greater than the threshold.
+        When date_col is None a new DataFrame is built and returned; when date_col is given the passed-in DataFrame is updated and returned.
+        Args:
+        df (pd.DataFrame): The DataFrame to process.
+        date_col (str, optional): The column name to exclude from the dummy conversion, typically a date column. Default is None.
+        dummy_threshold (int, optional): The threshold value; values greater than this become 1, others become 0. Default is 0.
+        add_total_dummy_col (str, optional): If set to any value other than 'No', adds a column that contains the max value (1 or 0) for each row. Default is 'No'.
+        total_col_name (str, optional): The name of the total column to add if add_total_dummy_col is not 'No'. Default is 'total'.
+        Returns:
+        pd.DataFrame: The modified DataFrame with dummies applied and optional total column.
+        """
+        # If there is no date column
+        if date_col is None:
+            # Vectorised comparison replaces the deprecated DataFrame.applymap; the result is a new frame
+            df = (df > dummy_threshold).astype('int64')
+            if add_total_dummy_col != 'No':
+                # Find max value of rows
+                df[total_col_name] = df.max(axis=1)
+        # If there is a date column
+        else:
+            # Boolean mask selecting every column except the date column
+            value_cols = df.columns != date_col
+            # Create dummies
+            df.loc[:, value_cols] = (df.loc[:, value_cols] > dummy_threshold).astype('int64')
+            if add_total_dummy_col != 'No':
+                # Find max value of rows (mask was built before the total column exists, so it is not included)
+                df[total_col_name] = df.loc[:, value_cols].max(axis=1)
+        return df
+    def replace_substrings(self, df, column, replacements, to_lower=False, new_column=None):
+        """
+        Replaces substrings in a column of a DataFrame based on a dictionary of replacements.
+        Optionally converts the column values to lowercase and allows creating a new column or modifying the existing one.
+        Args:
+        df (pd.DataFrame): The DataFrame containing the column to modify.
+        column (str): The column name where the replacements will be made.
+        replacements (dict): A dictionary where keys are substrings to replace and values are the replacement strings. Keys are matched literally (regex=False).
+        to_lower (bool, optional): If True, the column values will be converted to lowercase after the replacements have been applied. Default is False.
+        new_column (str, optional): If provided, the replacements will be applied to this new column. If None, the existing column will be modified. Default is None.
+        Returns:
+        pd.DataFrame: The DataFrame with the specified replacements made, and optionally with lowercase strings.
+        """
+        if new_column is not None:
+            # Create a new column for replacements
+            df[new_column] = df[column]
+            temp_column = new_column
+        else:
+            # Modify the existing column
+            temp_column = column
+        # Apply substring replacements
+        for old, new in replacements.items():
+            df[temp_column] = df[temp_column].str.replace(old, new, regex=False)
+        # Optionally convert to lowercase
+        if to_lower:
+            df[temp_column] = df[temp_column].str.lower()
+        return df
+    def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
+        """
+        Adds a total column to a DataFrame by summing across all columns. Optionally excludes a specified column.
+        The total column is written onto the DataFrame that is passed in, and that same DataFrame is returned.
+        Args:
+        df (pd.DataFrame): The DataFrame to modify.
+        exclude_col (str, optional): The column name to exclude from the sum. Default is None.
+        total_col_name (str, optional): The name of the new total column. Default is 'Total'.
+        Returns:
+        pd.DataFrame: The DataFrame with an added total column.
+        """
+        # A falsy label (e.g. the integer column 0) is excluded when it names a real column;
+        # any other falsy value (None, '') keeps meaning "exclude nothing"
+        if exclude_col is not None and (exclude_col or (exclude_col in df.columns)):
+            # Drop the excluded column before summing
+            summed = df.drop(columns=[exclude_col])
+        else:
+            # Sum across all columns if exclude_col is not provided
+            summed = df
+        df[total_col_name] = summed.sum(axis=1)
+        return df

{imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.1.4
+Version: 0.6.1.6
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/RECORD RENAMED Viewed

@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
-imsciences/datafunctions.py,sha256=IsF_pSdisE1KSPnpHkS3JTxtiBGBiS1vXLisDRgKgRQ,129899
+imsciences/datafunctions.py,sha256=1DOieL8Xfh6I-5JZlM_XKPwIon-I_VcDjppuvXmhYhk,137236
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.1.4.dist-info/METADATA,sha256=7qeNA6PXkl3QWfDizajpaIbOswxgoqu3BAcY0FP9GU8,854
-imsciences-0.6.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-imsciences-0.6.1.4.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.6.1.4.dist-info/RECORD,,
+imsciences-0.6.1.6.dist-info/METADATA,sha256=SbdVxObVs6UW90bJ0eQIAbX1rd0urpG6sNmcxiB5uLw,854
+imsciences-0.6.1.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.1.6.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.1.6.dist-info/RECORD,,

{imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{imsciences-0.6.1.4.dist-info → imsciences-0.6.1.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

imsciences 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl

imsciences 0.6.1.4__py3-none-any.whl → 0.6.1.6__py3-none-any.whl