PyPI - imsciences - Versions diffs - 0.6.2.0__tar.gz → 0.6.2.2__tar.gz - Mend

@@ -109,8 +109,8 @@ class dataprocessing:
         print("\n17. pivot_table")
         print("    - Description: Dynamically pivots a DataFrame based on specified columns.")
-        print("    - Usage: pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
-        print("    - Example: pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
+        print("    - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order=False)")
+        print("    - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order=True)")
         print("\n18. apply_lookup_table_for_columns")
         print("    - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
@@ -657,59 +657,68 @@ class dataprocessing:
         return combined_df
-    def pivot_table(self, df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name="Total",datetime_trans_needed=True):
+    def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
         """
         Creates a pivot table: optionally filters the data down to the rows you want, then pivots on the specified columns.
         Args:
             df (pandas.DataFrame): The DataFrame containing the data.
-            filters_dict (dict): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell
             index_col (str): Name of Column for your pivot table to index on
             columns (str): Name of Columns for your pivot table.
             values_col (str): Name of Values Columns for your pivot table.
+            filters_dict (dict, optional): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell. Defaults to None
             fill_value (int, optional): The value to replace nan with. Defaults to 0.
             aggfunc (str, optional): The method on which to aggregate the values column. Defaults to sum.
             margins (bool, optional): Whether the pivot table needs a totals row and column. Defaults to False.
             margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
             datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to True.
+            reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
         Returns:
             pandas.DataFrame: The pivot table specified
         """
         # Create the filtered df by applying the conditions
-        df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
+        if filters_dict is None:
+            df_filtered = df
+        else:
+            df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
-        # Ensure OBS is in datetime format for proper sorting
+        # Ensure index column is in datetime format for proper sorting
         df_filtered = df_filtered.copy()
         # If datetime transformation is needed
-        if datetime_trans_needed is True:
-            df_filtered.loc[:,index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
+        if datetime_trans_needed:
+            df_filtered[index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
         # Create the pivot table
-        pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc,margins=margins,margins_name=margins_name)
+        pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc, margins=margins, margins_name=margins_name)
         # Handling MultiIndex columns if present, making them a flat structure
-        if isinstance(pivoted_df.columns, pd.MultiIndex):
-            pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
+        if not reverse_header_order:
+            if isinstance(pivoted_df.columns, pd.MultiIndex):
+                pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
+            else:
+                pivoted_df.columns = pivoted_df.columns.map(str)
         else:
-            pivoted_df.columns = pivoted_df.columns.map(str)
+            if isinstance(pivoted_df.columns, pd.MultiIndex):
+                # Reorder the MultiIndex columns
+                pivoted_df.columns = ['_'.join(reversed(list(map(str, col)))).strip() for col in pivoted_df.columns.values]
+            else:
+                pivoted_df.columns = pivoted_df.columns.map(str)
+                # Reverse the order of the '_'-separated parts of each single-level column name
+                pivoted_df.columns = ['_'.join(reversed(col.split('_'))).strip() for col in pivoted_df.columns]
         # Reset the pivot before returning
         pivoted_df = pivoted_df.reset_index()
-        # Sort by OBS from oldest to newest
-        if datetime_trans_needed is True:
-            # pivoted_df = pivoted_df.reset_index()
+        # Sort by index column from oldest to newest
+        if datetime_trans_needed:
             pivoted_df[index_col] = pd.to_datetime(pivoted_df[index_col])  # Ensure sorting works correctly
             pivoted_df = pivoted_df.sort_values(by=index_col)
-            # Convert OBS back to a string in YYYY-MM-DD format for display purposes
-            pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
-            # Set index back to date column
-            # pivoted_df.set_index(index_col,inplace=True)
+            # Convert index column back to a string in YYYY-MM-DD format for display purposes
+            pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
         # Fill in any NaNs
         pivoted_df = pivoted_df.fillna(fill_value)
@@ -1436,15 +1445,14 @@ class dataprocessing:
         Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
         Args:
-        - df (pd.DataFrame): The DataFrame containing the column to categorize.
-        - column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
-        - category_dict (dict): A dictionary where keys are substrings to search for in the text and values are
-            the categories to assign when a substring is found.
-        - new_col_name (str): The name of the new column to be created in the DataFrame, which will hold the
-            resulting categories. Default is 'Category'.
+        df (pd.DataFrame): The DataFrame containing the column to categorize.
+        column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
+        category_dict (dict): A dictionary where keys are substrings to search for in the text and values are the categories to assign when a substring is found.
+        new_col_name (str, optional): The name of the new column to be created in the DataFrame, which will hold the resulting categories. Default is 'Category'.
+        other_label (str, optional): The category assigned when no substring from the dictionary is found in the cell.
         Returns:
-        - pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
+        pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
         """
         def categorize_text(text):
@@ -1452,13 +1460,11 @@ class dataprocessing:
             Assigns a category to a single text string based on the presence of substrings from a dictionary.
             Args:
-            - text (str): The text string to categorize.
-            - category_dict (dict): A dictionary where keys are substrings to search for in the text and
-            values are the categories to assign if a substring is found.
+            text (str): The text string to categorize.
             Returns:
-            - str: The category assigned based on the first matching substring found in the text. If no
-            matching substring is found, returns 'Full Funnel'.
+            str: The category assigned based on the first matching substring found in the text. If no
+            matching substring is found, returns other_label.
             """
             for key, category in category_dict.items():
                 if key.lower() in text.lower():  # Check if the substring is in the text (case-insensitive)

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.2.0
+Version: 0.6.2.2
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.2.0
+Version: 0.6.2.2
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

@@ -8,7 +8,7 @@ def read_md(file_name):
             return f.read()
     return ''
-VERSION = '0.6.2.0'
+VERSION = '0.6.2.2'
 DESCRIPTION = 'IMS Data Processing Package'
 LONG_DESCRIPTION = read_md('README.md')  # Reading from README.md

imsciences 0.6.2.0__tar.gz → 0.6.2.2__tar.gz

imsciences 0.6.2.0tar.gz → 0.6.2.2tar.gz