PyPI - imsciences - Versions diffs - 0.6.2.0__py3-none-any.whl → 0.6.2.2__py3-none-any.whl - Mend

imsciences 0.6.2.0__py3-none-any.whl → 0.6.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

imsciences/datafunctions.py CHANGED Viewed

@@ -109,8 +109,8 @@ class dataprocessing:
         print("\n17. pivot_table")
         print("    - Description: Dynamically pivots a DataFrame based on specified columns.")
-        print("    - Usage: pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
-        print("    - Example: pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
+        print("    - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False')")
+        print("    - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True')")
         print("\n18. apply_lookup_table_for_columns")
         print("    - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
@@ -657,59 +657,68 @@ class dataprocessing:
         return combined_df
-    def pivot_table(self, df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name="Total",datetime_trans_needed=True):
+    def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
         """
         Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
         Args:
             df (pandas.DataFrame): The DataFrame containing the data.
-            filters_dict (dict): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell
             index_col (str): Name of Column for your pivot table to index on
             columns (str): Name of Columns for your pivot table.
             values_col (str): Name of Values Columns for your pivot table.
+            filters_dict (dict, optional): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell. Defaults to None
             fill_value (int, optional): The value to replace nan with. Defaults to 0.
             aggfunc (str, optional): The method on which to aggregate the values column. Defaults to sum.
             margins (bool, optional): Whether the pivot table needs a total rows and column. Defaults to False.
             margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
             datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to False.
+            reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
         Returns:
             pandas.DataFrame: The pivot table specified
         """
         # Create the filtered df by applying the conditions
-        df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
+        if filters_dict is None:
+            df_filtered = df
+        else:
+            df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
-        # Ensure OBS is in datetime format for proper sorting
+        # Ensure index column is in datetime format for proper sorting
         df_filtered = df_filtered.copy()
         # If datetime transformation is needed
-        if datetime_trans_needed is True:
-            df_filtered.loc[:,index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
+        if datetime_trans_needed:
+            df_filtered[index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
         # Create the pivot table
-        pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc,margins=margins,margins_name=margins_name)
+        pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc, margins=margins, margins_name=margins_name)
         # Handling MultiIndex columns if present, making them a flat structure
-        if isinstance(pivoted_df.columns, pd.MultiIndex):
-            pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
+        if not reverse_header_order:
+            if isinstance(pivoted_df.columns, pd.MultiIndex):
+                pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
+            else:
+                pivoted_df.columns = pivoted_df.columns.map(str)
         else:
-            pivoted_df.columns = pivoted_df.columns.map(str)
+            if isinstance(pivoted_df.columns, pd.MultiIndex):
+                # Reorder the MultiIndex columns
+                pivoted_df.columns = ['_'.join(reversed(list(map(str, col)))).strip() for col in pivoted_df.columns.values]
+            else:
+                pivoted_df.columns = pivoted_df.columns.map(str)
+                # Reverse the order for single index columns
+                pivoted_df.columns = ['_'.join(reversed(col.split('_'))).strip() for col in pivoted_df.columns]
         # Reset the pivot before returning
         pivoted_df = pivoted_df.reset_index()
-        # Sort by OBS from oldest to newest
-        if datetime_trans_needed is True:
-            # pivoted_df = pivoted_df.reset_index()
+        # Sort by index column from oldest to newest
+        if datetime_trans_needed:
             pivoted_df[index_col] = pd.to_datetime(pivoted_df[index_col])  # Ensure sorting works correctly
             pivoted_df = pivoted_df.sort_values(by=index_col)
-            # Convert OBS back to a string in YYYY-MM-DD format for display purposes
-            pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
-            # Set index back to date column
-            # pivoted_df.set_index(index_col,inplace=True)
+            # Convert index column back to a string in YYYY-MM-DD format for display purposes
+            pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
         # Fill in any NaNs
         pivoted_df = pivoted_df.fillna(fill_value)
@@ -1436,15 +1445,14 @@ class dataprocessing:
         Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
         Args:
-        - df (pd.DataFrame): The DataFrame containing the column to categorize.
-        - column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
-        - category_dict (dict): A dictionary where keys are substrings to search for in the text and values are
-            the categories to assign when a substring is found.
-        - new_col_name (str): The name of the new column to be created in the DataFrame, which will hold the
-            resulting categories. Default is 'Category'.
+        df (pd.DataFrame): The DataFrame containing the column to categorize.
+        column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
+        category_dict (dict): A dictionary where keys are substrings to search for in the text and values are the categories to assign when a substring is found.
+        new_col_name (str, optional): The name of the new column to be created in the DataFrame, which will hold the resulting categories. Default is 'Category'.
+        other_label (str, optional): The name given to category if no substring from the dictionary is found in the cell
         Returns:
-        - pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
+        pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
         """
         def categorize_text(text):
@@ -1452,13 +1460,11 @@ class dataprocessing:
             Assigns a category to a single text string based on the presence of substrings from a dictionary.
             Args:
-            - text (str): The text string to categorize.
-            - category_dict (dict): A dictionary where keys are substrings to search for in the text and
-            values are the categories to assign if a substring is found.
+            text (str): The text string to categorize.
             Returns:
-            - str: The category assigned based on the first matching substring found in the text. If no
-            matching substring is found, returns 'Full Funnel'.
+            str: The category assigned based on the first matching substring found in the text. If no
+            matching substring is found, returns other_name.
             """
             for key, category in category_dict.items():
                 if key.lower() in text.lower():  # Check if the substring is in the text (case-insensitive)

{imsciences-0.6.2.0.dist-info → imsciences-0.6.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.2.0
+Version: 0.6.2.2
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.6.2.0.dist-info → imsciences-0.6.2.2.dist-info}/RECORD RENAMED Viewed

@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
-imsciences/datafunctions.py,sha256=d37Fu7_FxlWkdyzMn0io_1FUske6VAL_hbhvpNf3KKM,139739
+imsciences/datafunctions.py,sha256=6zY1sE_ucCQVCp3G2lOz0hBvKOol44nkY90Y_KZlYMg,140390
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.2.0.dist-info/METADATA,sha256=842dB5tjX2VVtJmemQtVZdson-VXiSafE6YOWAs0NWY,854
-imsciences-0.6.2.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-imsciences-0.6.2.0.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.6.2.0.dist-info/RECORD,,
+imsciences-0.6.2.2.dist-info/METADATA,sha256=viAJgSE2MA6ykZZRL70i9xzme8eJY__JxoCFv_5PGQw,854
+imsciences-0.6.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.2.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.2.2.dist-info/RECORD,,

{imsciences-0.6.2.0.dist-info → imsciences-0.6.2.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{imsciences-0.6.2.0.dist-info → imsciences-0.6.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

imsciences 0.6.2.0__py3-none-any.whl → 0.6.2.2__py3-none-any.whl

imsciences 0.6.2.0__py3-none-any.whl → 0.6.2.2__py3-none-any.whl