imsciences 0.6.2.0__py3-none-any.whl → 0.6.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,8 +109,8 @@ class dataprocessing:
109
109
 
110
110
  print("\n17. pivot_table")
111
111
  print(" - Description: Dynamically pivots a DataFrame based on specified columns.")
112
- print(" - Usage: pivot_table(df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
113
- print(" - Example: pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, 'OBS', 'Channel Short Names', 'Value', fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True)")
112
+ print(" - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False')")
113
+ print(" - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True')")
114
114
 
115
115
  print("\n18. apply_lookup_table_for_columns")
116
116
  print(" - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
@@ -657,59 +657,68 @@ class dataprocessing:
657
657
 
658
658
  return combined_df
659
659
 
660
- def pivot_table(self, df, filters_dict, index_col, columns, values_col, fill_value=0,aggfunc='sum',margins=False,margins_name="Total",datetime_trans_needed=True):
660
+ def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
661
661
  """
662
662
  Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
663
663
 
664
664
  Args:
665
665
  df (pandas.DataFrame): The DataFrame containing the data.
666
- filters_dict (dict): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell
667
666
  index_col (str): Name of Column for your pivot table to index on
668
667
  columns (str): Name of Columns for your pivot table.
669
668
  values_col (str): Name of Values Columns for your pivot table.
669
+ filters_dict (dict, optional): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell. Defaults to None
670
670
  fill_value (int, optional): The value to replace nan with. Defaults to 0.
671
671
  aggfunc (str, optional): The method on which to aggregate the values column. Defaults to sum.
672
672
  margins (bool, optional): Whether the pivot table needs a total row and column. Defaults to False.
673
673
  margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
674
674
  datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to True.
675
+ reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
675
676
 
676
677
  Returns:
677
678
  pandas.DataFrame: The pivot table specified
678
679
  """
679
680
 
680
681
  # Create the filtered df by applying the conditions
681
- df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
682
+ if filters_dict is None:
683
+ df_filtered = df
684
+ else:
685
+ df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
682
686
 
683
- # Ensure OBS is in datetime format for proper sorting
687
+ # Ensure index column is in datetime format for proper sorting
684
688
  df_filtered = df_filtered.copy()
685
689
 
686
690
  # If datetime transformation is needed
687
- if datetime_trans_needed is True:
688
- df_filtered.loc[:,index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
691
+ if datetime_trans_needed:
692
+ df_filtered[index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
689
693
 
690
694
  # Create the pivot table
691
- pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc,margins=margins,margins_name=margins_name)
695
+ pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc, margins=margins, margins_name=margins_name)
692
696
 
693
697
  # Handling MultiIndex columns if present, making them a flat structure
694
- if isinstance(pivoted_df.columns, pd.MultiIndex):
695
- pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
698
+ if not reverse_header_order:
699
+ if isinstance(pivoted_df.columns, pd.MultiIndex):
700
+ pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
701
+ else:
702
+ pivoted_df.columns = pivoted_df.columns.map(str)
696
703
  else:
697
- pivoted_df.columns = pivoted_df.columns.map(str)
704
+ if isinstance(pivoted_df.columns, pd.MultiIndex):
705
+ # Reorder the MultiIndex columns
706
+ pivoted_df.columns = ['_'.join(reversed(list(map(str, col)))).strip() for col in pivoted_df.columns.values]
707
+ else:
708
+ pivoted_df.columns = pivoted_df.columns.map(str)
709
+ # Reverse the order for single index columns
710
+ pivoted_df.columns = ['_'.join(reversed(col.split('_'))).strip() for col in pivoted_df.columns]
698
711
 
699
712
  # Reset the pivot before returning
700
713
  pivoted_df = pivoted_df.reset_index()
701
714
 
702
- # Sort by OBS from oldest to newest
703
- if datetime_trans_needed is True:
704
- # pivoted_df = pivoted_df.reset_index()
715
+ # Sort by index column from oldest to newest
716
+ if datetime_trans_needed:
705
717
  pivoted_df[index_col] = pd.to_datetime(pivoted_df[index_col]) # Ensure sorting works correctly
706
718
  pivoted_df = pivoted_df.sort_values(by=index_col)
707
-
708
- # Convert OBS back to a string in YYYY-MM-DD format for display purposes
709
- pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
710
719
 
711
- # Set index back to date column
712
- # pivoted_df.set_index(index_col,inplace=True)
720
+ # Convert index column back to a string in YYYY-MM-DD format for display purposes
721
+ pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
713
722
 
714
723
  # Fill in any NaNs
715
724
  pivoted_df = pivoted_df.fillna(fill_value)
@@ -1436,15 +1445,14 @@ class dataprocessing:
1436
1445
  Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
1437
1446
 
1438
1447
  Args:
1439
- - df (pd.DataFrame): The DataFrame containing the column to categorize.
1440
- - column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
1441
- - category_dict (dict): A dictionary where keys are substrings to search for in the text and values are
1442
- the categories to assign when a substring is found.
1443
- - new_col_name (str): The name of the new column to be created in the DataFrame, which will hold the
1444
- resulting categories. Default is 'Category'.
1448
+ df (pd.DataFrame): The DataFrame containing the column to categorize.
1449
+ column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
1450
+ category_dict (dict): A dictionary where keys are substrings to search for in the text and values are the categories to assign when a substring is found.
1451
+ new_col_name (str, optional): The name of the new column to be created in the DataFrame, which will hold the resulting categories. Default is 'Category'.
1452
+ other_label (str, optional): The name given to category if no substring from the dictionary is found in the cell
1445
1453
 
1446
1454
  Returns:
1447
- - pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
1455
+ pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
1448
1456
  """
1449
1457
 
1450
1458
  def categorize_text(text):
@@ -1452,13 +1460,11 @@ class dataprocessing:
1452
1460
  Assigns a category to a single text string based on the presence of substrings from a dictionary.
1453
1461
 
1454
1462
  Args:
1455
- - text (str): The text string to categorize.
1456
- - category_dict (dict): A dictionary where keys are substrings to search for in the text and
1457
- values are the categories to assign if a substring is found.
1463
+ text (str): The text string to categorize.
1458
1464
 
1459
1465
  Returns:
1460
- - str: The category assigned based on the first matching substring found in the text. If no
1461
- matching substring is found, returns 'Full Funnel'.
1466
+ str: The category assigned based on the first matching substring found in the text. If no
1467
+ matching substring is found, returns other_label.
1462
1468
  """
1463
1469
  for key, category in category_dict.items():
1464
1470
  if key.lower() in text.lower(): # Check if the substring is in the text (case-insensitive)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.2.0
3
+ Version: 0.6.2.2
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
- imsciences/datafunctions.py,sha256=d37Fu7_FxlWkdyzMn0io_1FUske6VAL_hbhvpNf3KKM,139739
5
+ imsciences/datafunctions.py,sha256=6zY1sE_ucCQVCp3G2lOz0hBvKOol44nkY90Y_KZlYMg,140390
6
6
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
7
7
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
8
8
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
9
9
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
- imsciences-0.6.2.0.dist-info/METADATA,sha256=842dB5tjX2VVtJmemQtVZdson-VXiSafE6YOWAs0NWY,854
12
- imsciences-0.6.2.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- imsciences-0.6.2.0.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
- imsciences-0.6.2.0.dist-info/RECORD,,
11
+ imsciences-0.6.2.2.dist-info/METADATA,sha256=viAJgSE2MA6ykZZRL70i9xzme8eJY__JxoCFv_5PGQw,854
12
+ imsciences-0.6.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ imsciences-0.6.2.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
+ imsciences-0.6.2.2.dist-info/RECORD,,