imsciences 0.6.2.1__py3-none-any.whl → 0.6.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +29 -20
- {imsciences-0.6.2.1.dist-info → imsciences-0.6.2.2.dist-info}/METADATA +1 -1
- {imsciences-0.6.2.1.dist-info → imsciences-0.6.2.2.dist-info}/RECORD +5 -5
- {imsciences-0.6.2.1.dist-info → imsciences-0.6.2.2.dist-info}/WHEEL +0 -0
- {imsciences-0.6.2.1.dist-info → imsciences-0.6.2.2.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -109,8 +109,8 @@ class dataprocessing:
|
|
|
109
109
|
|
|
110
110
|
print("\n17. pivot_table")
|
|
111
111
|
print(" - Description: Dynamically pivots a DataFrame based on specified columns.")
|
|
112
|
-
print(" - Usage: pivot_table(df,
|
|
113
|
-
print(" - Example: pivot_table(df, {'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''},
|
|
112
|
+
print(" - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False')")
|
|
113
|
+
print(" - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True')")
|
|
114
114
|
|
|
115
115
|
print("\n18. apply_lookup_table_for_columns")
|
|
116
116
|
print(" - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
|
|
@@ -657,59 +657,68 @@ class dataprocessing:
|
|
|
657
657
|
|
|
658
658
|
return combined_df
|
|
659
659
|
|
|
660
|
-
def pivot_table(self, df,
|
|
660
|
+
def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
|
|
661
661
|
"""
|
|
662
662
|
Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
|
|
663
663
|
|
|
664
664
|
Args:
|
|
665
665
|
df (pandas.DataFrame): The DataFrame containing the data.
|
|
666
|
-
filters_dict (dict): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell
|
|
667
666
|
index_col (str): Name of Column for your pivot table to index on
|
|
668
667
|
columns (str): Name of Columns for your pivot table.
|
|
669
668
|
values_col (str): Name of Values Columns for your pivot table.
|
|
669
|
+
filters_dict (dict, optional): Dictionary of conditions for the boolean mask i.e. what to filter your df on to get to your chosen cell. Defaults to None
|
|
670
670
|
fill_value (int, optional): The value to replace nan with. Defaults to 0.
|
|
671
671
|
aggfunc (str, optional): The method on which to aggregate the values column. Defaults to sum.
|
|
672
672
|
margins (bool, optional): Whether the pivot table should include a totals row and a totals column. Defaults to False.
|
|
673
673
|
margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
|
|
674
674
|
datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to True.
|
|
675
|
+
reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
|
|
675
676
|
|
|
676
677
|
Returns:
|
|
677
678
|
pandas.DataFrame: The pivot table specified
|
|
678
679
|
"""
|
|
679
680
|
|
|
680
681
|
# Create the filtered df by applying the conditions
|
|
681
|
-
|
|
682
|
+
if filters_dict is None:
|
|
683
|
+
df_filtered = df
|
|
684
|
+
else:
|
|
685
|
+
df_filtered = self.filter_df_on_multiple_conditions(df, filters_dict)
|
|
682
686
|
|
|
683
|
-
# Ensure
|
|
687
|
+
# Work on a copy so the datetime conversion below does not modify the caller's DataFrame
|
|
684
688
|
df_filtered = df_filtered.copy()
|
|
685
689
|
|
|
686
690
|
# If datetime transformation is needed
|
|
687
|
-
if datetime_trans_needed
|
|
688
|
-
df_filtered
|
|
691
|
+
if datetime_trans_needed:
|
|
692
|
+
df_filtered[index_col] = pd.to_datetime(df_filtered[index_col], dayfirst=True)
|
|
689
693
|
|
|
690
694
|
# Create the pivot table
|
|
691
|
-
pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc,margins=margins,margins_name=margins_name)
|
|
695
|
+
pivoted_df = df_filtered.pivot_table(index=index_col, columns=columns, values=values_col, aggfunc=aggfunc, margins=margins, margins_name=margins_name)
|
|
692
696
|
|
|
693
697
|
# Handling MultiIndex columns if present, making them a flat structure
|
|
694
|
-
if
|
|
695
|
-
pivoted_df.columns
|
|
698
|
+
if not reverse_header_order:
|
|
699
|
+
if isinstance(pivoted_df.columns, pd.MultiIndex):
|
|
700
|
+
pivoted_df.columns = ['_'.join(map(str, col)).strip() for col in pivoted_df.columns.values]
|
|
701
|
+
else:
|
|
702
|
+
pivoted_df.columns = pivoted_df.columns.map(str)
|
|
696
703
|
else:
|
|
697
|
-
pivoted_df.columns
|
|
704
|
+
if isinstance(pivoted_df.columns, pd.MultiIndex):
|
|
705
|
+
# Reorder the MultiIndex columns
|
|
706
|
+
pivoted_df.columns = ['_'.join(reversed(list(map(str, col)))).strip() for col in pivoted_df.columns.values]
|
|
707
|
+
else:
|
|
708
|
+
pivoted_df.columns = pivoted_df.columns.map(str)
|
|
709
|
+
# Reverse the order for single index columns
|
|
710
|
+
pivoted_df.columns = ['_'.join(reversed(col.split('_'))).strip() for col in pivoted_df.columns]
|
|
698
711
|
|
|
699
712
|
# Reset the pivot before returning
|
|
700
713
|
pivoted_df = pivoted_df.reset_index()
|
|
701
714
|
|
|
702
|
-
# Sort by
|
|
703
|
-
if datetime_trans_needed
|
|
704
|
-
# pivoted_df = pivoted_df.reset_index()
|
|
715
|
+
# Sort by index column from oldest to newest
|
|
716
|
+
if datetime_trans_needed:
|
|
705
717
|
pivoted_df[index_col] = pd.to_datetime(pivoted_df[index_col]) # Ensure sorting works correctly
|
|
706
718
|
pivoted_df = pivoted_df.sort_values(by=index_col)
|
|
707
|
-
|
|
708
|
-
# Convert OBS back to a string in YYYY-MM-DD format for display purposes
|
|
709
|
-
pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
|
|
710
719
|
|
|
711
|
-
#
|
|
712
|
-
|
|
720
|
+
# Convert index column back to a string in YYYY-MM-DD format for display purposes
|
|
721
|
+
pivoted_df[index_col] = pivoted_df[index_col].dt.strftime('%Y-%m-%d')
|
|
713
722
|
|
|
714
723
|
# Fill in any NaNs
|
|
715
724
|
pivoted_df = pivoted_df.fillna(fill_value)
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=6zY1sE_ucCQVCp3G2lOz0hBvKOol44nkY90Y_KZlYMg,140390
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.2.
|
|
12
|
-
imsciences-0.6.2.
|
|
13
|
-
imsciences-0.6.2.
|
|
14
|
-
imsciences-0.6.2.
|
|
11
|
+
imsciences-0.6.2.2.dist-info/METADATA,sha256=viAJgSE2MA6ykZZRL70i9xzme8eJY__JxoCFv_5PGQw,854
|
|
12
|
+
imsciences-0.6.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.2.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|