PyPI - imsciences - Versions diffs - 0.6.2.5__py3-none-any.whl → 0.6.2.7__py3-none-any.whl - Mend

imsciences-0.6.2.5-py3-none-any.whl → imsciences-0.6.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

imsciences/datafunctions.py CHANGED Viewed

@@ -109,13 +109,13 @@ class dataprocessing:
         print("\n17. pivot_table")
         print("    - Description: Dynamically pivots a DataFrame based on specified columns.")
-        print("    - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False')")
-        print("    - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True')")
+        print("    - Usage: pivot_table(df, index_col, columns, values_col, filters_dict=None, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'False',fill_missing_weekly_dates=False,week_commencing='W-MON')")
+        print("    - Example: pivot_table(df, 'OBS', 'Channel Short Names', 'Value',filters_dict={'Master Include':' == 1','OBS':' >= datetime(2019,9,9)','Metric Short Names':' == 'spd''}, fill_value=0,aggfunc='sum',margins=False,margins_name='Total',datetime_trans_needed=True,reverse_header_order = 'True',fill_missing_weekly_dates=True,week_commencing='W-MON')")
         print("\n18. apply_lookup_table_for_columns")
         print("    - Description: Equivalent of xlookup in excel. Allows you to map a dictionary of substrings within a column. If multiple columns are need for the LUT then a | seperator is needed.")
-        print("    - Usage: classify_within_column(df, col_names, to_find_dict, if_not_in_country_dict='Other'), new_column_name='Mapping'")
-        print("    - Example: classify_within_column(df, ['campaign type','media type'], {'France Paid Social FB|paid social': 'facebook','France Paid Social TW|paid social': 'twitter'}, 'other','mapping')")
+        print("    - Usage: apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict='Other', new_column_name='Mapping')")
+        print("    - Example: apply_lookup_table_for_columns(df, col_names, {'spend':'spd','clicks':'clk'}, if_not_in_dict='Other', new_column_name='Metrics Short')")
         print("\n19. aggregate_daily_to_wc_wide")
         print("   - Description: Aggregates daily data into weekly data, grouping and summing specified columns, starting on a specified day of the week.")
@@ -226,6 +226,16 @@ class dataprocessing:
         print("    - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
         print("    - Usage: compare_overlap(df1, df2, date_col)")
         print("    - Example: compare_overlap(df_1, df_2, 'obs')")
+        print("\n41. week_commencing_2_week_commencing_conversion")
+        print("    - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
+        print("    - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
+        print("    - Example: week_commencing_2_week_commencing_conversion(df,'obs,week_commencing='mon')")
+        print("\n42. week_commencing_2_week_commencing_conversion")
+        print("    - Description: Take a week commencing column say sunday and creates a new column with a different week commencing e.g. monday")
+        print("    - Usage: week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun')")
+        print("    - Example: week_commencing_2_week_commencing_conversion(df,'obs,week_commencing='mon')")
@@ -664,7 +674,7 @@ class dataprocessing:
         return combined_df
-    def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False):
+    def pivot_table(self, df, index_col, columns, values_col, filters_dict=None, fill_value=0, aggfunc='sum', margins=False, margins_name="Total", datetime_trans_needed=True, reverse_header_order=False,fill_missing_weekly_dates=False,week_commencing='W-MON'):
         """
         Provides the ability to create pivot tables, filtering the data to get to data you want and then pivoting on certain columns
@@ -680,6 +690,8 @@ class dataprocessing:
             margins_name (str, optional): The name of the Totals columns. Defaults to "Total".
             datetime_trans_needed (bool, optional): Whether the index column needs to be transformed into datetime format. Defaults to False.
             reverse_header_order (bool, optional): Reverses the order of the column headers. Defaults to False.
+            fill_missing_weekly_dates (bool, optional): Fills in any weekly missing dates. Defaults to False.
+            week_commencing (str,optional): Fills in missing weeks if option is specified. Defaults to 'W-MON'.
         Returns:
             pandas.DataFrame: The pivot table specified
@@ -730,6 +742,10 @@ class dataprocessing:
         # Fill in any NaNs
         pivoted_df = pivoted_df.fillna(fill_value)
+        # If there is a need to fill in missing weeks
+        if fill_missing_weekly_dates == True:
+            pivoted_df = self.fill_weekly_date_range(pivoted_df, index_col, freq=week_commencing)
         return pivoted_df
     def apply_lookup_table_for_columns(self, df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
@@ -1482,7 +1498,7 @@ class dataprocessing:
         df[new_col_name] = df[column_name].apply(categorize_text)
         return df
-    def compare_overlap(self, df1, df2, date_col):
+    def compare_overlap(df1, df2, date_col):
         """
         Compare overlapping periods between two DataFrames and provide a summary of total differences.
@@ -1509,21 +1525,23 @@ class dataprocessing:
         # Merge the dataframes on the date column to align data for comparison
         merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))
+        # Get the common columns between the two DataFrames, excluding the date column
+        common_cols = [col for col in df1.columns if col != date_col and col in df2.columns]
         # Initialize a list to collect total differences for each column
         total_diff_list = []
-        # Compare the values in each column (excluding the date column)
+        # Create a DataFrame for the differences
         diff_df = pd.DataFrame({date_col: merged_df[date_col]})  # Initialize diff_df with the date column
-        for col in df1.columns:
-            if col != date_col:
-                # Calculate the difference for each row
-                diff_col = f'diff_{col}'
-                diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
-                # Calculate the total difference for the column and add it to the list
-                total_diff = diff_df[diff_col].sum()
-                total_diff_list.append({'Column': col, 'Total Difference': total_diff})
+        for col in common_cols:
+            # Calculate the difference for each row
+            diff_col = f'diff_{col}'
+            diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
+            # Calculate the total difference for the column and add it to the list
+            total_diff = diff_df[diff_col].sum()
+            total_diff_list.append({'Column': col, 'Total Difference': total_diff})
         # Create a DataFrame for the summary of total differences
         total_diff_df = pd.DataFrame(total_diff_list)
@@ -1535,6 +1553,162 @@ class dataprocessing:
         return diff_df, total_diff_df
+    # Convert week commencing col (should be most likely monday to sunday or vice versa)
+    def week_commencing_2_week_commencing_conversion(df,date_col,week_commencing='sun'):
+        """
+        Convert week commencing column in a DataFrame to the start of another day specified.
+        Args:
+            df (pandas.DataFrame): The DataFrame containing the date-based data.
+            date_col (str): The name of the date column in the DataFrame.
+            week_commencing (str, optional): The day of the week that the week starts on ('sun' for Sunday, 'mon' for Monday, etc.). Defaults to 'sun'.
+        Returns:
+            pandas.DataFrame: The original DataFrame with an additional column indicating the start of the week.
+        """
+        # Week commencing dictionary
+        day_dict = {"mon": 0, "tue": 1, "wed": 2, "thur": 3, "fri": 4, "sat": 5, "sun": 6}
+        df['week_start_'+ week_commencing] = df[date_col].apply(lambda x: x - pd.Timedelta(days=(x.weekday() - day_dict[week_commencing]) % 7))
+        return df
+    def plot_chart(df, date_col, value_cols, chart_type='line', title='Chart', x_title='Date', y_title='Values', **kwargs):
+        """
+        Plot various types of charts using Plotly.
+        Args:
+            df (pandas.DataFrame): DataFrame containing the data.
+            date_col (str): The name of the column with date information.
+            value_cols (list): List of columns to plot.
+            chart_type (str): Type of chart to plot ('line', 'bar', 'scatter', 'pie', 'box', 'heatmap', 'area', 'bubble', 'funnel', 'waterfall', 'contour', 'scatter3d').
+            title (str): Title of the chart.
+            x_title (str): Title of the x-axis.
+            y_title (str): Title of the y-axis.
+            **kwargs: Additional keyword arguments for customization.
+        Returns:
+            plotly.graph_objects.Figure: The Plotly figure object.
+        """
+        # Ensure the date column is in datetime format
+        df[date_col] = pd.to_datetime(df[date_col])
+        # Initialize the figure
+        fig = go.Figure()
+        # Make sure the date col is excluded from the line cols
+        value_cols = [x for x in value_cols if x!=date_col]
+        # Add each value column to the plot based on the chart type
+        for col in value_cols:
+            if chart_type == 'line':
+                fig.add_trace(go.Scatter(
+                    x=df[date_col],
+                    y=df[col],
+                    mode='lines',
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'bar':
+                fig.add_trace(go.Bar(
+                    x=df[date_col],
+                    y=df[col],
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'scatter':
+                fig.add_trace(go.Scatter(
+                    x=df[date_col],
+                    y=df[col],
+                    mode='markers',
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'histogram':
+                fig.add_trace(go.Histogram(
+                    x=df[col],
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'pie':
+                fig.add_trace(go.Pie(
+                    labels=df[date_col],  # or another column for labels
+                    values=df[col],
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'box':
+                fig.add_trace(go.Box(
+                    y=df[col],
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'heatmap':
+                fig.add_trace(go.Heatmap(
+                    z=df.pivot_table(index=date_col, columns=value_cols[0], values=value_cols[1]),
+                    x=df[value_cols[0]],
+                    y=df[date_col],
+                    **kwargs
+                ))
+            elif chart_type == 'area':
+                fig.add_trace(go.Scatter(
+                    x=df[date_col],
+                    y=df[col],
+                    mode='lines',  # Use 'lines+markers' if you want markers
+                    fill='tozeroy',  # Fill the area under the line
+                    name=col,
+                    **kwargs
+                ))
+            elif chart_type == 'bubble':
+                fig.add_trace(go.Scatter(
+                    x=df[value_cols[0]],
+                    y=df[value_cols[1]],
+                    mode='markers',
+                    marker=dict(size=df[value_cols[2]]),
+                    name='Bubble Chart',
+                    **kwargs
+                ))
+            elif chart_type == 'funnel':
+                fig.add_trace(go.Funnel(
+                    y=df[date_col],
+                    x=df[col],
+                    **kwargs
+                ))
+            elif chart_type == 'waterfall':
+                fig.add_trace(go.Waterfall(
+                    x=df[date_col],
+                    y=df[col],
+                    measure=df[value_cols[1]],  # measures like 'increase', 'decrease', 'total'
+                    **kwargs
+                ))
+            elif chart_type == 'contour':
+                fig.add_trace(go.Contour(
+                    z=df.pivot_table(index=value_cols[0], columns=value_cols[1], values=value_cols[2]),
+                    x=df[value_cols[0]],
+                    y=df[value_cols[1]],
+                    **kwargs
+                ))
+            elif chart_type == 'scatter3d':
+                fig.add_trace(go.Scatter3d(
+                    x=df[value_cols[0]],
+                    y=df[value_cols[1]],
+                    z=df[value_cols[2]],
+                    mode='markers',
+                    **kwargs
+                ))
+            else:
+                raise ValueError(f"Unsupported chart type: {chart_type}")
+        # Update the layout of the figure
+        fig.update_layout(
+            title=title,
+            xaxis_title=x_title,
+            yaxis_title=y_title,
+            legend_title='Series',
+            template='plotly_dark'
+        )
+        return fig
 ########################################################################################################################################
 ########################################################################################################################################

{imsciences-0.6.2.5.dist-info → imsciences-0.6.2.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.2.5
+Version: 0.6.2.7
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.6.2.5.dist-info → imsciences-0.6.2.7.dist-info}/RECORD RENAMED Viewed

@@ -3,14 +3,14 @@ dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nF
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
 imsciences/datafunctions-IMS-24Ltp-3.py,sha256=3Snv-0iE_03StmyjtT-riOU9f4v8TaJWLoyZLJp6l8Y,141406
-imsciences/datafunctions.py,sha256=WOdezS0IW_n91eWr9SL_czLjCCkl5-951n58sjCG51Q,143447
+imsciences/datafunctions.py,sha256=PGuvgJIurXGWM8E1M_w9BijUJGBm5FTaZVE-C1_sPog,151382
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.2.5.dist-info/METADATA,sha256=HOqcQ7JFLITN-1vcra1XZTHQ72_JM_vajUfmpXNMyl4,854
-imsciences-0.6.2.5.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
-imsciences-0.6.2.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-imsciences-0.6.2.5.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.6.2.5.dist-info/RECORD,,
+imsciences-0.6.2.7.dist-info/METADATA,sha256=0IT7pWYxsHXerkqBVKsS2Zh1_6qwn1u7NL3mK44c4tk,854
+imsciences-0.6.2.7.dist-info/PKG-INFO-IMS-24Ltp-3,sha256=yqZbigwHjnYoqyI81PGz_AeofRFfOrwH_Vyawyef-mg,854
+imsciences-0.6.2.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.2.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.2.7.dist-info/RECORD,,

{imsciences-0.6.2.5.dist-info → imsciences-0.6.2.7.dist-info}/PKG-INFO-IMS-24Ltp-3 RENAMED Viewed

File without changes

{imsciences-0.6.2.5.dist-info → imsciences-0.6.2.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{imsciences-0.6.2.5.dist-info → imsciences-0.6.2.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

imsciences 0.6.2.5__py3-none-any.whl → 0.6.2.7__py3-none-any.whl

imsciences-0.6.2.5-py3-none-any.whl → imsciences-0.6.2.7-py3-none-any.whl