PyPI - imsciences - Versions diffs - 0.6.2.2__py3-none-any.whl → 0.6.2.3__py3-none-any.whl - Mend

imsciences 0.6.2.2__py3-none-any.whl → 0.6.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

imsciences/datafunctions.py CHANGED Viewed

@@ -177,42 +177,42 @@ class dataprocessing:
         print("   - Usage: read_and_concatenate_files(folder_path, file_type='csv')")
         print("   - Example: read_and_concatenate_files(folder_path, file_type='csv')")
-        print("\n31. remove zero values")
+        print("\n31. remove_zero_values")
         print("   - Description: Remove zero values in a specified column.")
         print("   - Usage: remove_zero_values(self, data_frame, column_to_filter)")
         print("   - Example: remove_zero_values(None, df, 'Funeral_Delivery')")
-        print("\n32. upgrade all packages")
+        print("\n32. upgrade_outdated_packages")
         print("   - Description: Upgrades all packages.")
         print("   - Usage: upgrade_outdated_packages()")
         print("   - Example: upgrade_outdated_packages()")
-        print("\n33. Convert Mixed Formats Dates")
+        print("\n33. convert_mixed_formats_dates")
         print("   - Description: Convert a mix of US and UK dates to datetime.")
         print("   - Usage: convert_mixed_formats_dates(df, datecol)")
         print("   - Example: convert_mixed_formats_dates(df, 'OBS')")
-        print("\n34. Fill Weekly Missing Dates")
+        print("\n34. fill_weekly_date_range")
         print("   - Description: Fill in any missing weeks with 0.")
         print("   - Usage: fill_weekly_date_range(df, date_column, freq)")
         print("   - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
-        print("\n35. Add Prefix and/or Suffix to Column Headers")
+        print("\n35. add_prefix_and_suffix")
         print("   - Description: Add Prefix and/or Suffix to Column Headers.")
         print("   - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
         print("   - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
-        print("\n36. Change all data to dummies")
+        print("\n36. create_dummies")
         print("   - Description: Changes time series to 0s and 1s based off threshold")
         print("   - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
         print("   - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
-        print("\n37. Replace substrings in column of strings")
+        print("\n37. replace_substrings")
         print("   - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
         print("   - Usage: replace_substrings(df, column, replacements, to_lower=False, new_column=None)")
         print("   - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')")
-        print("\n38. Add totals column")
+        print("\n38. add_total_column")
         print("   - Description: Sums all columns with the option to exclude an date column to create a total column")
         print("   - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
         print("   - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
@@ -221,6 +221,13 @@ class dataprocessing:
         print("    - Description: Equivalent of xlookup in excel, but only based on substrings. If a substring is found in a cell, than look it up in the dictionary. Otherwise use the other label")
         print("    - Usage: apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')")
         print("    - Example: apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')")
+        print("\n40. compare_overlap")
+        print("    - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
+        print("    - Usage: compare_overlap(df1, df2, date_col)")
+        print("    - Example: compare_overlap(df_1, df_2, 'obs')")
     def get_wd_levels(self, levels):
         """
@@ -1475,7 +1482,59 @@ class dataprocessing:
         df[new_col_name] = df[column_name].apply(categorize_text)
         return df
+    def compare_overlap(df1, df2, date_col):
+        """
+        Compare overlapping periods between two DataFrames and provide a summary of total differences.
+        Args:
+            df1 (pandas.DataFrame): First DataFrame containing date-based data.
+            df2 (pandas.DataFrame): Second DataFrame containing date-based data.
+            date_col (str): The name of the date column used for aligning data.
+        Returns:
+            tuple: A tuple containing the DataFrame of differences and a summary DataFrame with total differences by column.
+        """
+        # Ensure date columns are in datetime format
+        df1[date_col] = pd.to_datetime(df1[date_col])
+        df2[date_col] = pd.to_datetime(df2[date_col])
+        # Determine the overlap period
+        start_date = max(df1[date_col].min(), df2[date_col].min())
+        end_date = min(df1[date_col].max(), df2[date_col].max())
+        # Filter dataframes to the overlapping period
+        df1_overlap = df1[(df1[date_col] >= start_date) & (df1[date_col] <= end_date)]
+        df2_overlap = df2[(df2[date_col] >= start_date) & (df2[date_col] <= end_date)]
+        # Merge the dataframes on the date column to align data for comparison
+        merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))
+        # Initialize a list to collect total differences for each column
+        total_diff_list = []
+        # Compare the values in each column (excluding the date column)
+        diff_df = pd.DataFrame({date_col: merged_df[date_col]})  # Initialize diff_df with the date column
+        for col in df1.columns:
+            if col != date_col:
+                # Calculate the difference for each row
+                diff_col = f'diff_{col}'
+                diff_df[diff_col] = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']
+                # Calculate the total difference for the column and add it to the list
+                total_diff = diff_df[diff_col].sum()
+                total_diff_list.append({'Column': col, 'Total Difference': total_diff})
+        # Create a DataFrame for the summary of total differences
+        total_diff_df = pd.DataFrame(total_diff_list)
+        # Apply formatting to the numerical columns
+        float_format = "{:,.2f}".format  # Format to 2 decimal places with comma as thousand separator
+        diff_df.iloc[:, 1:] = diff_df.iloc[:, 1:].applymap(float_format)
+        total_diff_df['Total Difference'] = total_diff_df['Total Difference'].apply(float_format)
+        return diff_df, total_diff_df
 ########################################################################################################################################
 ########################################################################################################################################

{imsciences-0.6.2.2.dist-info → imsciences-0.6.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.6.2.2
+Version: 0.6.2.3
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.6.2.2.dist-info → imsciences-0.6.2.3.dist-info}/RECORD RENAMED Viewed

@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
-imsciences/datafunctions.py,sha256=6zY1sE_ucCQVCp3G2lOz0hBvKOol44nkY90Y_KZlYMg,140390
+imsciences/datafunctions.py,sha256=IrcIfw80MQnnRc2gD6QfuKIlDgVQxkZX-bTj7LKOiEU,143441
 imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
 imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
 imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
 imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
-imsciences-0.6.2.2.dist-info/METADATA,sha256=viAJgSE2MA6ykZZRL70i9xzme8eJY__JxoCFv_5PGQw,854
-imsciences-0.6.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-imsciences-0.6.2.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
-imsciences-0.6.2.2.dist-info/RECORD,,
+imsciences-0.6.2.3.dist-info/METADATA,sha256=diBYqgQ-3WJ9pcVQfeAmJkUyLzpy5tqMX1VWjD6zT7k,854
+imsciences-0.6.2.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+imsciences-0.6.2.3.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
+imsciences-0.6.2.3.dist-info/RECORD,,

{imsciences-0.6.2.2.dist-info → imsciences-0.6.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{imsciences-0.6.2.2.dist-info → imsciences-0.6.2.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

imsciences 0.6.2.2__py3-none-any.whl → 0.6.2.3__py3-none-any.whl

imsciences 0.6.2.2__py3-none-any.whl → 0.6.2.3__py3-none-any.whl