imsciences 0.6.2.2__py3-none-any.whl → 0.6.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -177,42 +177,42 @@ class dataprocessing:
177
177
  print(" - Usage: read_and_concatenate_files(folder_path, file_type='csv')")
178
178
  print(" - Example: read_and_concatenate_files(folder_path, file_type='csv')")
179
179
 
180
- print("\n31. remove zero values")
180
+ print("\n31. remove_zero_values")
181
181
  print(" - Description: Remove zero values in a specified column.")
182
182
  print(" - Usage: remove_zero_values(self, data_frame, column_to_filter)")
183
183
  print(" - Example: remove_zero_values(None, df, 'Funeral_Delivery')")
184
184
 
185
- print("\n32. upgrade all packages")
185
+ print("\n32. upgrade_outdated_packages")
186
186
  print(" - Description: Upgrades all packages.")
187
187
  print(" - Usage: upgrade_outdated_packages()")
188
188
  print(" - Example: upgrade_outdated_packages()")
189
189
 
190
- print("\n33. Convert Mixed Formats Dates")
190
+ print("\n33. convert_mixed_formats_dates")
191
191
  print(" - Description: Convert a mix of US and UK dates to datetime.")
192
192
  print(" - Usage: convert_mixed_formats_dates(df, datecol)")
193
193
  print(" - Example: convert_mixed_formats_dates(df, 'OBS')")
194
194
 
195
- print("\n34. Fill Weekly Missing Dates")
195
+ print("\n34. fill_weekly_date_range")
196
196
  print(" - Description: Fill in any missing weeks with 0.")
197
197
  print(" - Usage: fill_weekly_date_range(df, date_column, freq)")
198
198
  print(" - Example: fill_weekly_date_range(df, 'OBS', 'W-MON')")
199
199
 
200
- print("\n35. Add Prefix and/or Suffix to Column Headers")
200
+ print("\n35. add_prefix_and_suffix")
201
201
  print(" - Description: Add Prefix and/or Suffix to Column Headers.")
202
202
  print(" - Usage: add_prefix_and_suffix(df, prefix='', suffix='', date_col=None)")
203
203
  print(" - Example: add_prefix_and_suffix(df, prefix='media_', suffix='_spd', date_col='obs')")
204
204
 
205
- print("\n36. Change all data to dummies")
205
+ print("\n36. create_dummies")
206
206
  print(" - Description: Changes time series to 0s and 1s based off threshold")
207
207
  print(" - Usage: create_dummies(df, date_col=None, dummy_threshold=0, add_total_dummy_col='No', total_col_name='total')")
208
208
  print(" - Example: create_dummies(df, date_col='obs', dummy_threshold=100, add_total_dummy_col='Yes', total_col_name='med_total_dum')")
209
209
 
210
- print("\n37. Replace substrings in column of strings")
210
+ print("\n37. replace_substrings")
211
211
  print(" - Description: Replace substrings in column of strings based off dictionary, can also change column to lower")
212
212
  print(" - Usage: replace_substrings(df, column, replacements, to_lower=False, new_column=None)")
213
213
  print(" - Example: replace_substrings(df, 'Influencer Handle', replacement_dict, to_lower=True, new_column='Short Version')")
214
214
 
215
- print("\n38. Add totals column")
215
+ print("\n38. add_total_column")
216
216
  print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
217
217
  print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
218
218
  print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
@@ -221,6 +221,13 @@ class dataprocessing:
221
221
  print(" - Description: Equivalent of xlookup in excel, but only based on substrings. If a substring is found in a cell, than look it up in the dictionary. Otherwise use the other label")
222
222
  print(" - Usage: apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')")
223
223
  print(" - Example: apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')")
224
+
225
+ print("\n40. compare_overlap")
226
+ print(" - Description: With two matching dataset, it takes the common columns and rows and takes the difference between them, outputing a differences and total differences table")
227
+ print(" - Usage: compare_overlap(df1, df2, date_col)")
228
+ print(" - Example: compare_overlap(df_1, df_2, 'obs')")
229
+
230
+
224
231
 
225
232
  def get_wd_levels(self, levels):
226
233
  """
@@ -1475,7 +1482,59 @@ class dataprocessing:
1475
1482
  df[new_col_name] = df[column_name].apply(categorize_text)
1476
1483
  return df
1477
1484
 
1485
def compare_overlap(df1, df2, date_col):
    """
    Compare the overlapping date range of two DataFrames column by column.

    Both frames are restricted to the period where their date ranges overlap,
    aligned on ``date_col`` with an inner merge, and every non-date column that
    exists in BOTH frames is differenced (``df1 - df2``). Columns present in
    only one frame are ignored.

    Args:
        df1 (pandas.DataFrame): First DataFrame containing date-based data.
        df2 (pandas.DataFrame): Second DataFrame containing date-based data.
        date_col (str): Name of the date column present in both frames; its
            values must be parseable by ``pd.to_datetime``.

    Returns:
        tuple: ``(diff_df, total_diff_df)`` where

            * ``diff_df`` holds ``date_col`` plus one ``diff_<column>`` column
              per compared column, with each difference formatted as a string
              with two decimals and a thousands separator (e.g. ``"1,234.50"``).
            * ``total_diff_df`` has the columns ``'Column'`` and
              ``'Total Difference'`` (the summed difference per column,
              formatted the same way). It is empty, but still has both
              columns, when there is nothing to compare.

    The input DataFrames are not modified.
    """
    # NOTE(review): the diff hunk header places this function inside
    # `class dataprocessing`, yet it takes no `self` -- confirm how callers
    # are expected to invoke it before changing the signature.

    # Work on copies so the date conversion does not overwrite the caller's data.
    df1 = df1.copy()
    df2 = df2.copy()

    # Ensure date columns are in datetime format
    df1[date_col] = pd.to_datetime(df1[date_col])
    df2[date_col] = pd.to_datetime(df2[date_col])

    # Determine the overlap period
    start_date = max(df1[date_col].min(), df2[date_col].min())
    end_date = min(df1[date_col].max(), df2[date_col].max())

    # Filter dataframes to the overlapping period
    df1_overlap = df1[(df1[date_col] >= start_date) & (df1[date_col] <= end_date)]
    df2_overlap = df2[(df2[date_col] >= start_date) & (df2[date_col] <= end_date)]

    # Merge the dataframes on the date column to align data for comparison
    merged_df = pd.merge(df1_overlap, df2_overlap, on=date_col, suffixes=('_df1', '_df2'))

    # pd.merge only appends the suffixes to column names found in both frames,
    # so only those columns have the `_df1` / `_df2` pair needed below.
    common_cols = [col for col in df1.columns if col != date_col and col in df2.columns]

    # Format to 2 decimal places with comma as thousand separator
    float_format = "{:,.2f}".format

    diff_df = pd.DataFrame({date_col: merged_df[date_col]})
    total_diff_list = []

    for col in common_cols:
        # Row-wise difference for this column
        diff = merged_df[f'{col}_df1'] - merged_df[f'{col}_df2']

        # Sum the numeric differences before they are turned into strings
        total_diff_list.append({'Column': col, 'Total Difference': float_format(diff.sum())})

        # Store the formatted strings as a fresh column rather than writing
        # strings back into an existing float column.
        diff_df[f'diff_{col}'] = diff.map(float_format)

    # Explicit columns keep the summary well-formed even with no common columns
    total_diff_df = pd.DataFrame(total_diff_list, columns=['Column', 'Total Difference'])

    return diff_df, total_diff_df
1537
+
1479
1538
  ########################################################################################################################################
1480
1539
  ########################################################################################################################################
1481
1540
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.2.2
3
+ Version: 0.6.2.3
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
- imsciences/datafunctions.py,sha256=6zY1sE_ucCQVCp3G2lOz0hBvKOol44nkY90Y_KZlYMg,140390
5
+ imsciences/datafunctions.py,sha256=IrcIfw80MQnnRc2gD6QfuKIlDgVQxkZX-bTj7LKOiEU,143441
6
6
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
7
7
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
8
8
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
9
9
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
- imsciences-0.6.2.2.dist-info/METADATA,sha256=viAJgSE2MA6ykZZRL70i9xzme8eJY__JxoCFv_5PGQw,854
12
- imsciences-0.6.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- imsciences-0.6.2.2.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
- imsciences-0.6.2.2.dist-info/RECORD,,
11
+ imsciences-0.6.2.3.dist-info/METADATA,sha256=diBYqgQ-3WJ9pcVQfeAmJkUyLzpy5tqMX1VWjD6zT7k,854
12
+ imsciences-0.6.2.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ imsciences-0.6.2.3.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
+ imsciences-0.6.2.3.dist-info/RECORD,,