imsciences 0.6.1.7__py3-none-any.whl → 0.6.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -216,6 +216,11 @@ class dataprocessing:
216
216
  print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
217
217
  print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
218
218
  print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
219
+
220
+ print("\n39. apply_lookup_table_based_on_substring")
221
+ print(" - Description: Equivalent of xlookup in excel, but only based on substrings. If a substring is found in a cell, than look it up in the dictionary. Otherwise use the other label")
222
+ print(" - Usage: apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')")
223
+ print(" - Example: apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')")
219
224
 
220
225
  def get_wd_levels(self, levels):
221
226
  """
@@ -1426,7 +1431,43 @@ class dataprocessing:
1426
1431
 
1427
1432
  return df
1428
1433
 
1434
def apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other'):
    """
    Categorize the text in a DataFrame column using a substring lookup table.

    Each cell of ``column_name`` is searched, case-insensitively, for the keys
    of ``category_dict`` in the dictionary's iteration order. The value of the
    first key found becomes the cell's category; cells with no matching key
    receive ``other_label``.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing the column to categorize.
      It is modified in place (the new column is added to it).
    - column_name (str): The name of the column that contains the text to categorize.
    - category_dict (dict): Keys are substrings to search for in the text and values
      are the categories to assign when a substring is found.
    - new_col_name (str): The name of the column that will hold the resulting
      categories. Default is 'Category'.
    - other_label (str): The category assigned when no key matches, or when the
      cell is not a string (e.g. NaN/None). Default is 'Other'.

    Returns:
    - pd.DataFrame: The same DataFrame object, with the category column added.
    """
    # NOTE(review): the surrounding diff context suggests this definition sits
    # inside a class yet takes no `self` -- confirm how callers invoke it.

    # Lower-case the keys once instead of once per row; dict order is preserved
    # so "first matching key wins" behaves exactly as before.
    lowered_lookup = [(key.lower(), category) for key, category in category_dict.items()]

    def categorize_text(text):
        """Return the category of the first key contained in ``text``, else ``other_label``."""
        # Missing or non-text cells cannot contain a substring; fall back to the
        # default label instead of failing on `.lower()`.
        if not isinstance(text, str):
            return other_label
        lowered_text = text.lower()
        for lowered_key, category in lowered_lookup:
            if lowered_key in lowered_text:
                return category
        return other_label  # Default category if no match is found

    # Apply the categorization to each element in the specified column
    df[new_col_name] = df[column_name].apply(categorize_text)
    return df
1430
1471
 
1431
1472
 
1432
1473
  ########################################################################################################################################
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: imsciences
3
- Version: 0.6.1.7
3
+ Version: 0.6.1.8
4
4
  Summary: IMS Data Processing Package
5
5
  Author: IMS
6
6
  Author-email: cam@im-sciences.com
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
2
2
  dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
3
3
  dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
4
4
  imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
5
- imsciences/datafunctions.py,sha256=kSvo0MiPJuSquKMt0RNNAjcloij-FGgffunf-ZBTYGw,137112
5
+ imsciences/datafunctions.py,sha256=kyspeS4ifoNb_Z_f4OfmWlEMHO6sUO8JjconGzq8Mz8,139793
6
6
  imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
7
7
  imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
8
8
  imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
9
9
  imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
10
10
  imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
11
- imsciences-0.6.1.7.dist-info/METADATA,sha256=VISHFJ2z2oHyTyD2T2B3yhd-F83Q5Kb-XL8cr4EzOlI,854
12
- imsciences-0.6.1.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- imsciences-0.6.1.7.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
- imsciences-0.6.1.7.dist-info/RECORD,,
11
+ imsciences-0.6.1.8.dist-info/METADATA,sha256=2NgdiqLw89mBc9VsqR9ZJ_GZmX7-CslnYmzS6mIvybw,854
12
+ imsciences-0.6.1.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ imsciences-0.6.1.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
14
+ imsciences-0.6.1.8.dist-info/RECORD,,