imsciences 0.6.1.6__py3-none-any.whl → 0.6.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +72 -31
- {imsciences-0.6.1.6.dist-info → imsciences-0.6.1.8.dist-info}/METADATA +1 -1
- {imsciences-0.6.1.6.dist-info → imsciences-0.6.1.8.dist-info}/RECORD +5 -5
- {imsciences-0.6.1.6.dist-info → imsciences-0.6.1.8.dist-info}/WHEEL +0 -0
- {imsciences-0.6.1.6.dist-info → imsciences-0.6.1.8.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -216,6 +216,11 @@ class dataprocessing:
|
|
|
216
216
|
print(" - Description: Sums all columns with the option to exclude an date column to create a total column")
|
|
217
217
|
print(" - Usage: add_total_column(df, exclude_col=None, total_col_name='Total')")
|
|
218
218
|
print(" - Example: add_total_column(df, exclude_col='obs', total_col_name='total_media_spd')")
|
|
219
|
+
|
|
220
|
+
print("\n39. apply_lookup_table_based_on_substring")
|
|
221
|
+
print(" - Description: Equivalent of xlookup in excel, but only based on substrings. If a substring is found in a cell, than look it up in the dictionary. Otherwise use the other label")
|
|
222
|
+
print(" - Usage: apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other')")
|
|
223
|
+
print(" - Example: apply_lookup_table_based_on_substring(df, 'Campaign Name', campaign_dict, new_col_name='Campaign Name Short', other_label='Full Funnel')")
|
|
219
224
|
|
|
220
225
|
def get_wd_levels(self, levels):
|
|
221
226
|
"""
|
|
@@ -1371,39 +1376,39 @@ class dataprocessing:
|
|
|
1371
1376
|
|
|
1372
1377
|
return df
|
|
1373
1378
|
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1379
|
+
def replace_substrings(self, df, column, replacements, to_lower=False, new_column=None):
    """
    Replace substrings in a DataFrame column using a dictionary of replacements.

    When ``to_lower`` is True the values are lowercased first, and the
    replacements are then applied to the lowercased text. Replacement keys
    should therefore be written in lowercase when ``to_lower`` is used.
    Replacements are applied one after another in the dictionary's iteration
    order, as literal (non-regex) substring substitutions.

    Args:
        df (pd.DataFrame): The DataFrame containing the column to modify. It is modified in place.
        column (str): The column name whose values are read.
        replacements (dict): A dictionary where keys are substrings to replace and values are the replacement strings.
        to_lower (bool, optional): If True, the column values are converted to lowercase before applying replacements. Default is False.
        new_column (str, optional): If provided, the result is written to this column and ``column`` is left untouched. If None, ``column`` is overwritten. Default is None.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, with the result column written.
    """
    # Write to the new column when one is requested, otherwise overwrite the source.
    target_column = column if new_column is None else new_column

    values = df[column]

    # Lowercase BEFORE replacing so that lowercase keys match text of any case.
    if to_lower:
        values = values.str.lower()

    # regex=False: keys are literal substrings, so characters like '.' or '(' are not special.
    for old, new in replacements.items():
        values = values.str.replace(old, new, regex=False)

    # Single assignment instead of re-writing the column once per replacement.
    df[target_column] = values
    return df
|
|
1407
1412
|
|
|
1408
1413
|
def add_total_column(self, df, exclude_col=None, total_col_name='Total'):
|
|
1409
1414
|
"""
|
|
@@ -1426,7 +1431,43 @@ class dataprocessing:
|
|
|
1426
1431
|
|
|
1427
1432
|
return df
|
|
1428
1433
|
|
|
1434
|
+
def apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other'):
    """
    Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.

    Each cell is compared case-insensitively against the keys of ``category_dict``
    in the dictionary's iteration order; the category of the first key found
    inside the cell's text is assigned. Cells that match no key, and cells that
    are not strings (e.g. NaN/None/numbers), receive ``other_label``.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing the column to categorize. It is modified in place.
    - column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
    - category_dict (dict): A dictionary where keys are substrings to search for in the text and values are
                            the categories to assign when a substring is found.
    - new_col_name (str): The name of the new column to be created in the DataFrame, which will hold the
                          resulting categories. Default is 'Category'.
    - other_label (str): The category assigned when no key is found in the text. Default is 'Other'.

    Returns:
    - pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
    """
    # NOTE(review): the other functions listed alongside this one take `self` as
    # their first parameter, but this signature does not. If this definition lives
    # inside the class, calling it through an instance will shift every argument
    # by one -- confirm the intended call style before changing the signature.

    # Lowercase the keys once instead of once per cell.
    lowered_lookup = [(key.lower(), category) for key, category in category_dict.items()]

    def categorize_text(text):
        """
        Return the category of the first lookup key contained in ``text``,
        or ``other_label`` when nothing matches or ``text`` is not a string.
        """
        # Missing values and non-text cells have no substring to match.
        if not isinstance(text, str):
            return other_label

        lowered_text = text.lower()
        for needle, category in lowered_lookup:
            if needle in lowered_text:
                return category
        return other_label

    # Apply the categorize_text function to each element in the specified column
    df[new_col_name] = df[column_name].apply(categorize_text)
    return df
|
|
1430
1471
|
|
|
1431
1472
|
|
|
1432
1473
|
########################################################################################################################################
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=kyspeS4ifoNb_Z_f4OfmWlEMHO6sUO8JjconGzq8Mz8,139793
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.1.
|
|
12
|
-
imsciences-0.6.1.
|
|
13
|
-
imsciences-0.6.1.
|
|
14
|
-
imsciences-0.6.1.
|
|
11
|
+
imsciences-0.6.1.8.dist-info/METADATA,sha256=2NgdiqLw89mBc9VsqR9ZJ_GZmX7-CslnYmzS6mIvybw,854
|
|
12
|
+
imsciences-0.6.1.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.1.8.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|