imsciences 0.6.1.9__py3-none-any.whl → 0.6.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +14 -17
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.1.dist-info}/METADATA +1 -1
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.1.dist-info}/RECORD +5 -5
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.1.dist-info}/WHEEL +0 -0
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.1.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -1431,40 +1431,37 @@ class dataprocessing:
|
|
|
1431
1431
|
|
|
1432
1432
|
return df
|
|
1433
1433
|
|
|
1434
|
-
def apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other'):
|
|
1434
|
+
def apply_lookup_table_based_on_substring(self, df, column_name, category_dict, new_col_name='Category', other_label='Other'):
|
|
1435
1435
|
"""
|
|
1436
1436
|
Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
|
|
1437
1437
|
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
resulting categories. Default is 'Category'.
|
|
1438
|
+
Args:
|
|
1439
|
+
df (pd.DataFrame): The DataFrame containing the column to categorize.
|
|
1440
|
+
column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
|
|
1441
|
+
category_dict (dict): A dictionary where keys are substrings to search for in the text and values are the categories to assign when a substring is found.
|
|
1442
|
+
new_col_name (str, optional): The name of the new column to be created in the DataFrame, which will hold the resulting categories. Default is 'Category'.
|
|
1443
|
+
other_label (str, optional): The category assigned when no substring from the dictionary is found in the cell. Default is 'Other'.
|
|
1445
1444
|
|
|
1446
1445
|
Returns:
|
|
1447
|
-
|
|
1446
|
+
pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
|
|
1448
1447
|
"""
|
|
1449
1448
|
|
|
1450
|
-
def categorize_text(text
|
|
1449
|
+
def categorize_text(text):
|
|
1451
1450
|
"""
|
|
1452
1451
|
Assigns a category to a single text string based on the presence of substrings from a dictionary.
|
|
1453
1452
|
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
- category_dict (dict): A dictionary where keys are substrings to search for in the text and
|
|
1457
|
-
values are the categories to assign if a substring is found.
|
|
1453
|
+
Args:
|
|
1454
|
+
text (str): The text string to categorize.
|
|
1458
1455
|
|
|
1459
1456
|
Returns:
|
|
1460
|
-
|
|
1461
|
-
matching substring is found, returns
|
|
1457
|
+
str: The category assigned based on the first matching substring found in the text. If no
|
|
1458
|
+
matching substring is found, returns other_label.
|
|
1462
1459
|
"""
|
|
1463
1460
|
for key, category in category_dict.items():
|
|
1464
1461
|
if key.lower() in text.lower(): # Check if the substring is in the text (case-insensitive)
|
|
1465
1462
|
return category
|
|
1466
1463
|
return other_label # Default category if no match is found
|
|
1467
|
-
|
|
1464
|
+
|
|
1468
1465
|
# Apply the categorize_text function to each element in the specified column
|
|
1469
1466
|
df[new_col_name] = df[column_name].apply(categorize_text)
|
|
1470
1467
|
return df
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=zI_vhjBQfa4Lef2NucUViYAJFenEB2RlJ1rnXIIBG5Y,139645
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.1.
|
|
12
|
-
imsciences-0.6.1.
|
|
13
|
-
imsciences-0.6.1.
|
|
14
|
-
imsciences-0.6.1.
|
|
11
|
+
imsciences-0.6.2.1.dist-info/METADATA,sha256=4p9HLTYPZbsBAkr2dzC1dvvQL-GWZsjTrNXEKGb_5hc,854
|
|
12
|
+
imsciences-0.6.2.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.2.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|