imsciences 0.6.2.0__py3-none-any.whl → 0.6.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +9 -12
- {imsciences-0.6.2.0.dist-info → imsciences-0.6.2.1.dist-info}/METADATA +1 -1
- {imsciences-0.6.2.0.dist-info → imsciences-0.6.2.1.dist-info}/RECORD +5 -5
- {imsciences-0.6.2.0.dist-info → imsciences-0.6.2.1.dist-info}/WHEEL +0 -0
- {imsciences-0.6.2.0.dist-info → imsciences-0.6.2.1.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -1436,15 +1436,14 @@ class dataprocessing:
|
|
|
1436
1436
|
Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
|
|
1437
1437
|
|
|
1438
1438
|
Args:
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
resulting categories. Default is 'Category'.
|
|
1439
|
+
df (pd.DataFrame): The DataFrame containing the column to categorize.
|
|
1440
|
+
column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
|
|
1441
|
+
category_dict (dict): A dictionary where keys are substrings to search for in the text and values are the categories to assign when a substring is found.
|
|
1442
|
+
new_col_name (str, optional): The name of the new column to be created in the DataFrame, which will hold the resulting categories. Default is 'Category'.
|
|
1443
|
+
other_label (str, optional): The category assigned when no substring from the dictionary is found in the cell.
|
|
1445
1444
|
|
|
1446
1445
|
Returns:
|
|
1447
|
-
|
|
1446
|
+
pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
|
|
1448
1447
|
"""
|
|
1449
1448
|
|
|
1450
1449
|
def categorize_text(text):
|
|
@@ -1452,13 +1451,11 @@ class dataprocessing:
|
|
|
1452
1451
|
Assigns a category to a single text string based on the presence of substrings from a dictionary.
|
|
1453
1452
|
|
|
1454
1453
|
Args:
|
|
1455
|
-
|
|
1456
|
-
- category_dict (dict): A dictionary where keys are substrings to search for in the text and
|
|
1457
|
-
values are the categories to assign if a substring is found.
|
|
1454
|
+
text (str): The text string to categorize.
|
|
1458
1455
|
|
|
1459
1456
|
Returns:
|
|
1460
|
-
|
|
1461
|
-
matching substring is found, returns
|
|
1457
|
+
str: The category assigned based on the first matching substring found in the text. If no
|
|
1458
|
+
matching substring is found, returns other_label.
|
|
1462
1459
|
"""
|
|
1463
1460
|
for key, category in category_dict.items():
|
|
1464
1461
|
if key.lower() in text.lower(): # Check if the substring is in the text (case-insensitive)
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=zI_vhjBQfa4Lef2NucUViYAJFenEB2RlJ1rnXIIBG5Y,139645
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.2.
|
|
12
|
-
imsciences-0.6.2.
|
|
13
|
-
imsciences-0.6.2.
|
|
14
|
-
imsciences-0.6.2.
|
|
11
|
+
imsciences-0.6.2.1.dist-info/METADATA,sha256=4p9HLTYPZbsBAkr2dzC1dvvQL-GWZsjTrNXEKGb_5hc,854
|
|
12
|
+
imsciences-0.6.2.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.2.1.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|