imsciences 0.6.1.9__py3-none-any.whl → 0.6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/datafunctions.py +5 -5
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.0.dist-info}/METADATA +1 -1
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.0.dist-info}/RECORD +5 -5
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.0.dist-info}/WHEEL +0 -0
- {imsciences-0.6.1.9.dist-info → imsciences-0.6.2.0.dist-info}/top_level.txt +0 -0
imsciences/datafunctions.py
CHANGED
|
@@ -1431,11 +1431,11 @@ class dataprocessing:
|
|
|
1431
1431
|
|
|
1432
1432
|
return df
|
|
1433
1433
|
|
|
1434
|
-
def apply_lookup_table_based_on_substring(df, column_name, category_dict, new_col_name='Category', other_label='Other'):
|
|
1434
|
+
def apply_lookup_table_based_on_substring(self, df, column_name, category_dict, new_col_name='Category', other_label='Other'):
|
|
1435
1435
|
"""
|
|
1436
1436
|
Categorizes text in a specified DataFrame column by applying a lookup table based on substrings.
|
|
1437
1437
|
|
|
1438
|
-
|
|
1438
|
+
Args:
|
|
1439
1439
|
- df (pd.DataFrame): The DataFrame containing the column to categorize.
|
|
1440
1440
|
- column_name (str): The name of the column in the DataFrame that contains the text data to categorize.
|
|
1441
1441
|
- category_dict (dict): A dictionary where keys are substrings to search for in the text and values are
|
|
@@ -1447,11 +1447,11 @@ class dataprocessing:
|
|
|
1447
1447
|
- pd.DataFrame: The original DataFrame with an additional column containing the assigned categories.
|
|
1448
1448
|
"""
|
|
1449
1449
|
|
|
1450
|
-
def categorize_text(text
|
|
1450
|
+
def categorize_text(text):
|
|
1451
1451
|
"""
|
|
1452
1452
|
Assigns a category to a single text string based on the presence of substrings from a dictionary.
|
|
1453
1453
|
|
|
1454
|
-
|
|
1454
|
+
Args:
|
|
1455
1455
|
- text (str): The text string to categorize.
|
|
1456
1456
|
- category_dict (dict): A dictionary where keys are substrings to search for in the text and
|
|
1457
1457
|
values are the categories to assign if a substring is found.
|
|
@@ -1464,7 +1464,7 @@ class dataprocessing:
|
|
|
1464
1464
|
if key.lower() in text.lower(): # Check if the substring is in the text (case-insensitive)
|
|
1465
1465
|
return category
|
|
1466
1466
|
return other_label # Default category if no match is found
|
|
1467
|
-
|
|
1467
|
+
|
|
1468
1468
|
# Apply the categorize_text function to each element in the specified column
|
|
1469
1469
|
df[new_col_name] = df[column_name].apply(categorize_text)
|
|
1470
1470
|
return df
|
|
@@ -2,13 +2,13 @@ dataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
|
2
2
|
dataprocessing/data-processing-functions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
3
3
|
dataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
4
4
|
imsciences/__init__.py,sha256=GIPbLmWc06sVcOySWwNvMNUr6XGOHqPLryFIWgtpHh8,78
|
|
5
|
-
imsciences/datafunctions.py,sha256=
|
|
5
|
+
imsciences/datafunctions.py,sha256=d37Fu7_FxlWkdyzMn0io_1FUske6VAL_hbhvpNf3KKM,139739
|
|
6
6
|
imsciences/datapull.py,sha256=TPY0LDgOkcKTBk8OekbD0Grg5x0SomAK2dZ7MuT6X1E,19000
|
|
7
7
|
imsciencesdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
8
8
|
imsciencesdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
9
9
|
imsdataprocessing/__init__.py,sha256=quSwsLs6IuLoA5Rzi0ZD40xZaQudwDteF7_ai9JfTPk,32
|
|
10
10
|
imsdataprocessing/datafunctions.py,sha256=vE1vsZ8xOSbR9Bwlp9SWXwEHXQ0nFydwGkvzHXf2f1Y,41
|
|
11
|
-
imsciences-0.6.
|
|
12
|
-
imsciences-0.6.
|
|
13
|
-
imsciences-0.6.
|
|
14
|
-
imsciences-0.6.
|
|
11
|
+
imsciences-0.6.2.0.dist-info/METADATA,sha256=842dB5tjX2VVtJmemQtVZdson-VXiSafE6YOWAs0NWY,854
|
|
12
|
+
imsciences-0.6.2.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
imsciences-0.6.2.0.dist-info/top_level.txt,sha256=hsENS-AlDVRh8tQJ6-426iUQlla9bPcGc0-UlFF0_iU,11
|
|
14
|
+
imsciences-0.6.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|