imsciences 0.9.6.9__tar.gz → 0.9.7.0__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of imsciences might be problematic. Click here for more details.
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/PKG-INFO +1 -1
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/mmm.py +12 -17
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/PKG-INFO +1 -1
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/setup.py +1 -1
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/LICENSE.txt +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/README.md +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/__init__.py +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/geo.py +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/pull.py +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/unittesting.py +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/vis.py +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/PKG-INFO-TomG-HP-290722 +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/SOURCES.txt +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/dependency_links.txt +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/requires.txt +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/top_level.txt +0 -0
- {imsciences-0.9.6.9 → imsciences-0.9.7.0}/setup.cfg +0 -0
|
@@ -583,37 +583,32 @@ class dataprocessing:
|
|
|
583
583
|
|
|
584
584
|
return pivoted_df
|
|
585
585
|
|
|
586
|
-
def apply_lookup_table_for_columns(
|
|
586
|
+
def apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
|
|
587
587
|
"""
|
|
588
|
-
Creates a new DataFrame column based on a look up table,
|
|
588
|
+
Creates a new DataFrame column based on a look up table, using exact matches.
|
|
589
589
|
|
|
590
590
|
Parameters:
|
|
591
591
|
df (pandas.DataFrame): The DataFrame containing the data.
|
|
592
|
-
col_names (list of str):
|
|
593
|
-
to_find_dict (dict):
|
|
594
|
-
if_not_in_dict (str, optional):
|
|
595
|
-
new_column_name (str, optional):
|
|
592
|
+
col_names (list of str): List of column names to use for lookup. If more than one, values are merged with '|'.
|
|
593
|
+
to_find_dict (dict): Lookup dictionary with exact keys to match.
|
|
594
|
+
if_not_in_dict (str, optional): Value used if no match is found. Defaults to "Other".
|
|
595
|
+
new_column_name (str, optional): Name of new output column. Defaults to "Mapping".
|
|
596
596
|
|
|
597
597
|
Returns:
|
|
598
|
-
pandas.DataFrame: DataFrame with a new column containing
|
|
598
|
+
pandas.DataFrame: DataFrame with a new column containing lookup results.
|
|
599
599
|
"""
|
|
600
600
|
|
|
601
|
-
# Create regex pattern with word boundaries from the dictionary
|
|
602
|
-
regex_pattern = "|".join(r'\b' + re.escape(key) + r'\b' for key in to_find_dict.keys())
|
|
603
|
-
|
|
604
601
|
# Preprocess DataFrame if multiple columns
|
|
605
602
|
if len(col_names) > 1:
|
|
606
|
-
df["Merged"] = df[col_names].astype(str).
|
|
603
|
+
df["Merged"] = df[col_names].astype(str).agg('|'.join, axis=1)
|
|
607
604
|
col_to_use = "Merged"
|
|
608
605
|
else:
|
|
609
606
|
col_to_use = col_names[0]
|
|
610
607
|
|
|
611
|
-
#
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
df[new_column_name] = matches.str.lower().map({k.lower(): v for k, v in to_find_dict.items()}).fillna(if_not_in_dict)
|
|
616
|
-
|
|
608
|
+
# Normalize case for matching
|
|
609
|
+
lookup = {k.lower(): v for k, v in to_find_dict.items()}
|
|
610
|
+
df[new_column_name] = df[col_to_use].str.lower().map(lookup).fillna(if_not_in_dict)
|
|
611
|
+
|
|
617
612
|
# Drop intermediate column if created
|
|
618
613
|
if len(col_names) > 1:
|
|
619
614
|
df.drop(columns=["Merged"], inplace=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|