PyPI - imsciences - Versions diffs - 0.9.6.9__tar.gz → 0.9.7.0__tar.gz - Mend

imsciences-0.9.6.9.tar.gz → imsciences-0.9.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of imsciences might be problematic. Click here for more details.

Files changed (17)

{imsciences-0.9.6.9 → imsciences-0.9.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.9.6.9
+Version: 0.9.7.0
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences/mmm.py RENAMED Viewed

@@ -583,37 +583,32 @@ class dataprocessing:
         return pivoted_df
-    def apply_lookup_table_for_columns(self, df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
+    def apply_lookup_table_for_columns(df, col_names, to_find_dict, if_not_in_dict="Other", new_column_name="Mapping"):
         """
-        Creates a new DataFrame column based on a look up table, possibly with multiple columns to look up on (dictionary of substrings to class mappings).
+        Creates a new DataFrame column based on a look up table, using exact matches.
         Parameters:
         df (pandas.DataFrame): The DataFrame containing the data.
-        col_names (list of str): these are the columns which are used for the lookup. One column or several columns can be inputted as a list, provided there is a merged column to lookup on. If there are multiple columns to look up on then a merged column must be inputted as the key of the dictionary of format e.g. col1|col2|col3
-        to_find_dict (dict): your look up table, where keys are the values being looked up, and the values are the resulting mappings.
-        if_not_in_dict (str, optional): default value if no substring matches are found in the look up table dictionary. Defaults to "Other".
-        new_column_name (str, optional): name of new column. Defaults to "Mapping".
+        col_names (list of str): List of column names to use for lookup. If more than one, values are merged with '|'.
+        to_find_dict (dict): Lookup dictionary with exact keys to match.
+        if_not_in_dict (str, optional): Value used if no match is found. Defaults to "Other".
+        new_column_name (str, optional): Name of new output column. Defaults to "Mapping".
         Returns:
-        pandas.DataFrame: DataFrame with a new column containing the look up table results.
+        pandas.DataFrame: DataFrame with a new column containing lookup results.
         """
-        # Create regex pattern with word boundaries from the dictionary
-        regex_pattern = "|".join(r'\b' + re.escape(key) + r'\b' for key in to_find_dict.keys())
         # Preprocess DataFrame if multiple columns
         if len(col_names) > 1:
-            df["Merged"] = df[col_names].astype(str).apply('|'.join, axis=1)
+            df["Merged"] = df[col_names].astype(str).agg('|'.join, axis=1)
             col_to_use = "Merged"
         else:
             col_to_use = col_names[0]
-        # Extract the first match using the regex pattern
-        matches = df[col_to_use].str.extract(f'({regex_pattern})', expand=False, flags=re.IGNORECASE)
-        # Map the matches to the corresponding values in the dictionary
-        df[new_column_name] = matches.str.lower().map({k.lower(): v for k, v in to_find_dict.items()}).fillna(if_not_in_dict)
+        # Normalize case for matching
+        lookup = {k.lower(): v for k, v in to_find_dict.items()}
+        df[new_column_name] = df[col_to_use].str.lower().map(lookup).fillna(if_not_in_dict)
         # Drop intermediate column if created
         if len(col_names) > 1:
             df.drop(columns=["Merged"], inplace=True)

{imsciences-0.9.6.9 → imsciences-0.9.7.0}/imsciences.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: imsciences
-Version: 0.9.6.9
+Version: 0.9.7.0
 Summary: IMS Data Processing Package
 Author: IMS
 Author-email: cam@im-sciences.com

{imsciences-0.9.6.9 → imsciences-0.9.7.0}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ def read_md(file_name):
             return f.read()
     return ''
-VERSION = '0.9.6.9'
+VERSION = '0.9.7.0'
 DESCRIPTION = 'IMS Data Processing Package'
 LONG_DESCRIPTION = read_md('README.md')