PyPI - carrot-transform - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

carrot-transform 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of carrot-transform might be problematic. Click here for more details.

Files changed (20) hide show

carrot_transform-0.3.5.dist-info/METADATA +106 -0
carrot_transform-0.3.5.dist-info/RECORD +25 -0
{carrot_transform-0.3.3.dist-info → carrot_transform-0.3.5.dist-info}/WHEEL +1 -1
carrot_transform-0.3.5.dist-info/entry_points.txt +3 -0
carrottransform/_version.py +6 -2
carrottransform/cli/subcommands/run.py +445 -193
carrottransform/examples/test/inputs/Covid19_test.csv +801 -0
carrottransform/examples/test/inputs/Demographics.csv +1001 -0
carrottransform/examples/test/inputs/Symptoms.csv +801 -0
carrottransform/examples/test/inputs/covid19_antibody.csv +1001 -0
carrottransform/examples/test/inputs/vaccine.csv +501 -0
carrottransform/examples/test/rules/rules_14June2021.json +300 -0
carrottransform/tools/click.py +21 -0
carrottransform/tools/file_helpers.py +30 -4
carrottransform/tools/mappingrules.py +13 -10
carrottransform/tools/metrics.py +212 -40
carrottransform/tools/omopcdm.py +17 -5
carrot_transform-0.3.3.dist-info/METADATA +0 -48
carrot_transform-0.3.3.dist-info/RECORD +0 -17
{carrot_transform-0.3.3.dist-info → carrot_transform-0.3.5.dist-info}/LICENSE +0 -0

carrottransform/examples/test/rules/rules_14June2021.json ADDED Viewed

@@ -0,0 +1,300 @@
+{
+      "metadata": {
+            "date_created": "2021-06-14T15:27:37.123947",
+            "dataset": "Test"
+      },
+      "cdm": {
+            "observation": {
+                  "observation_0": {
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Asian": 35825508
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Asian": 35825508
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "observation_1":{
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Bangladeshi": 35825531
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Bangladeshi": 35825531
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "observation_2":{
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Indian": 35826241
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Indian": 35826241
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "observation_3":{
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "White": 35827394
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "White": 35827394
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "observation_4":{
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Black": 35825567
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "Black": 35825567
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "observation_5":{
+                        "observation_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "White and Asian": 35827395
+                              }
+                        },
+                        "observation_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "observation_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity",
+                              "term_mapping": {
+                                    "White and Asian": 35827395
+                              }
+                        },
+                        "observation_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "ethnicity"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  }
+            },
+            "condition_occurrence": {
+                  "condition_occurrence_0":{
+                        "condition_concept_id": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "symptom1",
+                              "term_mapping": {
+                                    "Y": 254761
+                              }
+                        },
+                        "condition_end_datetime": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "visit_date"
+                        },
+                        "condition_source_concept_id": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "symptom1",
+                              "term_mapping": {
+                                    "Y": 254761
+                              }
+                        },
+                        "condition_source_value": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "symptom1"
+                        },
+                        "condition_start_datetime": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "visit_date"
+                        },
+                        "person_id": {
+                              "source_table": "Symptoms.csv",
+                              "source_field": "PersonID"
+                        }
+                  }
+            },
+            "person": {
+                  "female":{
+                        "birth_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "gender_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex",
+                              "term_mapping": {
+                                    "F": 8532
+                              }
+                        },
+                        "gender_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex",
+                              "term_mapping": {
+                                    "F": 8532
+                              }
+                        },
+                        "gender_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  },
+                  "male":{
+                        "birth_datetime": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "date_of_birth"
+                        },
+                        "gender_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex",
+                              "term_mapping": {
+                                    "M": 8507
+                              }
+                        },
+                        "gender_source_concept_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex",
+                              "term_mapping": {
+                                    "M": 8507
+                              }
+                        },
+                        "gender_source_value": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "sex"
+                        },
+                        "person_id": {
+                              "source_table": "Demographics.csv",
+                              "source_field": "PersonID"
+                        }
+                  }
+            },
+	    "measurement": {
+		  "covid_antibody":{
+			"value_as_number": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "IgG"
+			},
+			"measurement_source_value": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "IgG"
+			},
+			"measurement_concept_id": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "IgG",
+			      "term_mapping": 37398191
+			},
+			"measurement_source_concept_id": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "IgG",
+			      "term_mapping": 37398191
+			},
+			"measurement_datetime": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "date"
+			},
+			"person_id": {
+			      "source_table": "covid19_antibody.csv",
+			      "source_field": "PersonID"
+			}
+		  }
+            }
+      }
+}

carrottransform/tools/click.py ADDED Viewed

@@ -0,0 +1,21 @@
+import click
+from pathlib import Path
+def PathArgs():
+    """used by the click library for CLI args that are files"""
+    class PathArgs(click.ParamType):
+        name = "pathlib.Path"
+        def convert(self, value, param, ctx):
+            try:
+                return Path(value)
+            except Exception as e:
+                self.fail(f"Invalid path: {value} ({e})", param, ctx)
+    return PathArgs()
+# use this
+PathArgs = PathArgs()

carrottransform/tools/file_helpers.py CHANGED Viewed

@@ -1,15 +1,41 @@
+import json
+import logging
 import os
 import sys
 import json
+import importlib.resources as resources
+from typing import List, Optional
+from pathlib import Path
+logger = logging.getLogger(__name__)
 # Function inherited from the "old" CaRROT-CDM (modified to exit on error)
-def load_json(f_in):
+def load_json(f_in: Path):
     try:
-        data = json.load(open(f_in))
+        data = json.load(f_in.open())
     except Exception as err:
-        print ("{0} not found. Or cannot parse as json".format(f_in))
+        logger.exception("{0} not found. Or cannot parse as json".format(f_in))
         sys.exit()
     return data
+def resolve_paths(args: List[Optional[Path]]) -> List[Optional[Path]]:
+    """Resolve special path syntaxes in command line arguments."""
+    try:
+        with resources.files('carrottransform').joinpath('__init__.py') as f:
+            package_path = f.parent
+    except Exception:
+        # Fallback for development environment
+        import carrottransform
+        package_path = Path(carrottransform.__file__).resolve().parent
+    # Handle None values and replace @carrot with the actual package path
+    prefix = '@carrot'
+    return [
+        package_path / Path(str(arg).replace(prefix, '').lstrip('/')) if arg is not None and str(arg).startswith(prefix) else arg
+        for arg in args
+    ]

carrottransform/tools/mappingrules.py CHANGED Viewed

@@ -3,13 +3,17 @@ import json
 import carrottransform.tools as tools
 from .omopcdm import OmopCDM
+import logging
+logger = logging.getLogger(__name__)
 class MappingRules:
     """
     self.rules_data stores the mapping rules as untransformed json, as each input file is processed rules are reorganised
     as a file-specific dictionary allowing rules to be "looked-up" depending on data content
     """
-    def __init__(self, rulesfilepath, omopcdm):
+    def __init__(self, rulesfilepath: os.PathLike, omopcdm: OmopCDM):
+        ## just loads the json directly
         self.rules_data = tools.load_json(rulesfilepath)
         self.omopcdm = omopcdm
@@ -34,12 +38,7 @@ class MappingRules:
         return self.dataset_name
     def get_all_outfile_names(self):
-        file_list = []
-        for outfilename in self.rules_data["cdm"]:
-            file_list.append(outfilename)
-        return file_list
+        return list(self.rules_data["cdm"])
     def get_all_infile_names(self):
         file_list = []
@@ -84,11 +83,11 @@ class MappingRules:
             outfile = keydata[-1]
             for outfield_elem in outfield_data:
                 for infield, outfield_list in outfield_elem.items():
-                    #print("{0}, {1}, {2}".format(outfile, infield, str(outfield_list)))
+                    logger.debug("{0}, {1}, {2}".format(outfile, infield, str(outfield_list)))
                     for outfield in outfield_list:
-                        if outfield in self.omopcdm.get_omop_datetime_fields(outfile):
+                        if outfield.split('~')[0] in self.omopcdm.get_omop_datetime_fields(outfile):
                             datetime_source = infield
-                        if outfield == self.omopcdm.get_omop_person_id_field(outfile):
+                        if outfield.split('~')[0] == self.omopcdm.get_omop_person_id_field(outfile):
                             person_id_source = infield
         return datetime_source, person_id_source
@@ -101,6 +100,7 @@ class MappingRules:
         person_id_source = None
         if tgtfilename in self.rules_data["cdm"]:
             source_rules_data = self.rules_data["cdm"][tgtfilename]
+            ## this loops over all the fields in the person part of the rules, which will lead to overwriting of the source variables and unnecessary looping
             for rule_name, rule_fields in source_rules_data.items():
                 if "birth_datetime" in rule_fields:
                     birth_datetime_source = rule_fields["birth_datetime"]["source_field"]
@@ -113,6 +113,7 @@ class MappingRules:
         """
         Parse rules to produce a map of source to target data for a given input file
         """
+        ## creates a dict of dicts that has input files as keys, and infile~field~data~target as keys for the underlying keys, which contain a list of dicts of lists
         if infilename in self.outfile_names and infilename in self.parsed_rules:
             return self.outfile_names[infilename], self.parsed_rules[infilename]
         outfilenames = []
@@ -141,6 +142,7 @@ class MappingRules:
         plain_key = ""
         term_value_key = ""
+        ## iterate through the rules, looking for rules that apply to the input file.
         for outfield, source_info in rules.items():
             if source_info["source_field"] not in data:
                 data[source_info["source_field"]] = []
@@ -148,6 +150,7 @@ class MappingRules:
                 if "term_mapping" in source_info:
                     if type(source_info["term_mapping"]) is dict:
                         for inputvalue, term in source_info["term_mapping"].items():
+                            ## add a key/add to the list of data in the dict for the given input file
                             term_value_key = infilename + "~" + source_info["source_field"] + "~" + str(inputvalue) + "~" + outfilename
                             data[source_info["source_field"]].append(outfield + "~" + str(source_info["term_mapping"][str(inputvalue)]))
                     else:

carrot-transform 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

Potentially problematic release.

carrot-transform 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl