PyPI - genhpf - Versions diffs - 1.0.11__py3-none-any.whl - Mend

genhpf 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

genhpf/__init__.py +9 -0
genhpf/configs/__init__.py +23 -0
genhpf/configs/config.yaml +8 -0
genhpf/configs/configs.py +240 -0
genhpf/configs/constants.py +29 -0
genhpf/configs/initialize.py +58 -0
genhpf/configs/utils.py +29 -0
genhpf/criterions/__init__.py +74 -0
genhpf/criterions/binary_cross_entropy.py +114 -0
genhpf/criterions/binary_cross_entropy_with_logits.py +115 -0
genhpf/criterions/criterion.py +87 -0
genhpf/criterions/cross_entropy.py +202 -0
genhpf/criterions/multi_task_criterion.py +177 -0
genhpf/criterions/simclr_criterion.py +84 -0
genhpf/criterions/wav2vec2_criterion.py +130 -0
genhpf/datasets/__init__.py +84 -0
genhpf/datasets/dataset.py +109 -0
genhpf/datasets/genhpf_dataset.py +451 -0
genhpf/datasets/meds_dataset.py +232 -0
genhpf/loggings/__init__.py +0 -0
genhpf/loggings/meters.py +374 -0
genhpf/loggings/metrics.py +155 -0
genhpf/loggings/progress_bar.py +445 -0
genhpf/models/__init__.py +73 -0
genhpf/models/genhpf.py +244 -0
genhpf/models/genhpf_mlm.py +64 -0
genhpf/models/genhpf_predictor.py +73 -0
genhpf/models/genhpf_simclr.py +58 -0
genhpf/models/genhpf_wav2vec2.py +304 -0
genhpf/modules/__init__.py +15 -0
genhpf/modules/gather_layer.py +23 -0
genhpf/modules/grad_multiply.py +12 -0
genhpf/modules/gumbel_vector_quantizer.py +204 -0
genhpf/modules/identity_layer.py +8 -0
genhpf/modules/layer_norm.py +27 -0
genhpf/modules/positional_encoding.py +24 -0
genhpf/scripts/__init__.py +0 -0
genhpf/scripts/preprocess/__init__.py +0 -0
genhpf/scripts/preprocess/genhpf/README.md +75 -0
genhpf/scripts/preprocess/genhpf/__init__.py +0 -0
genhpf/scripts/preprocess/genhpf/ehrs/__init__.py +36 -0
genhpf/scripts/preprocess/genhpf/ehrs/ehr.py +919 -0
genhpf/scripts/preprocess/genhpf/ehrs/eicu.py +550 -0
genhpf/scripts/preprocess/genhpf/ehrs/mimiciii.py +839 -0
genhpf/scripts/preprocess/genhpf/ehrs/mimiciv.py +619 -0
genhpf/scripts/preprocess/genhpf/main.py +175 -0
genhpf/scripts/preprocess/genhpf/manifest.py +79 -0
genhpf/scripts/preprocess/genhpf/sample_dataset.py +177 -0
genhpf/scripts/preprocess/genhpf/utils/__init__.py +3 -0
genhpf/scripts/preprocess/genhpf/utils/utils.py +16 -0
genhpf/scripts/preprocess/manifest.py +83 -0
genhpf/scripts/preprocess/preprocess_meds.py +674 -0
genhpf/scripts/test.py +264 -0
genhpf/scripts/train.py +365 -0
genhpf/trainer.py +370 -0
genhpf/utils/checkpoint_utils.py +171 -0
genhpf/utils/data_utils.py +130 -0
genhpf/utils/distributed_utils.py +497 -0
genhpf/utils/file_io.py +170 -0
genhpf/utils/pdb.py +38 -0
genhpf/utils/utils.py +204 -0
genhpf-1.0.11.dist-info/LICENSE +21 -0
genhpf-1.0.11.dist-info/METADATA +202 -0
genhpf-1.0.11.dist-info/RECORD +67 -0
genhpf-1.0.11.dist-info/WHEEL +5 -0
genhpf-1.0.11.dist-info/entry_points.txt +6 -0
genhpf-1.0.11.dist-info/top_level.txt +1 -0

genhpf/loggings/meters.py ADDED Viewed

@@ -0,0 +1,374 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import bisect
+import time
+from collections import OrderedDict
+from typing import Dict, Optional
+import torch
+import numpy as np
+from sklearn.metrics import average_precision_score, roc_auc_score
+def warn(*args, **kwargs):
+    """Do-nothing stand-in for ``warnings.warn``; accepts and ignores any arguments."""
+    return None
+import warnings
+# Rebinding the attribute on the shared ``warnings`` module swaps in the no-op
+# above for every caller that looks ``warn`` up through the module afterwards.
+warnings.warn = warn
+def type_as(a, b):
+    """Return ``a`` converted with ``a.to(b)`` when both arguments are tensors.
+
+    If either argument is not a tensor, ``a`` is handed back untouched.
+    """
+    both_tensors = torch.is_tensor(a) and torch.is_tensor(b)
+    return a.to(b) if both_tensors else a
+class Meter:
+    """Common interface implemented by every meter in this module."""
+    def __init__(self):
+        """Nothing to set up in the base class."""
+    def state_dict(self):
+        """Return the serialisable state; the base meter has none."""
+        return dict()
+    def load_state_dict(self, state_dict):
+        """Ignore ``state_dict``; the base meter has no state to restore."""
+        return None
+    def reset(self):
+        """Must be overridden; the base implementation raises ``NotImplementedError``."""
+        raise NotImplementedError
+    @property
+    def smoothed_value(self) -> float:
+        """Value used for logging; must be overridden by subclasses."""
+        raise NotImplementedError
+def safe_round(number, ndigits):
+    """Round ``number`` to ``ndigits`` when possible, otherwise return it unchanged.
+
+    Objects exposing ``__round__`` go straight through ``round``. Single-element
+    tensors and zero-dimensional objects with an ``item`` method are unwrapped
+    with ``item()`` and retried. Anything else is returned as given.
+    """
+    if hasattr(number, "__round__"):
+        return round(number, ndigits)
+    if torch.is_tensor(number) and number.numel() == 1:
+        unwrap = True
+    else:
+        # Only evaluated when the tensor check above did not match.
+        unwrap = np.ndim(number) == 0 and hasattr(number, "item")
+    if unwrap:
+        return safe_round(number.item(), ndigits)
+    return number
+class SumMeter(Meter):
+    """Accumulates a running total of every value passed to :meth:`update`."""
+    def __init__(self, round: Optional[int] = None):
+        """Remember the optional rounding precision and start from a zero total."""
+        self.round = round
+        self.reset()
+    def reset(self):
+        """Set the running total back to zero."""
+        self.sum = 0
+    def update(self, val):
+        """Add ``val`` to the total; a ``None`` value is skipped."""
+        if val is None:
+            return
+        # Convert the running total to match ``val`` when both are tensors.
+        self.sum = type_as(self.sum, val) + val
+    def state_dict(self):
+        """Return the total and the rounding precision as a plain dict."""
+        return dict(sum=self.sum, round=self.round)
+    def load_state_dict(self, state_dict):
+        """Restore the total (required key) and rounding precision (optional key)."""
+        self.sum = state_dict["sum"]
+        self.round = state_dict.get("round", None)
+    @property
+    def smoothed_value(self) -> float:
+        """The running total, rounded when a precision was configured."""
+        total = self.sum
+        if self.round is None or total is None:
+            return total
+        return safe_round(total, self.round)
+class AverageMeter(Meter):
+    """Tracks the latest value together with a weighted running average."""
+    def __init__(self, round: Optional[int] = None):
+        """Remember the optional rounding precision and clear the accumulators."""
+        self.round = round
+        self.reset()
+    def reset(self):
+        """Forget the latest value and zero the accumulators."""
+        self.val = None  # latest value seen by update()
+        self.sum = 0  # weighted sum over all updates
+        self.count = 0  # total weight over all updates
+    def update(self, val, n = 1):
+        """Record ``val`` with weight ``n``.
+
+        A ``None`` value is ignored entirely. A non-positive weight still
+        records ``val`` as the latest value but leaves the average untouched.
+        """
+        if val is None:
+            return
+        self.val = val
+        if n > 0:
+            weighted = val * n
+            self.sum = type_as(self.sum, val) + weighted
+            self.count = type_as(self.count, n) + n
+    def state_dict(self):
+        """Return latest value, weighted sum, total weight and rounding precision."""
+        return dict(
+            val=self.val,
+            sum=self.sum,
+            count=self.count,
+            round=self.round,
+        )
+    def load_state_dict(self, state_dict):
+        """Restore the fields written by :meth:`state_dict`; ``round`` is optional."""
+        self.val = state_dict["val"]
+        self.sum = state_dict["sum"]
+        self.count = state_dict["count"]
+        self.round = state_dict.get("round", None)
+    @property
+    def avg(self):
+        """Weighted mean, or the latest value when no weight has been accumulated."""
+        if self.count > 0:
+            return self.sum / self.count
+        return self.val
+    @property
+    def smoothed_value(self) -> float:
+        """The average, rounded when a precision was configured."""
+        mean = self.avg
+        if self.round is None or mean is None:
+            return mean
+        return safe_round(mean, self.round)
+class TimeMeter(Meter):
+    """Measures how many events occur per second of elapsed time."""
+    def __init__(self, init: int = 0, n: int = 0, round: Optional[int] = None):
+        """Store the rounding precision and start the clock via :meth:`reset`."""
+        self.round = round
+        self.reset(init, n)
+    def reset(self, init = 0, n = 0):
+        """Restart the clock, seeding it with ``init`` seconds and ``n`` events."""
+        self.init = init
+        self.start = time.perf_counter()
+        self.n = n
+        self.i = 0
+    def update(self, val = 1):
+        """Add ``val`` events and count one more update call."""
+        self.n = type_as(self.n, val) + val
+        self.i += 1
+    def state_dict(self):
+        """Snapshot the elapsed time (stored as ``init``), event count and rounding."""
+        return dict(init=self.elapsed_time, n=self.n, round=self.round)
+    def load_state_dict(self, state_dict):
+        """Restore from a snapshot and restart the clock.
+
+        A snapshot containing a ``"start"`` key is treated as the older
+        format: only the elapsed time is restored.
+        """
+        if "start" in state_dict:
+            # backwards compatibility for old state_dicts
+            self.reset(init=state_dict["init"])
+            return
+        self.reset(init=state_dict["init"], n=state_dict["n"])
+        self.round = state_dict.get("round", None)
+    @property
+    def avg(self):
+        """Events per second of elapsed time."""
+        return self.n / self.elapsed_time
+    @property
+    def elapsed_time(self):
+        """Seeded seconds plus the time since the clock was last restarted."""
+        since_start = time.perf_counter() - self.start
+        return self.init + since_start
+    @property
+    def smoothed_value(self) -> float:
+        """The rate, rounded when a precision was configured."""
+        rate = self.avg
+        if self.round is None or rate is None:
+            return rate
+        return safe_round(rate, self.round)
+class AUCMeter(Meter):
+    """Stores scores / targets to compute AUROC and AUPRC.
+
+    Every :meth:`update` appends one batch. The metrics are evaluated over
+    the concatenation of all stored batches each time :attr:`auroc` or
+    :attr:`auprc` is read.
+    """
+    def __init__(self,):
+        self.reset()
+    def reset(self):
+        """Drop every stored batch."""
+        self.scores = []
+        self.targets = []
+    def update(self, prob, target):
+        """Store one batch of scores and targets.
+
+        Tensors are detached, moved to the CPU and converted to NumPy
+        arrays; any other input is stored as given.
+        """
+        # ``Tensor.numpy()`` refuses tensors that require grad, so detach first.
+        if torch.is_tensor(prob):
+            prob = prob.detach().cpu().numpy()
+        if torch.is_tensor(target):
+            target = target.detach().cpu().numpy()
+        self.scores.append(prob)
+        self.targets.append(target)
+    def state_dict(self):
+        """Return the stored batches as a plain dict."""
+        return {
+            "scores": self.scores,
+            "targets": self.targets,
+        }
+    def load_state_dict(self, state_dict):
+        """Restore the stored batches from ``state_dict``."""
+        self.scores = state_dict["scores"]
+        self.targets = state_dict["targets"]
+        # Nothing in this class reads ``round``; the attribute is kept so
+        # existing code that inspects it after loading keeps working.
+        self.round = state_dict.get("round", None)
+    def _stacked(self):
+        """Concatenate stored batches into ``(y_true, y_score)``; ``None`` when empty."""
+        if not self.targets or not self.scores:
+            return None
+        return np.concatenate(self.targets), np.concatenate(self.scores)
+    @property
+    def auroc(self):
+        """Macro-averaged ROC AUC over all stored batches.
+
+        Returns NaN when nothing has been stored or when scikit-learn
+        rejects the data with ``ValueError``.
+        """
+        stacked = self._stacked()
+        if stacked is None:
+            return float("nan")
+        y_true, y_score = stacked
+        # Columns without any positive target are dropped, but only for 2-D
+        # targets with more than 127 rows.
+        # NOTE(review): the reason for the 127-row threshold is not visible here - confirm.
+        if y_true.shape[0] > 127 and len(y_true.shape) > 1:
+            mask = y_true.sum(axis=0) != 0
+            y_true = y_true[:, mask]
+            y_score = y_score[:, mask]
+        try:
+            return roc_auc_score(y_true=y_true, y_score=y_score, average='macro')
+        except ValueError:
+            return float("nan")
+    @property
+    def auprc(self):
+        """Micro-averaged average precision over all stored batches.
+
+        Returns NaN when nothing has been stored or when scikit-learn
+        rejects the data with ``ValueError``.
+        """
+        stacked = self._stacked()
+        if stacked is None:
+            return float("nan")
+        y_true, y_score = stacked
+        if y_true.shape != y_score.shape:
+            # Index an identity matrix with the targets to one-hot encode them
+            # against the score columns.
+            y_true = np.eye(y_score.shape[1])[y_true]
+        try:
+            return average_precision_score(y_true=y_true, y_score=y_score, average='micro')
+        except ValueError:
+            return float("nan")
+    @property
+    def smoothed_value(self) -> float:
+        """Always raises: this meter has no single scalar to report."""
+        raise AttributeError(
+            "AUC meter cannot have smoothed values. Please "
+            "make sure the key of this meter starts with '_'."
+        )
+class StopwatchMeter(Meter):
+    """Accumulates the total and per-unit duration, in seconds, of a timed event."""
+    def __init__(self, round: Optional[int] = None):
+        """Start with empty totals and a stopped clock."""
+        self.round = round
+        self.sum = 0
+        self.n = 0
+        self.start_time = None
+    def start(self):
+        """Record the current time as the start of a measurement."""
+        self.start_time = time.perf_counter()
+    def stop(self, n = 1, prehook = None):
+        """Add the time since :meth:`start` to the total, weighted by ``n``.
+
+        Does nothing when the stopwatch was never started. ``prehook``, if
+        given, is called before the elapsed time is read.
+        """
+        if self.start_time is None:
+            return
+        if prehook is not None:
+            prehook()
+        elapsed = time.perf_counter() - self.start_time
+        self.sum = self.sum + elapsed
+        self.n = type_as(self.n, n) + n
+    def reset(self):
+        """Zero the totals and immediately start a new measurement."""
+        self.sum = 0  # cumulative time during which stopwatch was active
+        self.n = 0  # total n across all start/stop
+        self.start()
+    def state_dict(self):
+        """Return total time, total count and rounding precision."""
+        return dict(sum=self.sum, n=self.n, round=self.round)
+    def load_state_dict(self, state_dict):
+        """Restore the totals and leave the stopwatch stopped."""
+        self.sum = state_dict["sum"]
+        self.n = state_dict["n"]
+        self.start_time = None
+        self.round = state_dict.get("round", None)
+    @property
+    def avg(self):
+        """Total time divided by the count, or the raw total when the count is not positive."""
+        if self.n > 0:
+            return self.sum / self.n
+        return self.sum
+    @property
+    def elapsed_time(self):
+        """Seconds since :meth:`start`, or ``0.0`` when never started."""
+        if self.start_time is None:
+            return 0.0
+        return time.perf_counter() - self.start_time
+    @property
+    def smoothed_value(self) -> float:
+        """Average duration once time has accumulated, else the live elapsed time."""
+        if self.sum > 0:
+            duration = self.avg
+        else:
+            duration = self.elapsed_time
+        if self.round is None or duration is None:
+            return duration
+        return safe_round(duration, self.round)
+class MetersDict(OrderedDict):
+    """A sorted dictionary of :class:`Meters`.
+
+    Meters are sorted according to a priority that is given when the
+    meter is first added to the dictionary; equal priorities keep their
+    insertion order.
+    """
+    def __init__(self, *args, **kwargs):
+        # ``OrderedDict.__init__`` routes initial items through ``__setitem__``,
+        # which needs ``self.priorities`` to exist already.
+        self.priorities = []
+        super().__init__(*args, **kwargs)
+    def __setitem__(self, key, value):
+        """Insert a meter; ``value`` must be a ``(priority, meter)`` pair.
+
+        Assigning to a key that is already present is rejected.
+        """
+        assert key not in self, "MetersDict doesn't support reassignment"
+        priority, value = value
+        # The running index breaks ties so equal priorities stay in insertion order.
+        bisect.insort(self.priorities, (priority, len(self.priorities), key))
+        super().__setitem__(key, value)
+        for _, _, ordered_key in self.priorities:  # reorder dict to match priorities
+            self.move_to_end(ordered_key)
+    def add_meter(self, key, meter, priority):
+        """Register ``meter`` under ``key`` with the given ``priority``."""
+        self.__setitem__(key, (priority, meter))
+    def state_dict(self):
+        """Return ``(priority, key, class name, meter state)`` tuples in priority order."""
+        return [
+            (pri, key, self[key].__class__.__name__, self[key].state_dict())
+            for pri, _, key in self.priorities
+            # can't serialize DerivedMeter instances
+            if not isinstance(self[key], MetersDict._DerivedMeter)
+        ]
+    def load_state_dict(self, state_dict):
+        """Replace the current contents with the meters described by ``state_dict``."""
+        self.clear()
+        self.priorities.clear()
+        for pri, key, meter_cls, meter_state in state_dict:
+            # The class is looked up by name in this module's globals and built
+            # with no arguments.
+            # NOTE(review): the name comes from the saved state - only load trusted state dicts.
+            meter = globals()[meter_cls]()
+            meter.load_state_dict(meter_state)
+            self.add_meter(key, meter, pri)
+    def get_smoothed_value(self, key: str) -> float:
+        """Get a single smoothed value."""
+        meter = self[key]
+        if isinstance(meter, MetersDict._DerivedMeter):
+            # Derived meters compute their value from the whole dictionary.
+            return meter.fn(self)
+        return meter.smoothed_value
+    def get_smoothed_values(self) -> Dict[str, float]:
+        """Get all smoothed values, skipping keys that start with ``_``."""
+        return OrderedDict(
+            (key, self.get_smoothed_value(key))
+            for key in self.keys()
+            if not key.startswith("_")
+        )
+    def reset(self):
+        """Reset Meter instances; derived meters are left alone."""
+        for meter in self.values():
+            if isinstance(meter, MetersDict._DerivedMeter):
+                continue
+            meter.reset()
+    class _DerivedMeter(Meter):
+        """A Meter whose values are derived from other Meters."""
+        def __init__(self, fn):
+            # ``fn`` is called with the owning MetersDict to produce the value.
+            self.fn = fn
+        def reset(self):
+            pass

genhpf/loggings/metrics.py ADDED Viewed

@@ -0,0 +1,155 @@
+import contextlib
+import uuid
+from collections import defaultdict, OrderedDict
+from typing import Callable, List, Optional
+from .meters import *
+# Named aggregators that persist across ``aggregate`` contexts, keyed by name.
+_aggregators = OrderedDict()
+# Aggregators that the ``log_*`` helpers currently write into, keyed by name.
+_active_aggregators = OrderedDict()
+# Per-name count of ``aggregate`` contexts that are currently open.
+_active_aggregators_cnt = defaultdict(lambda: 0)
+def reset() -> None:
+    """Discard every aggregator and recreate the always-active 'default' one."""
+    for registry in (_aggregators, _active_aggregators, _active_aggregators_cnt):
+        registry.clear()
+    default_agg = MetersDict()
+    _aggregators['default'] = default_agg
+    _active_aggregators['default'] = default_agg
+    _active_aggregators_cnt['default'] = 1
+# Runs at import time so the 'default' aggregator exists before any logging call.
+reset()
+@contextlib.contextmanager
+def aggregate(name=None, new_root=False):
+    """Activate a meter aggregator for the duration of a ``with`` block.
+
+    Args:
+        name: aggregator name. ``None`` creates a temporary aggregator under
+            a random UUID that is not stored in the named registry. A named
+            aggregator is created on first use and reused afterwards. The
+            name ``'default'`` is rejected.
+        new_root: when true, all currently active aggregators are set aside
+            for the duration of the block and restored afterwards.
+
+    Yields:
+        The activated :class:`MetersDict`.
+
+    The bookkeeping is undone in a ``finally`` clause, so an exception raised
+    inside the ``with`` body no longer leaves the aggregator active.
+    """
+    if name is None:
+        name = str(uuid.uuid4())
+        assert name not in _aggregators
+        agg = MetersDict()
+    else:
+        assert name != 'default'
+        agg = _aggregators.setdefault(name, MetersDict())
+    if new_root:
+        backup_aggregators = _active_aggregators.copy()
+        _active_aggregators.clear()
+        backup_aggregators_cnt = _active_aggregators_cnt.copy()
+        _active_aggregators_cnt.clear()
+    _active_aggregators[name] = agg
+    _active_aggregators_cnt[name] += 1
+    try:
+        yield agg
+    finally:
+        _active_aggregators_cnt[name] -= 1
+        # Deactivate only when the outermost context for this name closes.
+        if _active_aggregators_cnt[name] == 0 and name in _active_aggregators:
+            del _active_aggregators[name]
+        if new_root:
+            _active_aggregators.clear()
+            _active_aggregators.update(backup_aggregators)
+            _active_aggregators_cnt.clear()
+            _active_aggregators_cnt.update(backup_aggregators_cnt)
+def get_active_aggregators() -> List[MetersDict]:
+    """Return a new list holding the currently active aggregators."""
+    return [*_active_aggregators.values()]
+def log_scalar(key, value, weight=1, priority=10, round=None):
+    """Feed ``value`` with ``weight`` into an ``AverageMeter`` in every active aggregator.
+
+    The meter is created with the given ``round`` and ``priority`` the first
+    time ``key`` is seen in an aggregator.
+    """
+    for active in get_active_aggregators():
+        if key not in active:
+            active.add_meter(key, AverageMeter(round=round), priority)
+        meter = active[key]
+        meter.update(value, weight)
+def log_scalar_sum(key, value, priority=10, round=None):
+    """Add ``value`` to a ``SumMeter`` in every active aggregator.
+
+    The meter is created with the given ``round`` and ``priority`` the first
+    time ``key`` is seen in an aggregator.
+    """
+    for active in get_active_aggregators():
+        if key not in active:
+            active.add_meter(key, SumMeter(round=round), priority)
+        meter = active[key]
+        meter.update(value)
+def log_derived(key, fn: Callable[[MetersDict], float], priority=20):
+    """Register a derived meter computed by ``fn`` in every active aggregator.
+
+    Aggregators that already contain ``key`` are left unchanged.
+    """
+    for active in get_active_aggregators():
+        if key in active:
+            continue
+        active.add_meter(key, MetersDict._DerivedMeter(fn), priority)
+def log_speed(key, value, priority=30, round=None):
+    """Feed ``value`` into a ``TimeMeter`` in every active aggregator.
+
+    The first call for ``key`` in an aggregator only creates the meter and
+    restarts its clock; ``value`` is counted from the following call onwards.
+    """
+    for active in get_active_aggregators():
+        if key in active:
+            active[key].update(value)
+            continue
+        active.add_meter(key, TimeMeter(round=round), priority)
+        active[key].reset()
+def log_start_time(
+    key,
+    priority=40,
+    round=None
+):
+    """Start the ``StopwatchMeter`` stored under ``key`` in every active aggregator.
+
+    The meter is created with the given ``round`` and ``priority`` the first
+    time ``key`` is seen in an aggregator.
+    """
+    for agg in get_active_aggregators():
+        if key not in agg:
+            agg.add_meter(key, StopwatchMeter(round=round), priority)
+        # Start inside the loop so every aggregator's stopwatch runs, and so
+        # nothing is touched when no aggregator is active.
+        agg[key].start()
+def log_stop_time(key, weight=0.0, prehook=None):
+    """Stop the meter stored under ``key`` in every active aggregator that has one.
+
+    ``weight`` and ``prehook`` are passed straight to the meter's ``stop``.
+    """
+    for active in get_active_aggregators():
+        if key not in active:
+            continue
+        active[key].stop(weight, prehook)
+def log_custom(new_meter_fn: Callable[[], Meter], key, *args, priority=50, **kwargs):
+    """Update a caller-supplied meter type in every active aggregator.
+
+    ``new_meter_fn`` is called with no arguments to build the meter the first
+    time ``key`` is seen in an aggregator. The remaining positional and
+    keyword arguments are forwarded to the meter's ``update``.
+    """
+    for active in get_active_aggregators():
+        if key not in active:
+            active.add_meter(key, new_meter_fn(), priority)
+        meter = active[key]
+        meter.update(*args, **kwargs)
+def reset_meter(name, key) -> None:
+    """Reset the meter ``key`` of aggregator ``name``; do nothing if it is not found."""
+    found = get_meter(name, key)
+    if found is None:
+        return
+    found.reset()
+def reset_meters(name) -> None:
+    """Reset every meter of aggregator ``name``; do nothing if it is not found."""
+    found = get_meters(name)
+    if found is None:
+        return
+    found.reset()
+def get_meter(name, key) -> Meter:
+    """Return meter ``key`` from aggregator ``name``, or ``None`` when either is missing."""
+    try:
+        meters = _aggregators[name]
+    except KeyError:
+        return None
+    return meters.get(key, None)
+def get_meters(name) -> MetersDict:
+    """Return the aggregator registered as ``name``, or ``None`` when there is none."""
+    if name in _aggregators:
+        return _aggregators[name]
+    return None
+def get_smoothed_values(name) -> Dict[str, float]:
+    """Return the smoothed values of aggregator ``name``; raises ``KeyError`` if unknown."""
+    meters = _aggregators[name]
+    return meters.get_smoothed_values()
+def state_dict():
+    """Return an ordered mapping from aggregator name to that aggregator's state."""
+    snapshot = OrderedDict()
+    for name, agg in _aggregators.items():
+        snapshot[name] = agg.state_dict()
+    return snapshot