cehrgpt 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
- cehrgpt/data/hf_cehrgpt_dataset.py +24 -4
- cehrgpt/data/hf_cehrgpt_dataset_collator.py +260 -84
- cehrgpt/data/hf_cehrgpt_dataset_mapping.py +279 -2
- cehrgpt/data/sample_packing_sampler.py +151 -0
- cehrgpt/generation/generate_batch_hf_gpt_sequence.py +12 -9
- cehrgpt/generation/omop_converter_batch.py +3 -0
- cehrgpt/models/config.py +10 -0
- cehrgpt/models/hf_cehrgpt.py +244 -73
- cehrgpt/models/tokenization_hf_cehrgpt.py +6 -2
- cehrgpt/runners/data_utils.py +243 -0
- cehrgpt/runners/gpt_runner_util.py +0 -10
- cehrgpt/runners/hf_cehrgpt_finetune_runner.py +154 -260
- cehrgpt/runners/hf_cehrgpt_pretrain_runner.py +250 -90
- cehrgpt/runners/hf_gpt_runner_argument_dataclass.py +46 -0
- cehrgpt/runners/hyperparameter_search_util.py +4 -1
- cehrgpt/runners/sample_packing_trainer.py +168 -0
- cehrgpt/simulations/__init__.py +0 -0
- cehrgpt/simulations/generate_plots.py +95 -0
- cehrgpt/simulations/run_simulation.sh +24 -0
- cehrgpt/simulations/time_embedding_simulation.py +250 -0
- cehrgpt/simulations/time_token_simulation.py +177 -0
- cehrgpt/tools/generate_causal_patient_split_by_age.py +146 -0
- cehrgpt/tools/linear_prob/__init__.py +0 -0
- cehrgpt/tools/linear_prob/compute_cehrgpt_features.py +467 -0
- cehrgpt/tools/linear_prob/train_with_cehrgpt_features.py +152 -0
- {cehrgpt-0.0.1.dist-info → cehrgpt-0.1.0.dist-info}/METADATA +57 -9
- {cehrgpt-0.0.1.dist-info → cehrgpt-0.1.0.dist-info}/RECORD +30 -18
- {cehrgpt-0.0.1.dist-info → cehrgpt-0.1.0.dist-info}/WHEEL +1 -1
- {cehrgpt-0.0.1.dist-info → cehrgpt-0.1.0.dist-info/licenses}/LICENSE +0 -0
- {cehrgpt-0.0.1.dist-info → cehrgpt-0.1.0.dist-info}/top_level.txt +0 -0
cehrgpt/data/hf_cehrgpt_dataset_mapping.py
CHANGED
@@ -1,8 +1,23 @@
 import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Generator, Optional
 
 import numpy as np
-
+import pandas as pd
+from cehrbert.data_generators.hf_data_generator.hf_dataset_mapping import (
+    ED_VISIT_TYPE_CODES,
+    INPATIENT_VISIT_TYPE_CODES,
+    INPATIENT_VISIT_TYPES,
+    DatasetMapping,
+    VisitObject,
+    get_value,
+    has_events_and_get_events,
+    replace_escape_chars,
+)
+from cehrbert.med_extension.schema_extension import Event
+from cehrbert.runners.hf_runner_argument_dataclass import DataTrainingArguments
+from cehrbert_data.const.common import NA
+from cehrbert_data.decorators.patient_event_decorator_base import get_att_function
+from dateutil.relativedelta import relativedelta
 
 from cehrgpt.models.tokenization_hf_cehrgpt import (
     NONE_BIN,
@@ -17,6 +32,268 @@ def convert_date_to_posix_time(index_date: datetime.date) -> float:
     ).timestamp()
 
 
+class MedToCehrGPTDatasetMapping(DatasetMapping):
+    def __init__(
+        self,
+        data_args: DataTrainingArguments,
+        include_inpatient_hour_token: bool = True,
+    ):
+        self._time_token_function = get_att_function(data_args.att_function_type)
+        self._include_auxiliary_token = data_args.include_auxiliary_token
+        self._inpatient_time_token_function = get_att_function(
+            data_args.inpatient_att_function_type
+        )
+        self._include_demographic_prompt = data_args.include_demographic_prompt
+        self._include_inpatient_hour_token = include_inpatient_hour_token
+
+    """
+    This mapping function converts the MED (https://github.com/Medical-Event-Data-Standard/meds/tree/main) extension
+    to the CehrGPT format. We make several assumptions
+    - The first event contains the demographic information
+    - From the second event onward
+        - the time of the event is visit_start_datetime.
+        - the first measurement contains the code indicating a standard OMOP Visit concept_id (e.g. 9201, 9202)
+        - in case of inpatient visits, the last measurement is assumed to
+            contain the standard OMOP concept id for discharge facilities (e.g 8536)
+        - in case of inpatient visits, datetime_value of the last measurement stores visit_end_datetime
+    """
+
+    def remove_columns(self):
+        return ["patient_id", "visits", "birth_datetime"]
+
+    @staticmethod
+    def _update_cehrgpt_record(
+        cehrgpt_record: Dict[str, Any],
+        code: str,
+        concept_value_mask: int = 0,
+        number_as_value: float = 0.0,
+        concept_as_value: str = "0",
+        is_numeric_type: int = 0,
+        unit: str = NA,
+    ) -> None:
+        cehrgpt_record["concept_ids"].append(replace_escape_chars(code))
+        cehrgpt_record["concept_value_masks"].append(concept_value_mask)
+        cehrgpt_record["number_as_values"].append(number_as_value)
+        cehrgpt_record["concept_as_values"].append(concept_as_value)
+        cehrgpt_record["units"].append(unit)
+        cehrgpt_record["is_numeric_types"].append(is_numeric_type)
+
+    def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
+        cehrgpt_record = {
+            "person_id": record["patient_id"],
+            "concept_ids": [],
+            "concept_value_masks": [],
+            "number_as_values": [],
+            "concept_as_values": [],
+            "units": [],
+            "is_numeric_types": [],
+        }
+        # Extract the demographic information
+        birth_datetime = record["birth_datetime"]
+        if isinstance(birth_datetime, pd.Timestamp):
+            birth_datetime = birth_datetime.to_pydatetime()
+        gender = record["gender"]
+        race = record["race"]
+        visits = record["visits"]
+        # This indicates this is columnar format
+        if isinstance(visits, dict):
+            visits = sorted(self.convert_visit_columnar_to_python(visits))
+        else:
+            visits = sorted(visits, key=lambda _: get_value(_, "visit_start_datetime"))
+
+        # Add the demographic tokens
+        first_visit = visits[0]
+        first_visit_start_datetime: datetime.datetime = get_value(
+            first_visit, "visit_start_datetime"
+        )
+        year_str = f"year:{str(first_visit_start_datetime.year)}"
+        age_str = f"age:{str(relativedelta(first_visit_start_datetime, birth_datetime).years)}"
+        self._update_cehrgpt_record(cehrgpt_record, year_str)
+        self._update_cehrgpt_record(cehrgpt_record, age_str)
+        self._update_cehrgpt_record(cehrgpt_record, gender)
+        self._update_cehrgpt_record(cehrgpt_record, race)
+
+        # Use a data cursor to keep track of time
+        datetime_cursor: Optional[datetime.datetime] = None
+        visit: VisitObject
+        # Loop through all the visits
+        for i, visit in enumerate(visits):
+            events: Generator[Event, None, None] = get_value(visit, "events")
+            has_events, events = has_events_and_get_events(events)
+            if not has_events:
+                continue
+
+            visit_start_datetime: datetime.datetime = get_value(
+                visit, "visit_start_datetime"
+            )
+            # If visit_end_datetime is populated for the inpatient visit, we update the datetime_cursor
+            visit_end_datetime: Optional[datetime.datetime] = get_value(
+                visit, "visit_end_datetime"
+            )
+
+            # We assume the first measurement to be the visit type of the current visit
+            visit_type = get_value(visit, "visit_type")
+            is_er_or_inpatient = (
+                visit_type in INPATIENT_VISIT_TYPES
+                or visit_type in INPATIENT_VISIT_TYPE_CODES
+                or visit_type in ED_VISIT_TYPE_CODES
+            )
+
+            # Add artificial time tokens to the patient timeline if timedelta exists
+            if datetime_cursor is not None:
+                time_delta = max((visit_start_datetime - datetime_cursor).days, 0)
+                # This generates an artificial time token depending on the choice of the time token functions
+                self._update_cehrgpt_record(
+                    cehrgpt_record,
+                    code=self._time_token_function(time_delta),
+                )
+
+            datetime_cursor = visit_start_datetime
+            # Add a [VS] token
+            self._update_cehrgpt_record(
+                cehrgpt_record,
+                code="[VS]",
+            )
+            # Add a visit type token
+            self._update_cehrgpt_record(
+                cehrgpt_record,
+                code=visit_type,
+            )
+            # We need to insert an inpatient hour token right after the visit type, we calculate the hour interval
+            # with respect to the midnight of the day
+            if is_er_or_inpatient and self._include_inpatient_hour_token:
+                if datetime_cursor.hour > 0:
+                    # This generates an artificial time token depending on the choice of the time token functions
+                    self._update_cehrgpt_record(
+                        cehrgpt_record,
+                        code=f"i-H{datetime_cursor.hour}",
+                    )
+
+            # Keep track of the existing outpatient events, we don't want to add them again
+            existing_duplicate_events = list()
+            for e in events:
+                # If the event doesn't have a time stamp, we skip it
+                event_time: datetime.datetime = e["time"]
+                if not event_time:
+                    continue
+
+                # If numeric_value exists, this is a concept/value tuple, we indicate this using a concept_value_mask
+                numeric_value = e.get("numeric_value", None)
+                text_value = e.get("text_value", None)
+                # The unit might be populated with a None value
+                unit = e.get("unit", NA) if e.get("unit", NA) else NA
+                concept_value_mask = int(
+                    numeric_value is not None or text_value is not None
+                )
+                is_numeric_type = int(numeric_value is not None)
+                code = replace_escape_chars(e["code"])
+
+                # Create the event identity
+                event_identity = (
+                    (event_time, code, text_value, unit)
+                    if is_er_or_inpatient
+                    else (event_time.date(), code, text_value, unit)
+                )
+
+                # Add a medical token to the patient timeline
+                # If this is an inpatient visit, we use the event time stamps to calculate age and date
+                # because the patient can stay in the hospital for a period of time.
+                if is_er_or_inpatient:
+                    # Calculate the time diff in days w.r.t the previous measurement
+                    time_diff_days = (event_time - datetime_cursor).days
+                    # Update the datetime_cursor if the time diff between two neighboring measurements is greater than and
+                    # equal to 1 day
+                    if self._inpatient_time_token_function and time_diff_days > 0:
+                        # This generates an artificial time token depending on the choice of the time token functions
+                        self._update_cehrgpt_record(
+                            cehrgpt_record,
+                            code=f"i-{self._inpatient_time_token_function(time_diff_days)}",
+                        )
+
+                    if self._include_inpatient_hour_token:
+                        # if the time difference in days is greater than 0, we calculate the hour interval
+                        # with respect to the midnight of the day
+                        time_diff_hours = (
+                            event_time.hour
+                            if time_diff_days > 0
+                            else int(
+                                (event_time - datetime_cursor).total_seconds() // 3600
+                            )
+                        )
+
+                        if time_diff_hours > 0:
+                            # This generates an artificial time token depending on the choice of the time token functions
+                            self._update_cehrgpt_record(
+                                cehrgpt_record,
+                                code=f"i-H{time_diff_hours}",
+                            )
+
+                if event_identity in existing_duplicate_events:
+                    continue
+
+                self._update_cehrgpt_record(
+                    cehrgpt_record,
+                    code=code,
+                    concept_value_mask=concept_value_mask,
+                    unit=unit,
+                    number_as_value=numeric_value if numeric_value else 0.0,
+                    concept_as_value=(
+                        replace_escape_chars(text_value) if text_value else "0"
+                    ),
+                    is_numeric_type=is_numeric_type,
+                )
+                existing_duplicate_events.append(event_identity)
+                # we only want to update the time stamp when data_cursor is less than the event time
+                if datetime_cursor < event_time or datetime_cursor is None:
+                    datetime_cursor = event_time
+                    # We need to bound the datetime_cursor if the current visit is an admission type of visit
+                    # as the associated events could be generated after the visits are complete
+                    if is_er_or_inpatient and visit_end_datetime is not None:
+                        datetime_cursor = min(datetime_cursor, visit_end_datetime)
+
+            # For inpatient or ER visits, we want to discharge_facility to the end of the visit
+            if is_er_or_inpatient:
+                # If visit_end_datetime is populated for the inpatient visit, we update the datetime_cursor
+                if visit_end_datetime is not None:
+                    datetime_cursor = visit_end_datetime
+
+                if self._include_auxiliary_token:
+                    # Reuse the age and date calculated for the last event in the patient timeline for the discharge
+                    # facility event
+                    discharge_facility = get_value(visit, "discharge_facility")
+                    if not discharge_facility:
+                        discharge_facility = "0"
+
+                    self._update_cehrgpt_record(
+                        cehrgpt_record,
+                        code=discharge_facility,
+                    )
+
+            # Reuse the age and date calculated for the last event in the patient timeline
+            self._update_cehrgpt_record(
+                cehrgpt_record,
+                code="[VE]",
+            )
+
+        # Generate the orders of the concepts that the cehrbert dataset mapping function expects
+        cehrgpt_record["orders"] = list(
+            range(1, len(cehrgpt_record["concept_ids"]) + 1)
+        )
+
+        # Add some count information for this sequence
+        cehrgpt_record["num_of_concepts"] = len(cehrgpt_record["concept_ids"])
+        cehrgpt_record["num_of_visits"] = len(visits)
+
+        if record.get("index_date", None):
+            cehrgpt_record["index_date"] = record["index_date"]
+        if record.get("label", None):
+            cehrgpt_record["label"] = record["label"]
+        if record.get("age_at_index", None):
+            cehrgpt_record["age_at_index"] = record["age_at_index"]
+
+        return cehrgpt_record
+
+
 class HFCehrGptTokenizationMapping(DatasetMapping):
     def __init__(
         self,
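The new MedToCehrGPTDatasetMapping is a cehrbert DatasetMapping, so its transform can be driven record by record. Below is a minimal, hypothetical sketch; the stubbed DataTrainingArguments fields and the toy record are illustrative only, and it assumes get_value accepts plain-dict visits (as the non-columnar branch above suggests).

import datetime
from types import SimpleNamespace

from cehrgpt.data.hf_cehrgpt_dataset_mapping import MedToCehrGPTDatasetMapping

# Stand-in for cehrbert's DataTrainingArguments; only the attributes the
# mapping reads are stubbed here, and the values are illustrative.
data_args = SimpleNamespace(
    att_function_type="cehr_bert",
    inpatient_att_function_type="mix",
    include_auxiliary_token=True,
    include_demographic_prompt=False,
)
mapping = MedToCehrGPTDatasetMapping(data_args, include_inpatient_hour_token=True)

record = {
    "patient_id": 1,
    "birth_datetime": datetime.datetime(1980, 1, 1),
    "gender": "Gender/FEMALE",
    "race": "Race/White",
    "visits": [
        {
            "visit_type": "9202",  # OMOP outpatient visit concept id
            "visit_start_datetime": datetime.datetime(2020, 5, 1, 9, 30),
            "visit_end_datetime": None,
            "discharge_facility": None,
            "events": [
                {"time": datetime.datetime(2020, 5, 1, 9, 45), "code": "320128"},
            ],
        }
    ],
}
cehrgpt_record = mapping.transform(record)
# For this toy outpatient record, concept_ids should come out as the
# demographic prompt followed by one visit block:
# ["year:2020", "age:40", "Gender/FEMALE", "Race/White", "[VS]", "9202", "320128", "[VE]"]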
cehrgpt/data/sample_packing_sampler.py
ADDED
@@ -0,0 +1,151 @@
+from typing import Iterator, List, Optional
+
+import torch
+import torch.distributed as dist
+from torch.utils.data import Sampler
+from transformers import logging
+
+LOG = logging.get_logger("transformers")
+
+
+class SamplePlacerHolder:
+    def __init__(self):
+        self.epoch = 0
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+
+class SamplePackingBatchSampler(Sampler[List[int]]):
+    """
+    A batch sampler that creates batches by packing samples together.
+
+    to maximize GPU utilization, ensuring the total tokens per batch
+    doesn't exceed max_tokens.
+    """
+
+    def __init__(
+        self,
+        lengths: List[int],
+        max_tokens_per_batch: int,
+        max_position_embeddings: int,
+        num_replicas: Optional[int] = None,
+        rank: Optional[int] = None,
+        seed: int = 0,
+        drop_last: bool = False,
+    ):
+        """
+        Args:
+
+            lengths: List of sequence lengths for each sample
+            max_tokens: Maximum number of tokens in a batch
+            drop_last: Whether to drop the last incomplete batch
+        """
+        super().__init__()
+
+        if num_replicas is None:
+            if dist.is_available() and dist.is_initialized():
+                num_replicas = dist.get_world_size()
+                LOG.info(
+                    "torch.distributed is initialized and there are %s of replicas",
+                    num_replicas,
+                )
+            else:
+                num_replicas = 1
+                LOG.info(
+                    "torch.dist is not initialized and therefore default to 1 for num_replicas"
+                )
+
+        if rank is None:
+            if dist.is_available() and dist.is_initialized():
+                rank = dist.get_rank()
+                LOG.info(
+                    "torch.distributed is initialized and the current rank is %s", rank
+                )
+            else:
+                rank = 0
+                LOG.info(
+                    "torch.distributed is not initialized and therefore default to 0 for rank"
+                )
+
+        if not (0 <= rank < num_replicas):
+            raise ValueError(
+                f"Invalid rank {rank}, rank should be in the interval [0, {num_replicas - 1}]"
+            )
+
+        self.lengths = lengths
+        self.max_tokens_per_batch = max_tokens_per_batch
+        self.max_position_embeddings = max_position_embeddings
+        self.num_replicas = num_replicas
+        self.rank = rank
+        self.seed = seed
+        self.drop_last = drop_last
+        # Trainer https://github.com/huggingface/transformers/blame/main/src/transformers/trainer.py#L2470
+        # http://github.com/huggingface/accelerate/blob/v0.31.0/src/accelerate/data_loader.py#L482
+        # the huggingface trainer will call the accelerate.data_loader.DataLoaderShard.set_epoch,
+        # which will call batch_sampler.sample.set_epoch
+        self.sampler = SamplePlacerHolder()
+
+    def __iter__(self) -> Iterator[List[int]]:
+
+        # deterministically shuffle based on epoch and seed
+        g = torch.Generator()
+        g.manual_seed(self.seed + self.sampler.epoch)
+        indices = torch.randperm(len(self.lengths), generator=g).tolist()
+
+        # Partition indices for this rank
+        indices = indices[self.rank :: self.num_replicas]
+
+        batch = []
+        current_batch_tokens = 0
+
+        for idx in indices:
+            # We take the minimum of the two because each sequence will be truncated to fit
+            # the context window of the model
+            sample_length = min(self.lengths[idx], self.max_position_embeddings)
+            # If adding this sample would exceed max_tokens_per_batch, yield the current batch
+            if (
+                current_batch_tokens + sample_length + 2 > self.max_tokens_per_batch
+                and batch
+            ):
+                yield batch
+                batch = []
+                current_batch_tokens = 0
+
+            # Add the sample to the current batch
+            batch.append(idx)
+            # plus extract one for the [END] and [PAD] tokens to separate samples
+            current_batch_tokens += sample_length + 2
+
+        # Yield the last batch if it's not empty and we're not dropping it
+        if batch and not self.drop_last:
+            yield batch
+
+    def __len__(self) -> int:
+        """
+        Estimates the number of batches that will be generated.
+
+        This is an approximation since the exact number depends on the specific
+        sequence lengths and their order.
+        """
+        if len(self.lengths) == 0:
+            return 0
+
+        # We need to truncate the lengths due to the context window limit imposed by the model
+        truncated_lengths = [
+            min(self.max_position_embeddings, length + 2) for length in self.lengths
+        ]
+
+        # Calculate average sequence length
+        avg_seq_length = sum(truncated_lengths) // len(truncated_lengths)
+
+        # Estimate average number of sequences per batch
+        seqs_per_batch = self.max_tokens_per_batch // avg_seq_length
+
+        # Estimate total number of batches
+        if self.drop_last:
+            # If dropping last incomplete batch
+            return len(truncated_lengths) // seqs_per_batch * self.num_replicas
+        else:
+            # If keeping last incomplete batch, ensure at least 1 batch
+            return max(1, len(truncated_lengths) // seqs_per_batch) * self.num_replicas
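A minimal sketch of wiring the new batch sampler into a PyTorch DataLoader; the lengths, token budget, and the dataset/collator mentioned at the end are placeholders, not values taken from the package.

from torch.utils.data import DataLoader

from cehrgpt.data.sample_packing_sampler import SamplePackingBatchSampler

# Per-sample token counts, e.g. len(record["concept_ids"]) for each record.
lengths = [743, 1210, 96, 3005, 412]

batch_sampler = SamplePackingBatchSampler(
    lengths=lengths,
    max_tokens_per_batch=4096,     # token budget per packed batch
    max_position_embeddings=2048,  # longer samples count as truncated to this
    seed=42,
)

# Each yielded batch is a list of dataset indices; every sample contributes
# its truncated length plus 2 tokens of [END]/[PAD] overhead, so the batch
# total stays within max_tokens_per_batch.
for batch_indices in batch_sampler:
    print(batch_indices)

# With a map-style dataset and a packing collator (both placeholders here):
# loader = DataLoader(dataset, batch_sampler=batch_sampler, collate_fn=packing_collator)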
cehrgpt/generation/generate_batch_hf_gpt_sequence.py
CHANGED
@@ -93,9 +93,9 @@ def generate_single_batch(
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
-        bos_token_id=
-        eos_token_id=
-        pad_token_id=
+        bos_token_id=model.generation_config.bos_token_id,
+        eos_token_id=model.generation_config.eos_token_id,
+        pad_token_id=model.generation_config.pad_token_id,
         do_sample=True,
         use_cache=True,
         return_dict_in_generate=True,
@@ -150,15 +150,11 @@ def main(args):
             attn_implementation=(
                 "flash_attention_2" if is_flash_attn_2_available() else "eager"
             ),
-            torch_dtype=(
-                torch.bfloat16
-                if is_flash_attn_2_available() and args.use_bfloat16
-                else torch.float32
-            ),
         )
         .eval()
         .to(device)
     )
+
     cehrgpt_model.generation_config.pad_token_id = cehrgpt_tokenizer.pad_token_id
     cehrgpt_model.generation_config.eos_token_id = cehrgpt_tokenizer.end_token_id
     cehrgpt_model.generation_config.bos_token_id = cehrgpt_tokenizer.end_token_id
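Taken together, these two hunks move the special-token ids out of generate_single_batch: main() copies them from the tokenizer onto the model's generation_config, and the generation call reads them back from there. A hedged sketch of the resulting call, assuming the kwargs above are forwarded to a Hugging Face-style model.generate; token_ids and max_new_tokens are illustrative, not from the package.

results = model.generate(
    inputs=token_ids,               # hypothetical batch of encoded prompts
    max_new_tokens=max_new_tokens,  # hypothetical generation budget
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    bos_token_id=model.generation_config.bos_token_id,
    eos_token_id=model.generation_config.eos_token_id,
    pad_token_id=model.generation_config.pad_token_id,
    do_sample=True,
    use_cache=True,
    return_dict_in_generate=True,
)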
@@ -192,6 +188,7 @@ def main(args):
     LOG.info(f"Top P {args.top_p}")
     LOG.info(f"Top K {args.top_k}")
     LOG.info(f"Loading demographic_info at {args.demographic_data_path}")
+    LOG.info(f"MEDS format: {args.meds_format}")
 
     dataset = load_parquet_as_dataset(args.demographic_data_path)
     total_rows = len(dataset)
@@ -199,6 +196,7 @@ def main(args):
     num_of_batches = args.num_of_patients // args.batch_size + 1
     sequence_to_flush = []
     current_person_id = 1
+    prompt_size = 2 if args.meds_format else START_TOKEN_SIZE
     for i in range(num_of_batches):
         LOG.info(f"{datetime.datetime.now()}: Batch {i} started")
 
@@ -215,7 +213,7 @@ def main(args):
                 <= max_seq_allowed
             ):
                 random_prompts.append(
-                    cehrgpt_tokenizer.encode(row["concept_ids"][:
+                    cehrgpt_tokenizer.encode(row["concept_ids"][:prompt_size])
                 )
                 iter += 1
             if not random_prompts and iter > 10:
@@ -326,6 +324,11 @@ def create_arg_parser():
         dest="drop_long_sequences",
         action="store_true",
     )
+    base_arg_parser.add_argument(
+        "--meds_format",
+        dest="meds_format",
+        action="store_true",
+    )
     return base_arg_parser
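The net effect of the new --meds_format flag on prompting, as an illustrative recap (the token values below are examples; the demographic prompt layout comes from the MedToCehrGPTDatasetMapping diff above):

concept_ids = ["year:2020", "age:40", "Gender/FEMALE", "Race/White", "[VS]"]
prompt_size = 2 if args.meds_format else START_TOKEN_SIZE  # START_TOKEN_SIZE = 4
prompt = cehrgpt_tokenizer.encode(concept_ids[:prompt_size])
# MEDS format keeps only the first two tokens (year and age); otherwise the
# full four-token demographic prompt seeds generation.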
cehrgpt/generation/omop_converter_batch.py
CHANGED
@@ -35,6 +35,7 @@ from cehrgpt.models.tokenization_hf_cehrgpt import END_TOKEN
 # TODO: move these to cehrbert_data
 STOP_TOKENS = ["VE", "[VE]", END_TOKEN]
 
+OOV = "[OOV]"
 CURRENT_PATH = Path(__file__).parent
 START_TOKEN_SIZE = 4
 ATT_TIME_TOKENS = generate_artificial_time_tokens()
@@ -297,6 +298,8 @@ def gpt_to_omop_converter_batch(
     inpatient_visit_indicator = False
 
     for event_idx, event in enumerate(clinical_events, 0):
+        if event == OOV:
+            continue
         # For bad sequences, we don't proceed further and break from the for loop
         if bad_sequence:
             break
cehrgpt/models/config.py
CHANGED
@@ -133,14 +133,17 @@ class CEHRGPTConfig(PretrainedConfig):
         n_pretrained_embeddings_layers=2,
         pretrained_embedding_dim=768,
         pretrained_token_ids: List[int] = None,
+        next_token_prediction_loss_weight=1.0,
         time_token_loss_weight=1.0,
         time_to_visit_loss_weight=1.0,
         causal_sfm=False,
         demographics_size=4,
         lab_token_penalty=False,
         lab_token_loss_weight=0.9,
+        value_prediction_loss_weight=1.0,
         entropy_penalty=False,
         entropy_penalty_alpha=0.01,
+        sample_packing_max_positions=None,
         **kwargs,
     ):
         if token_to_time_token_mapping is None:
@@ -150,6 +153,11 @@ class CEHRGPTConfig(PretrainedConfig):
         self.vocab_size = vocab_size
         self.time_token_vocab_size = time_token_vocab_size
         self.n_positions = n_positions
+        self.sample_packing_max_positions = (
+            sample_packing_max_positions
+            if sample_packing_max_positions
+            else n_positions
+        )
         self.n_embd = n_embd
         self.n_layer = n_layer
         self.n_head = n_head
@@ -178,6 +186,7 @@ class CEHRGPTConfig(PretrainedConfig):
         self.include_values = include_values
         self.value_vocab_size = value_vocab_size
 
+        self.next_token_prediction_loss_weight = next_token_prediction_loss_weight
         self.include_ttv_prediction = include_ttv_prediction
         self.use_sub_time_tokenization = use_sub_time_tokenization
         self._token_to_time_token_mapping = token_to_time_token_mapping
@@ -195,6 +204,7 @@ class CEHRGPTConfig(PretrainedConfig):
         self.lab_token_loss_weight = lab_token_loss_weight
         self.entropy_penalty = entropy_penalty
         self.entropy_penalty_alpha = entropy_penalty_alpha
+        self.value_prediction_loss_weight = value_prediction_loss_weight
 
         kwargs["tie_word_embeddings"] = not use_pretrained_embeddings
 
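A hedged sketch of the three new configuration knobs; every other constructor argument keeps its default, and the values shown are illustrative rather than recommended.

from cehrgpt.models.config import CEHRGPTConfig

config = CEHRGPTConfig(
    # Weight of the next-token prediction term in the overall loss.
    next_token_prediction_loss_weight=1.0,
    # Weight of the value prediction term.
    value_prediction_loss_weight=1.0,
    # Position limit used for sample packing; per the diff above it
    # falls back to n_positions when left as None.
    sample_packing_max_positions=8192,
)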