cehrgpt 0.0.2__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cehrgpt/data/hf_cehrgpt_dataset.py +24 -4
- cehrgpt/data/hf_cehrgpt_dataset_collator.py +260 -84
- cehrgpt/data/hf_cehrgpt_dataset_mapping.py +99 -88
- cehrgpt/data/sample_packing_sampler.py +151 -0
- cehrgpt/generation/generate_batch_hf_gpt_sequence.py +12 -9
- cehrgpt/models/config.py +10 -0
- cehrgpt/models/hf_cehrgpt.py +243 -73
- cehrgpt/models/tokenization_hf_cehrgpt.py +4 -0
- cehrgpt/runners/data_utils.py +243 -0
- cehrgpt/runners/gpt_runner_util.py +0 -10
- cehrgpt/runners/hf_cehrgpt_finetune_runner.py +152 -279
- cehrgpt/runners/hf_cehrgpt_pretrain_runner.py +229 -105
- cehrgpt/runners/hf_gpt_runner_argument_dataclass.py +42 -0
- cehrgpt/runners/hyperparameter_search_util.py +4 -1
- cehrgpt/runners/sample_packing_trainer.py +168 -0
- cehrgpt/simulations/generate_plots.py +95 -0
- cehrgpt/simulations/run_simulation.sh +24 -0
- cehrgpt/simulations/time_embedding_simulation.py +250 -0
- cehrgpt/simulations/time_token_simulation.py +177 -0
- cehrgpt/tools/linear_prob/__init__.py +0 -0
- cehrgpt/tools/linear_prob/compute_cehrgpt_features.py +467 -0
- cehrgpt/tools/linear_prob/train_with_cehrgpt_features.py +152 -0
- {cehrgpt-0.0.2.dist-info → cehrgpt-0.1.0.dist-info}/METADATA +7 -5
- {cehrgpt-0.0.2.dist-info → cehrgpt-0.1.0.dist-info}/RECORD +28 -26
- {cehrgpt-0.0.2.dist-info → cehrgpt-0.1.0.dist-info}/WHEEL +1 -1
- cehrgpt/data/hf_cehrgpt_dpo_collator.py +0 -71
- cehrgpt/data/hf_cehrgpt_dpo_dataset_mapping.py +0 -61
- cehrgpt/generation/generate_paired_cehrgpt_sequence.py +0 -224
- cehrgpt/rl_finetune/cehrgpt_dpo_trainer.py +0 -586
- cehrgpt/rl_finetune/cehrgpt_ppo_trainer.py +0 -464
- cehrgpt/rl_finetune/ppo_finetune.py +0 -394
- cehrgpt/rl_finetune/ppo_finetune_v2.py +0 -373
- cehrgpt/runners/hf_cehrgpt_dpo_runner.py +0 -119
- /cehrgpt/{rl_finetune → simulations}/__init__.py +0 -0
- {cehrgpt-0.0.2.dist-info → cehrgpt-0.1.0.dist-info/licenses}/LICENSE +0 -0
- {cehrgpt-0.0.2.dist-info → cehrgpt-0.1.0.dist-info}/top_level.txt +0 -0
cehrgpt/data/hf_cehrgpt_dataset_mapping.py
CHANGED

```diff
@@ -1,5 +1,5 @@
 import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Generator, Optional
 
 import numpy as np
 import pandas as pd
@@ -8,8 +8,12 @@ from cehrbert.data_generators.hf_data_generator.hf_dataset_mapping import (
     INPATIENT_VISIT_TYPE_CODES,
     INPATIENT_VISIT_TYPES,
     DatasetMapping,
+    VisitObject,
+    get_value,
+    has_events_and_get_events,
     replace_escape_chars,
 )
+from cehrbert.med_extension.schema_extension import Event
 from cehrbert.runners.hf_runner_argument_dataclass import DataTrainingArguments
 from cehrbert_data.const.common import NA
 from cehrbert_data.decorators.patient_event_decorator_base import get_att_function
@@ -32,7 +36,6 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
     def __init__(
         self,
         data_args: DataTrainingArguments,
-        is_pretraining: bool = True,
         include_inpatient_hour_token: bool = True,
     ):
         self._time_token_function = get_att_function(data_args.att_function_type)
@@ -41,7 +44,6 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
             data_args.inpatient_att_function_type
         )
         self._include_demographic_prompt = data_args.include_demographic_prompt
-        self._is_pretraining = is_pretraining
         self._include_inpatient_hour_token = include_inpatient_hour_token
 
     """
@@ -57,14 +59,7 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
     """
 
     def remove_columns(self):
-        if self._is_pretraining:
-            return ["visits", "birth_datetime", "index_date"]
-        else:
-            return [
-                "visits",
-                "birth_datetime",
-                "visit_concept_ids",
-            ]
+        return ["patient_id", "visits", "birth_datetime"]
 
     @staticmethod
     def _update_cehrgpt_record(
@@ -99,38 +94,45 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
             birth_datetime = birth_datetime.to_pydatetime()
         gender = record["gender"]
         race = record["race"]
+        visits = record["visits"]
+        # This indicates this is columnar format
+        if isinstance(visits, dict):
+            visits = sorted(self.convert_visit_columnar_to_python(visits))
+        else:
+            visits = sorted(visits, key=lambda _: get_value(_, "visit_start_datetime"))
 
         # Add the demographic tokens
-        first_visit =
-
-
+        first_visit = visits[0]
+        first_visit_start_datetime: datetime.datetime = get_value(
+            first_visit, "visit_start_datetime"
+        )
+        year_str = f"year:{str(first_visit_start_datetime.year)}"
+        age_str = f"age:{str(relativedelta(first_visit_start_datetime, birth_datetime).years)}"
         self._update_cehrgpt_record(cehrgpt_record, year_str)
         self._update_cehrgpt_record(cehrgpt_record, age_str)
         self._update_cehrgpt_record(cehrgpt_record, gender)
         self._update_cehrgpt_record(cehrgpt_record, race)
 
         # Use a data cursor to keep track of time
-
-
-        # Loop through all the visits
-        for i, visit in enumerate(
-
-
-
-            events = visit["events"]
-
-            # Skip this visit if the number measurements in the event is zero
-            if events is None or len(events) == 0:
+        datetime_cursor: Optional[datetime.datetime] = None
+        visit: VisitObject
+        # Loop through all the visits
+        for i, visit in enumerate(visits):
+            events: Generator[Event, None, None] = get_value(visit, "events")
+            has_events, events = has_events_and_get_events(events)
+            if not has_events:
                 continue
 
-            visit_start_datetime =
-
-
+            visit_start_datetime: datetime.datetime = get_value(
+                visit, "visit_start_datetime"
+            )
+            # If visit_end_datetime is populated for the inpatient visit, we update the datetime_cursor
+            visit_end_datetime: Optional[datetime.datetime] = get_value(
+                visit, "visit_end_datetime"
             )
-            date_cursor = visit_start_datetime
 
             # We assume the first measurement to be the visit type of the current visit
-            visit_type = visit["visit_type"]
+            visit_type = get_value(visit, "visit_type")
            is_er_or_inpatient = (
                 visit_type in INPATIENT_VISIT_TYPES
                 or visit_type in INPATIENT_VISIT_TYPE_CODES
@@ -138,21 +140,15 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
             )
 
             # Add artificial time tokens to the patient timeline if timedelta exists
-            if
+            if datetime_cursor is not None:
+                time_delta = max((visit_start_datetime - datetime_cursor).days, 0)
                 # This generates an artificial time token depending on the choice of the time token functions
                 self._update_cehrgpt_record(
                     cehrgpt_record,
                     code=self._time_token_function(time_delta),
                 )
 
-
-            relativedelta(visit["visit_start_datetime"], birth_datetime).years
-            # Calculate the week number since the epoch time
-            date = (
-                visit["visit_start_datetime"]
-                - datetime.datetime(year=1970, month=1, day=1)
-            ).days // 7
-
+            datetime_cursor = visit_start_datetime
             # Add a [VS] token
             self._update_cehrgpt_record(
                 cehrgpt_record,
@@ -163,11 +159,22 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
                 cehrgpt_record,
                 code=visit_type,
             )
+            # We need to insert an inpatient hour token right after the visit type, we calculate the hour interval
+            # with respect to the midnight of the day
+            if is_er_or_inpatient and self._include_inpatient_hour_token:
+                if datetime_cursor.hour > 0:
+                    # This generates an artificial time token depending on the choice of the time token functions
+                    self._update_cehrgpt_record(
+                        cehrgpt_record,
+                        code=f"i-H{datetime_cursor.hour}",
+                    )
+
             # Keep track of the existing outpatient events, we don't want to add them again
-
+            existing_duplicate_events = list()
             for e in events:
                 # If the event doesn't have a time stamp, we skip it
-
+                event_time: datetime.datetime = e["time"]
+                if not event_time:
                     continue
 
                 # If numeric_value exists, this is a concept/value tuple, we indicate this using a concept_value_mask
@@ -181,40 +188,48 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
                 is_numeric_type = int(numeric_value is not None)
                 code = replace_escape_chars(e["code"])
 
+                # Create the event identity
+                event_identity = (
+                    (event_time, code, text_value, unit)
+                    if is_er_or_inpatient
+                    else (event_time.date(), code, text_value, unit)
+                )
+
                 # Add a medical token to the patient timeline
                 # If this is an inpatient visit, we use the event time stamps to calculate age and date
                 # because the patient can stay in the hospital for a period of time.
                 if is_er_or_inpatient:
-                    # Calculate the week number since the epoch time
-                    date = (
-                        e["time"] - datetime.datetime(year=1970, month=1, day=1)
-                    ).days // 7
                     # Calculate the time diff in days w.r.t the previous measurement
-
-                    # Update the
+                    time_diff_days = (event_time - datetime_cursor).days
+                    # Update the datetime_cursor if the time diff between two neighboring measurements is greater than and
                     # equal to 1 day
-                    if
-
-
+                    if self._inpatient_time_token_function and time_diff_days > 0:
+                        # This generates an artificial time token depending on the choice of the time token functions
+                        self._update_cehrgpt_record(
+                            cehrgpt_record,
+                            code=f"i-{self._inpatient_time_token_function(time_diff_days)}",
+                        )
+
+                    if self._include_inpatient_hour_token:
+                        # if the time difference in days is greater than 0, we calculate the hour interval
+                        # with respect to the midnight of the day
+                        time_diff_hours = (
+                            event_time.hour
+                            if time_diff_days > 0
+                            else int(
+                                (event_time - datetime_cursor).total_seconds() // 3600
+                            )
+                        )
+
+                        if time_diff_hours > 0:
                             # This generates an artificial time token depending on the choice of the time token functions
                             self._update_cehrgpt_record(
                                 cehrgpt_record,
-                                code=f"i-{
+                                code=f"i-H{time_diff_hours}",
                             )
-
-
-
-                # We check whether the date/code/value combination already exists in the existing events
-                # If they exist, we do not add them to the patient timeline for outpatient visits.
-                if (
-                    date,
-                    code,
-                    numeric_value,
-                    text_value,
-                    concept_value_mask,
-                    numeric_value,
-                ) in existing_outpatient_events:
-                    continue
+
+                if event_identity in existing_duplicate_events:
+                    continue
 
                 self._update_cehrgpt_record(
                     cehrgpt_record,
@@ -227,33 +242,27 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
                     ),
                     is_numeric_type=is_numeric_type,
                 )
-
-
-
-
-
-
-
-
-                    )
-                )
+                existing_duplicate_events.append(event_identity)
+                # we only want to update the time stamp when data_cursor is less than the event time
+                if datetime_cursor < event_time or datetime_cursor is None:
+                    datetime_cursor = event_time
+                    # We need to bound the datetime_cursor if the current visit is an admission type of visit
+                    # as the associated events could be generated after the visits are complete
+                    if is_er_or_inpatient and visit_end_datetime is not None:
+                        datetime_cursor = min(datetime_cursor, visit_end_datetime)
 
             # For inpatient or ER visits, we want to discharge_facility to the end of the visit
             if is_er_or_inpatient:
-                # If visit_end_datetime is populated for the inpatient visit, we update the
-                visit_end_datetime
-
-                date_cursor = visit_end_datetime
+                # If visit_end_datetime is populated for the inpatient visit, we update the datetime_cursor
+                if visit_end_datetime is not None:
+                    datetime_cursor = visit_end_datetime
 
             if self._include_auxiliary_token:
                 # Reuse the age and date calculated for the last event in the patient timeline for the discharge
                 # facility event
-                discharge_facility = (
-
-
-                    and visit["discharge_facility"]
-                    else "0"
-                )
+                discharge_facility = get_value(visit, "discharge_facility")
+                if not discharge_facility:
+                    discharge_facility = "0"
 
                 self._update_cehrgpt_record(
                     cehrgpt_record,
@@ -273,11 +282,13 @@ class MedToCehrGPTDatasetMapping(DatasetMapping):
 
         # Add some count information for this sequence
         cehrgpt_record["num_of_concepts"] = len(cehrgpt_record["concept_ids"])
-        cehrgpt_record["num_of_visits"] = len(
+        cehrgpt_record["num_of_visits"] = len(visits)
 
-        if "
+        if record.get("index_date", None):
+            cehrgpt_record["index_date"] = record["index_date"]
+        if record.get("label", None):
             cehrgpt_record["label"] = record["label"]
-        if "age_at_index"
+        if record.get("age_at_index", None):
             cehrgpt_record["age_at_index"] = record["age_at_index"]
 
         return cehrgpt_record
```
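The mapping rewrite above replaces the old epoch-week dedup key with an explicit per-visit event identity: inpatient/ER events are keyed on their full timestamp, outpatient events on the calendar date only. A minimal sketch of that rule (illustrative only; `dedup_events` and its tuple-shaped events are hypothetical stand-ins, not the packaged API):

```python
import datetime

def dedup_events(events, is_er_or_inpatient: bool):
    # Hypothetical helper mirroring the event_identity logic in the diff:
    # inpatient/ER events keep the full timestamp, outpatient events
    # collapse to the calendar date, so repeated same-day codes are dropped.
    seen = []  # the packaged code likewise tracks identities in a list
    for event_time, code, text_value, unit in events:
        event_identity = (
            (event_time, code, text_value, unit)
            if is_er_or_inpatient
            else (event_time.date(), code, text_value, unit)
        )
        if event_identity in seen:
            continue
        seen.append(event_identity)
        yield event_time, code, text_value, unit

day = datetime.datetime(2024, 1, 1)
events = [
    (day.replace(hour=9), "1234", None, "mmHg"),
    (day.replace(hour=17), "1234", None, "mmHg"),
]
# Outpatient: the second same-day reading is treated as a duplicate.
assert len(list(dedup_events(events, is_er_or_inpatient=False))) == 1
# Inpatient: distinct timestamps keep both readings.
assert len(list(dedup_events(events, is_er_or_inpatient=True))) == 2
```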
cehrgpt/data/sample_packing_sampler.py
ADDED

```diff
@@ -0,0 +1,151 @@
+from typing import Iterator, List, Optional
+
+import torch
+import torch.distributed as dist
+from torch.utils.data import Sampler
+from transformers import logging
+
+LOG = logging.get_logger("transformers")
+
+
+class SamplePlacerHolder:
+    def __init__(self):
+        self.epoch = 0
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+
+class SamplePackingBatchSampler(Sampler[List[int]]):
+    """
+    A batch sampler that creates batches by packing samples together.
+
+    to maximize GPU utilization, ensuring the total tokens per batch
+    doesn't exceed max_tokens.
+    """
+
+    def __init__(
+        self,
+        lengths: List[int],
+        max_tokens_per_batch: int,
+        max_position_embeddings: int,
+        num_replicas: Optional[int] = None,
+        rank: Optional[int] = None,
+        seed: int = 0,
+        drop_last: bool = False,
+    ):
+        """
+        Args:
+
+            lengths: List of sequence lengths for each sample
+            max_tokens: Maximum number of tokens in a batch
+            drop_last: Whether to drop the last incomplete batch
+        """
+        super().__init__()
+
+        if num_replicas is None:
+            if dist.is_available() and dist.is_initialized():
+                num_replicas = dist.get_world_size()
+                LOG.info(
+                    "torch.distributed is initialized and there are %s of replicas",
+                    num_replicas,
+                )
+            else:
+                num_replicas = 1
+                LOG.info(
+                    "torch.dist is not initialized and therefore default to 1 for num_replicas"
+                )
+
+        if rank is None:
+            if dist.is_available() and dist.is_initialized():
+                rank = dist.get_rank()
+                LOG.info(
+                    "torch.distributed is initialized and the current rank is %s", rank
+                )
+            else:
+                rank = 0
+                LOG.info(
+                    "torch.distributed is not initialized and therefore default to 0 for rank"
+                )
+
+        if not (0 <= rank < num_replicas):
+            raise ValueError(
+                f"Invalid rank {rank}, rank should be in the interval [0, {num_replicas - 1}]"
+            )
+
+        self.lengths = lengths
+        self.max_tokens_per_batch = max_tokens_per_batch
+        self.max_position_embeddings = max_position_embeddings
+        self.num_replicas = num_replicas
+        self.rank = rank
+        self.seed = seed
+        self.drop_last = drop_last
+        # Trainer https://github.com/huggingface/transformers/blame/main/src/transformers/trainer.py#L2470
+        # http://github.com/huggingface/accelerate/blob/v0.31.0/src/accelerate/data_loader.py#L482
+        # the huggingface trainer will call the accelerate.data_loader.DataLoaderShard.set_epoch,
+        # which will call batch_sampler.sample.set_epoch
+        self.sampler = SamplePlacerHolder()
+
+    def __iter__(self) -> Iterator[List[int]]:
+
+        # deterministically shuffle based on epoch and seed
+        g = torch.Generator()
+        g.manual_seed(self.seed + self.sampler.epoch)
+        indices = torch.randperm(len(self.lengths), generator=g).tolist()
+
+        # Partition indices for this rank
+        indices = indices[self.rank :: self.num_replicas]
+
+        batch = []
+        current_batch_tokens = 0
+
+        for idx in indices:
+            # We take the minimum of the two because each sequence will be truncated to fit
+            # the context window of the model
+            sample_length = min(self.lengths[idx], self.max_position_embeddings)
+            # If adding this sample would exceed max_tokens_per_batch, yield the current batch
+            if (
+                current_batch_tokens + sample_length + 2 > self.max_tokens_per_batch
+                and batch
+            ):
+                yield batch
+                batch = []
+                current_batch_tokens = 0
+
+            # Add the sample to the current batch
+            batch.append(idx)
+            # plus extract one for the [END] and [PAD] tokens to separate samples
+            current_batch_tokens += sample_length + 2
+
+        # Yield the last batch if it's not empty and we're not dropping it
+        if batch and not self.drop_last:
+            yield batch
+
+    def __len__(self) -> int:
+        """
+        Estimates the number of batches that will be generated.
+
+        This is an approximation since the exact number depends on the specific
+        sequence lengths and their order.
+        """
+        if len(self.lengths) == 0:
+            return 0
+
+        # We need to truncate the lengths due to the context window limit imposed by the model
+        truncated_lengths = [
+            min(self.max_position_embeddings, length + 2) for length in self.lengths
+        ]
+
+        # Calculate average sequence length
+        avg_seq_length = sum(truncated_lengths) // len(truncated_lengths)
+
+        # Estimate average number of sequences per batch
+        seqs_per_batch = self.max_tokens_per_batch // avg_seq_length
+
+        # Estimate total number of batches
+        if self.drop_last:
+            # If dropping last incomplete batch
+            return len(truncated_lengths) // seqs_per_batch * self.num_replicas
+        else:
+            # If keeping last incomplete batch, ensure at least 1 batch
+            return max(1, len(truncated_lengths) // seqs_per_batch) * self.num_replicas
```
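A minimal usage sketch for the new batch sampler (assumptions: `ToyDataset`, the toy lengths, and the pass-through collator below are made up; in this release the actual packing is done by the collator in `cehrgpt/data/hf_cehrgpt_dataset_collator.py`, while the sampler only groups indices so that each batch stays under `max_tokens_per_batch`):

```python
import torch
from torch.utils.data import DataLoader, Dataset

from cehrgpt.data.sample_packing_sampler import SamplePackingBatchSampler

class ToyDataset(Dataset):  # hypothetical stand-in for a tokenized EHR dataset
    def __init__(self, lengths):
        self.lengths = lengths

    def __len__(self):
        return len(self.lengths)

    def __getitem__(self, idx):
        return {"input_ids": torch.zeros(self.lengths[idx], dtype=torch.long)}

lengths = [512, 1024, 128, 2048, 300]  # per-sample token counts
dataset = ToyDataset(lengths)
batch_sampler = SamplePackingBatchSampler(
    lengths=lengths,
    max_tokens_per_batch=4096,
    max_position_embeddings=1024,  # longer samples are counted as truncated
    drop_last=False,
)
loader = DataLoader(dataset, batch_sampler=batch_sampler, collate_fn=lambda x: x)
for batch in loader:
    # Each batch is a list of samples whose packed length (plus the 2 separator
    # tokens each, per the sampler's accounting) fits the token budget.
    assert sum(min(len(s["input_ids"]), 1024) + 2 for s in batch) <= 4096
```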
cehrgpt/generation/generate_batch_hf_gpt_sequence.py
CHANGED

```diff
@@ -93,9 +93,9 @@ def generate_single_batch(
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
-        bos_token_id=
-        eos_token_id=
-        pad_token_id=
+        bos_token_id=model.generation_config.bos_token_id,
+        eos_token_id=model.generation_config.eos_token_id,
+        pad_token_id=model.generation_config.pad_token_id,
         do_sample=True,
         use_cache=True,
         return_dict_in_generate=True,
@@ -150,15 +150,11 @@ def main(args):
             attn_implementation=(
                 "flash_attention_2" if is_flash_attn_2_available() else "eager"
             ),
-            torch_dtype=(
-                torch.bfloat16
-                if is_flash_attn_2_available() and args.use_bfloat16
-                else torch.float32
-            ),
         )
         .eval()
         .to(device)
     )
+
     cehrgpt_model.generation_config.pad_token_id = cehrgpt_tokenizer.pad_token_id
     cehrgpt_model.generation_config.eos_token_id = cehrgpt_tokenizer.end_token_id
     cehrgpt_model.generation_config.bos_token_id = cehrgpt_tokenizer.end_token_id
@@ -192,6 +188,7 @@ def main(args):
     LOG.info(f"Top P {args.top_p}")
     LOG.info(f"Top K {args.top_k}")
     LOG.info(f"Loading demographic_info at {args.demographic_data_path}")
+    LOG.info(f"MEDS format: {args.meds_format}")
 
     dataset = load_parquet_as_dataset(args.demographic_data_path)
     total_rows = len(dataset)
@@ -199,6 +196,7 @@ def main(args):
     num_of_batches = args.num_of_patients // args.batch_size + 1
     sequence_to_flush = []
     current_person_id = 1
+    prompt_size = 2 if args.meds_format else START_TOKEN_SIZE
     for i in range(num_of_batches):
         LOG.info(f"{datetime.datetime.now()}: Batch {i} started")
 
@@ -215,7 +213,7 @@ def main(args):
                 <= max_seq_allowed
             ):
                 random_prompts.append(
-                    cehrgpt_tokenizer.encode(row["concept_ids"][:
+                    cehrgpt_tokenizer.encode(row["concept_ids"][:prompt_size])
                 )
             iter += 1
         if not random_prompts and iter > 10:
@@ -326,6 +324,11 @@
         dest="drop_long_sequences",
         action="store_true",
     )
+    base_arg_parser.add_argument(
+        "--meds_format",
+        dest="meds_format",
+        action="store_true",
+    )
     return base_arg_parser
 
 
```
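The new `--meds_format` flag changes how generation prompts are sliced from real sequences: MEDS-style data is prompted with its first two tokens, while the default path keeps the demographic prompt of `START_TOKEN_SIZE` tokens. A sketch of that rule (the value 4 for `START_TOKEN_SIZE` is an assumption, based on the year/age/gender/race prompt built in hf_cehrgpt_dataset_mapping.py; `prompt_tokens` is a hypothetical helper):

```python
START_TOKEN_SIZE = 4  # assumed: year, age, gender, race demographic tokens

def prompt_tokens(concept_ids, meds_format: bool):
    # Mirrors the prompt_size logic added to main() above.
    prompt_size = 2 if meds_format else START_TOKEN_SIZE
    return concept_ids[:prompt_size]

seq = ["year:2021", "age:63", "gender_token", "race_token", "[VS]", "9202"]
assert prompt_tokens(seq, meds_format=True) == ["year:2021", "age:63"]
assert prompt_tokens(seq, meds_format=False) == seq[:4]
```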
cehrgpt/models/config.py
CHANGED
```diff
@@ -133,14 +133,17 @@ class CEHRGPTConfig(PretrainedConfig):
         n_pretrained_embeddings_layers=2,
         pretrained_embedding_dim=768,
         pretrained_token_ids: List[int] = None,
+        next_token_prediction_loss_weight=1.0,
         time_token_loss_weight=1.0,
         time_to_visit_loss_weight=1.0,
         causal_sfm=False,
         demographics_size=4,
         lab_token_penalty=False,
         lab_token_loss_weight=0.9,
+        value_prediction_loss_weight=1.0,
         entropy_penalty=False,
         entropy_penalty_alpha=0.01,
+        sample_packing_max_positions=None,
         **kwargs,
     ):
         if token_to_time_token_mapping is None:
@@ -150,6 +153,11 @@ class CEHRGPTConfig(PretrainedConfig):
         self.vocab_size = vocab_size
         self.time_token_vocab_size = time_token_vocab_size
         self.n_positions = n_positions
+        self.sample_packing_max_positions = (
+            sample_packing_max_positions
+            if sample_packing_max_positions
+            else n_positions
+        )
         self.n_embd = n_embd
         self.n_layer = n_layer
         self.n_head = n_head
@@ -178,6 +186,7 @@ class CEHRGPTConfig(PretrainedConfig):
         self.include_values = include_values
         self.value_vocab_size = value_vocab_size
 
+        self.next_token_prediction_loss_weight = next_token_prediction_loss_weight
         self.include_ttv_prediction = include_ttv_prediction
         self.use_sub_time_tokenization = use_sub_time_tokenization
         self._token_to_time_token_mapping = token_to_time_token_mapping
@@ -195,6 +204,7 @@ class CEHRGPTConfig(PretrainedConfig):
         self.lab_token_loss_weight = lab_token_loss_weight
         self.entropy_penalty = entropy_penalty
         self.entropy_penalty_alpha = entropy_penalty_alpha
+        self.value_prediction_loss_weight = value_prediction_loss_weight
 
         kwargs["tie_word_embeddings"] = not use_pretrained_embeddings
 
```
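A brief example of the new configuration knobs (assuming all other `CEHRGPTConfig` arguments keep their defaults): `sample_packing_max_positions` falls back to `n_positions` when unset, and the new loss weights are exposed as plain attributes for the runners to consume:

```python
from cehrgpt.models.config import CEHRGPTConfig

config = CEHRGPTConfig(
    n_positions=1024,
    next_token_prediction_loss_weight=1.0,
    value_prediction_loss_weight=1.0,
    # sample_packing_max_positions omitted -> falls back to n_positions
)
assert config.sample_packing_max_positions == config.n_positions == 1024

packed = CEHRGPTConfig(n_positions=1024, sample_packing_max_positions=8192)
assert packed.sample_packing_max_positions == 8192
```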