PyPI - cehrgpt - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

cehrgpt 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

cehrgpt/analysis/irregularity.py +36 -0
cehrgpt/data/hf_cehrgpt_dataset.py +1 -0
cehrgpt/data/hf_cehrgpt_dataset_collator.py +398 -36
cehrgpt/data/hf_cehrgpt_dataset_mapping.py +214 -12
cehrgpt/data/sample_packing_sampler.py +36 -6
cehrgpt/generation/omop_converter_batch.py +32 -2
cehrgpt/gpt_utils.py +20 -2
cehrgpt/models/config.py +25 -0
cehrgpt/models/hf_cehrgpt.py +227 -33
cehrgpt/models/hf_modeling_outputs.py +1 -0
cehrgpt/models/special_tokens.py +1 -0
cehrgpt/models/tokenization_hf_cehrgpt.py +354 -71
cehrgpt/runners/data_utils.py +117 -2
cehrgpt/runners/hf_cehrgpt_finetune_runner.py +75 -50
cehrgpt/runners/hf_cehrgpt_pretrain_runner.py +59 -7
cehrgpt/runners/hf_gpt_runner_argument_dataclass.py +48 -0
cehrgpt/runners/hyperparameter_search_util.py +6 -7
cehrgpt/runners/sample_packing_trainer.py +17 -0
cehrgpt/time_to_event/config/1_year_cabg.yaml +23 -0
cehrgpt/time_to_event/time_to_event_model.py +2 -13
cehrgpt/time_to_event/time_to_event_prediction.py +27 -13
cehrgpt/tools/linear_prob/compute_cehrgpt_features.py +85 -57
{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/METADATA +8 -7
{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/RECORD +27 -25
{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/WHEEL +1 -1
{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/licenses/LICENSE +0 -0
{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/top_level.txt +0 -0

cehrgpt/time_to_event/time_to_event_model.py CHANGED Viewed

@@ -80,20 +80,9 @@ class TimeToEventModel:
         return token in self.outcome_events
     def simulate(
-        self, partial_history: Union[np.ndarray, List[str]]
+        self,
+        partial_history: Union[np.ndarray, List[str]],
     ) -> List[List[str]]:
-        sequence_is_demographics = len(partial_history) == 4 and partial_history[
-            0
-        ].startswith("year")
-        sequence_ends_ve = is_visit_end(partial_history[-1])
-        if not (sequence_is_demographics | sequence_ends_ve):
-            raise ValueError(
-                "There are only two types of sequences allowed. 1) the sequence only contains "
-                "demographics; 2) the sequence ends on VE;"
-            )
         token_ids = self.tokenizer.encode(partial_history)
         prompt = torch.tensor(token_ids).unsqueeze(0).to(self.device)

cehrgpt/time_to_event/time_to_event_prediction.py CHANGED Viewed

@@ -118,9 +118,9 @@ def main(args):
     LOG.info(f"Top P {args.top_p}")
     LOG.info(f"Top K {args.top_k}")
-    cehrgpt_model.resize_position_embeddings(
-        cehrgpt_model.config.max_position_embeddings + task_config.max_new_tokens
-    )
+    # cehrgpt_model.resize_position_embeddings(
+    #     cehrgpt_model.config.max_position_embeddings + task_config.max_new_tokens
+    # )
     generation_config = TimeToEventModel.get_generation_config(
         tokenizer=cehrgpt_tokenizer,
@@ -190,14 +190,22 @@ def main(args):
             args.max_n_trial,
         )
         visit_counter = sum([int(is_visit_end(_)) for _ in partial_history])
+        predicted_boolean_probability = (
+            sum([event != "0" for event in concept_time_to_event.outcome_events])
+            / len(concept_time_to_event.outcome_events)
+            if concept_time_to_event
+            else 0.0
+        )
         tte_outputs.append(
             {
-                "person_id": record["person_id"],
-                "index_date": record["index_date"],
+                "subject_id": record["person_id"],
+                "prediction_time": record["index_date"],
                 "visit_counter": visit_counter,
-                "label": label,
+                "boolean_value": label,
+                "predicted_boolean_probability": predicted_boolean_probability,
+                "predicted_boolean_value": None,
                 "time_to_event": time_to_event,
-                "prediction": (
+                "trials": (
                     asdict(concept_time_to_event) if concept_time_to_event else None
                 ),
             }
@@ -263,9 +271,13 @@ def filter_out_existing_results(
     parquet_files = glob.glob(os.path.join(prediction_output_folder_name, "*parquet"))
     if parquet_files:
         cohort_members = set()
-        results_dataframe = pd.read_parquet(parquet_files)[["person_id", "index_date"]]
+        results_dataframe = pd.read_parquet(parquet_files)[
+            ["subject_id", "prediction_time"]
+        ]
         for row in results_dataframe.itertuples():
-            cohort_members.add((row.person_id, row.index_date.strftime("%Y-%m-%d")))
+            cohort_members.add(
+                (row.subject_id, row.prediction_time.strftime("%Y-%m-%d"))
+            )
         def filter_func(batched):
             return [
@@ -292,12 +304,14 @@ def flush_to_disk_if_full(
         pd.DataFrame(
             tte_outputs,
             columns=[
-                "person_id",
-                "index_date",
+                "subject_id",
+                "prediction_time",
                 "visit_counter",
-                "label",
+                "boolean_value",
+                "predicted_boolean_probability",
+                "predicted_boolean_value",
                 "time_to_event",
-                "prediction",
+                "trials",
             ],
         ).to_parquet(output_parquet_file)
         tte_outputs.clear()

cehrgpt/tools/linear_prob/compute_cehrgpt_features.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Optional, Union
 import numpy as np
 import pandas as pd
+import polars as pl
 import torch
 import torch.distributed as dist
 from cehrbert.data_generators.hf_data_generator.meds_utils import CacheFileCollector
@@ -24,13 +25,18 @@ from cehrgpt.data.hf_cehrgpt_dataset_collator import (
     CehrGptDataCollator,
     SamplePackingCehrGptDataCollator,
 )
+from cehrgpt.data.hf_cehrgpt_dataset_mapping import ExtractTokenizedSequenceDataMapping
 from cehrgpt.data.sample_packing_sampler import SamplePackingBatchSampler
 from cehrgpt.models.hf_cehrgpt import (
     CEHRGPT2Model,
     extract_features_from_packed_sequence,
 )
+from cehrgpt.models.special_tokens import LINEAR_PROB_TOKEN
 from cehrgpt.models.tokenization_hf_cehrgpt import CehrGptTokenizer
-from cehrgpt.runners.data_utils import prepare_finetune_dataset
+from cehrgpt.runners.data_utils import (
+    extract_cohort_sequences,
+    prepare_finetune_dataset,
+)
 from cehrgpt.runners.gpt_runner_util import parse_runner_args
 from cehrgpt.runners.hf_cehrgpt_pretrain_runner import tokenizer_exists
@@ -112,6 +118,11 @@ def main():
         .eval()
         .to(device)
     )
+    if LINEAR_PROB_TOKEN not in cehrgpt_tokenizer.get_vocab():
+        cehrgpt_tokenizer.add_tokens(LINEAR_PROB_TOKEN)
+        cehrgpt_model.resize_token_embeddings(cehrgpt_tokenizer.vocab_size)
     prepared_ds_path = generate_prepared_ds_path(
         data_args, model_args, data_folder=data_args.cohort_folder
     )
@@ -137,39 +148,48 @@ def main():
     if processed_dataset is None:
         if is_main_process(training_args.local_rank):
-            # Organize them into a single DatasetDict
-            final_splits = prepare_finetune_dataset(
-                data_args, training_args, cehrgpt_args, cache_file_collector
-            )
-            if cehrgpt_args.expand_tokenizer:
-                new_tokenizer_path = os.path.expanduser(training_args.output_dir)
-                if tokenizer_exists(new_tokenizer_path):
-                    cehrgpt_tokenizer = CehrGptTokenizer.from_pretrained(
-                        new_tokenizer_path
-                    )
-                else:
-                    cehrgpt_tokenizer = CehrGptTokenizer.expand_trained_tokenizer(
-                        cehrgpt_tokenizer=cehrgpt_tokenizer,
-                        dataset=final_splits["train"],
-                        data_args=data_args,
-                        concept_name_mapping={},
-                    )
-                    cehrgpt_tokenizer.save_pretrained(
-                        os.path.expanduser(training_args.output_dir)
-                    )
-                # TODO: temp solution, this column is mixed typed and causes an issue when transforming the data
-            if not data_args.streaming:
-                all_columns = final_splits["train"].column_names
-                if "visit_concept_ids" in all_columns:
-                    final_splits = final_splits.remove_columns(["visit_concept_ids"])
-            processed_dataset = create_cehrgpt_finetuning_dataset(
-                dataset=final_splits,
-                cehrgpt_tokenizer=cehrgpt_tokenizer,
-                data_args=data_args,
-                cache_file_collector=cache_file_collector,
-            )
+            # If the full dataset has been tokenized, we don't want to tokenize the cohort containing
+            # the subset of the data. We should slice out the portion of the tokenized sequences for each sample
+            if cehrgpt_args.tokenized_full_dataset_path is not None:
+                processed_dataset = extract_cohort_sequences(
+                    data_args, cehrgpt_args, cache_file_collector
+                )
+            else:
+                # Organize them into a single DatasetDict
+                final_splits = prepare_finetune_dataset(
+                    data_args, training_args, cehrgpt_args, cache_file_collector
+                )
+                if cehrgpt_args.expand_tokenizer:
+                    new_tokenizer_path = os.path.expanduser(training_args.output_dir)
+                    if tokenizer_exists(new_tokenizer_path):
+                        cehrgpt_tokenizer = CehrGptTokenizer.from_pretrained(
+                            new_tokenizer_path
+                        )
+                    else:
+                        cehrgpt_tokenizer = CehrGptTokenizer.expand_trained_tokenizer(
+                            cehrgpt_tokenizer=cehrgpt_tokenizer,
+                            dataset=final_splits["train"],
+                            data_args=data_args,
+                            concept_name_mapping={},
+                        )
+                        cehrgpt_tokenizer.save_pretrained(
+                            os.path.expanduser(training_args.output_dir)
+                        )
+                    # TODO: temp solution, this column is mixed typed and causes an issue when transforming the data
+                if not data_args.streaming:
+                    all_columns = final_splits["train"].column_names
+                    if "visit_concept_ids" in all_columns:
+                        final_splits = final_splits.remove_columns(
+                            ["visit_concept_ids"]
+                        )
+                processed_dataset = create_cehrgpt_finetuning_dataset(
+                    dataset=final_splits,
+                    cehrgpt_tokenizer=cehrgpt_tokenizer,
+                    data_args=data_args,
+                    cache_file_collector=cache_file_collector,
+                )
             if not data_args.streaming:
                 processed_dataset.save_to_disk(prepared_ds_path)
                 processed_dataset.cleanup_cache_files()
@@ -244,6 +264,7 @@ def main():
             SamplePackingCehrGptDataCollator,
             cehrgpt_args.max_tokens_per_batch,
             cehrgpt_model.config.max_position_embeddings,
+            add_end_token_in_sample_packing=cehrgpt_args.add_end_token_in_sample_packing,
         )
         train_batch_sampler = SamplePackingBatchSampler(
             lengths=train_set["num_of_concepts"],
@@ -278,6 +299,7 @@ def main():
         include_ttv_prediction=False,
         use_sub_time_tokenization=False,
         include_demographics=cehrgpt_args.include_demographics,
+        add_linear_prob_token=True,
     )
     train_loader = DataLoader(
@@ -298,30 +320,36 @@ def main():
         batch_sampler=test_batch_sampler,
     )
-    # Loading demographics
-    print("Loading demographics as a dictionary")
-    demographics_df = pd.concat(
-        [
-            pd.read_parquet(
-                data_dir,
-                columns=[
-                    "person_id",
-                    "index_date",
-                    "gender_concept_id",
-                    "race_concept_id",
-                ],
-            )
-            for data_dir in [data_args.data_folder, data_args.test_data_folder]
-        ]
-    )
-    demographics_df["index_date"] = demographics_df.index_date.dt.date
-    demographics_dict = {
-        (row["person_id"], row["index_date"]): {
-            "gender_concept_id": row["gender_concept_id"],
-            "race_concept_id": row["race_concept_id"],
+    if data_args.is_data_in_meds:
+        demographics_dict = dict()
+    else:
+        # Loading demographics
+        print("Loading demographics as a dictionary")
+        demographics_df = pd.concat(
+            [
+                pd.read_parquet(
+                    data_dir,
+                    columns=[
+                        "person_id",
+                        "index_date",
+                        "gender_concept_id",
+                        "race_concept_id",
+                    ],
+                )
+                for data_dir in [data_args.data_folder, data_args.test_data_folder]
+            ]
+        )
+        # This is a pre-caution in case the index_date is not a datetime type
+        demographics_df["index_date"] = pd.to_datetime(
+            demographics_df["index_date"]
+        ).dt.date
+        demographics_dict = {
+            (row["person_id"], row["index_date"]): {
+                "gender_concept_id": row["gender_concept_id"],
+                "race_concept_id": row["race_concept_id"],
+            }
+            for _, row in demographics_df.iterrows()
         }
-        for _, row in demographics_df.iterrows()
-    }
     data_loaders = [("train", train_loader), ("test", test_dataloader)]

{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cehrgpt
-Version: 0.1.0
+Version: 0.1.1
 Summary: CEHR-GPT: Generating Electronic Health Records with Chronological Patient Timelines
 Author-email: Chao Pang <chaopang229@gmail.com>, Xinzhuo Jiang <xj2193@cumc.columbia.edu>, Krishna Kalluri <kk3326@cumc.columbia.edu>, Elise Minto <em3697@cumc.columbia.edu>, Jason Patterson <jp3477@cumc.columbia.edu>, Nishanth Parameshwar Pavinkurve <np2689@cumc.columbia.edu>, Karthik Natarajan <kn2174@cumc.columbia.edu>
 License: MIT License
@@ -12,14 +12,15 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: cehrbert==1.4.1
-Requires-Dist: cehrbert_data==0.0.7
+Requires-Dist: cehrbert==1.4.5
+Requires-Dist: cehrbert_data==0.0.11
 Requires-Dist: openai==1.54.3
 Requires-Dist: optuna==4.0.0
-Requires-Dist: transformers==4.44.0
+Requires-Dist: transformers==4.44.1
 Requires-Dist: tokenizers==0.19.0
 Requires-Dist: peft==0.10.0
 Requires-Dist: lightgbm
+Requires-Dist: polars
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
@@ -36,9 +37,9 @@ Dynamic: license-file
 [![PyPI - Version](https://img.shields.io/pypi/v/cehrgpt)](https://pypi.org/project/cehrgpt/)
 ![Python](https://img.shields.io/badge/-Python_3.11-blue?logo=python&logoColor=white)
-[![tests](https://github.com/knatarajan-lab/cehrgpt-public/actions/workflows/tests.yaml/badge.svg)](https://github.com/knatarajan-lab/cehrgpt-public/actions/workflows/tests.yml)
-[![license](https://img.shields.io/badge/License-MIT-green.svg?labelColor=gray)](https://github.com/knatarajan-lab/cehrgpt-public/blob/main/LICENSE)
-[![contributors](https://img.shields.io/github/contributors/knatarajan-lab/cehrgpt-public.svg)](https://github.com/knatarajan-lab/cehrgpt-public/graphs/contributors)
+[![tests](https://github.com/knatarajan-lab/cehrgpt/actions/workflows/tests.yaml/badge.svg)](https://github.com/knatarajan-lab/cehrgpt/actions/workflows/tests.yaml)
+[![license](https://img.shields.io/badge/License-MIT-green.svg?labelColor=gray)](https://github.com/knatarajan-lab/cehrgpt/blob/main/LICENSE)
+[![contributors](https://img.shields.io/github/contributors/knatarajan-lab/cehrgpt.svg)](https://github.com/knatarajan-lab/cehrgpt/graphs/contributors)
 ## Description
 CEHRGPT is a synthetic data generation model developed to handle structured electronic health records (EHR) with enhanced privacy and reliability. It leverages state-of-the-art natural language processing techniques to create realistic, anonymized patient data that can be used for research and development without compromising patient privacy.

{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,9 @@
 __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/cehrgpt_args.py,sha256=zPLp9Qjlq5PapWx3R15BNnyaX8zV3dxr4PuWj71r0Lg,3516
-cehrgpt/gpt_utils.py,sha256=bksHCXMX4j859VSv1Q284rVr4gn1Y8dCx4a_V-g4mug,10939
+cehrgpt/gpt_utils.py,sha256=IA5qw-hxcKkGO07AB47lDNRU6mlb9jblpKO7KeLLN78,11342
 cehrgpt/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cehrgpt/analysis/irregularity.py,sha256=Rfl_daMvSh9cZ68vUwfmuH-JYCFXdAph2ITHHffYC0Y,1047
 cehrgpt/analysis/privacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/analysis/privacy/attribute_inference.py,sha256=0ANVW0I5uvOl6IxQ15-vMVQd0mugOgSGReBUQQESImg,9368
 cehrgpt/analysis/privacy/attribute_inference_config.yml,sha256=hfLfpBlDqqsNOynpRHK414vV24edKA6ta-inmEhM2ao,103272
@@ -11,22 +12,22 @@ cehrgpt/analysis/privacy/nearest_neighbor_inference.py,sha256=qoJgWW7VsUMzjMGpTa
 cehrgpt/analysis/privacy/reid_inference.py,sha256=Pypd3QJXQNY8VljpnIEa5zeAbTZHMjQOazaL-9VsBGw,13955
 cehrgpt/analysis/privacy/utils.py,sha256=CRA4H9mPLBjMQGKzZ_x_3ro3tMap-NjsMDVqSOjHSVQ,8226
 cehrgpt/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cehrgpt/data/hf_cehrgpt_dataset.py,sha256=t9vpN05e--CiKgIlxLP0aLacISnvWWDPXtuFuJi3ksE,3736
-cehrgpt/data/hf_cehrgpt_dataset_collator.py,sha256=DOvIF4Wzkd8-IO3zpIRZkX1j0IdvefaiSnrDn1YivCk,27912
-cehrgpt/data/hf_cehrgpt_dataset_mapping.py,sha256=eI8CTk6yJ4DlNJWrNAkEmhWh353NeLqg5rwPpKqKT-U,17308
-cehrgpt/data/sample_packing_sampler.py,sha256=0uKTbvtXpfS81esy_3epJ88eohyJPK46bfmxhle1fws,5419
+cehrgpt/data/hf_cehrgpt_dataset.py,sha256=hwJlGW7XiJIr6cXtmwvReQf9yLZJPD-dvJGvRg5ERqU,3755
+cehrgpt/data/hf_cehrgpt_dataset_collator.py,sha256=ACMXiaYnR3bKD5dRleL0_siEvhL-2HAFcy5eBgvxnH4,44412
+cehrgpt/data/hf_cehrgpt_dataset_mapping.py,sha256=KU0WMjc2vT1zBAl7JJkOc8dgGxsL1uFDy4dDrv-RkII,25668
+cehrgpt/data/sample_packing_sampler.py,sha256=vovGMtmhG70DRkSCeiaDEJ_rjKZ38y-YLaI1kkhFEkI,6747
 cehrgpt/generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/generation/chatgpt_generation.py,sha256=SrnLwHLdNtnAOEg36gNjqfoT9yd12iyPgpZffL2AFJo,4428
 cehrgpt/generation/generate_batch_hf_gpt_sequence.py,sha256=uSEh8aMmPD61nGewIaPSkIqm-2AxDjCBiu4cBfxHxU4,11503
-cehrgpt/generation/omop_converter_batch.py,sha256=-c0AlDVy5pJ5Afhr8ERiCHhoRrEk8ozJi3g0yFdWaMI,25348
+cehrgpt/generation/omop_converter_batch.py,sha256=LUmCD-t_6ZP1YfNDZCqYewl-XIIaIgRZ_dAxuR_VdCQ,26275
 cehrgpt/generation/omop_entity.py,sha256=Q5Sr0AlyuPAm1FRPfnJO13q-u1fqRgYVHXruZ9g4xNE,19400
 cehrgpt/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cehrgpt/models/config.py,sha256=Y3CiXZWniLP9_RlpU80Oe9gjn5leLmTYnNe_fWqfJLQ,10158
-cehrgpt/models/hf_cehrgpt.py,sha256=3EQIOfa--oz4f8bM8KzbDi98G3XrUEQkox1vmBN001M,83321
-cehrgpt/models/hf_modeling_outputs.py,sha256=LaWa1jI6BRIKMEjWOy1QUeOfTur5y_p2c-JyuGVTdtw,10301
+cehrgpt/models/config.py,sha256=nOAKgH5420HLCcy7n1hE7MbqR861Iq4DTutKoAd25tg,11090
+cehrgpt/models/hf_cehrgpt.py,sha256=77CAkdMPgxD4xSpFU7gYGzRn6_Iv-4q7FnHpnZGsKxw,92450
+cehrgpt/models/hf_modeling_outputs.py,sha256=5X4WEYKqT37phv_e5ZAv3A_N0wqdAUJLJRm6TxS6dDQ,10356
 cehrgpt/models/pretrained_embeddings.py,sha256=vLLVs17TLpXRqCVEWQxGGwPHkUJUO7laNTeBuyBK_yk,3238
-cehrgpt/models/special_tokens.py,sha256=-a7HPJBbdIH0qQ6B3CcRKqvpG6FZlm4nbVPTswGSJ4U,485
-cehrgpt/models/tokenization_hf_cehrgpt.py,sha256=jjCRqS29IzMnKp40jNOs80UKh2z9lK5S6M02GSB-4mk,42351
+cehrgpt/models/special_tokens.py,sha256=lrw45B4tea4Dsajn09Cz6w5D2TfHmYXikZkgwnstu_o,521
+cehrgpt/models/tokenization_hf_cehrgpt.py,sha256=cAxHTctpVBxfWfC3XcwDQavN1zwWN9Nid_Fajd5zQWQ,53159
 cehrgpt/omop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/omop/condition_era.py,sha256=hPZALz2XaWnro_1bwIYNkI48foOJjueyg3CZ1BliCno,626
 cehrgpt/omop/observation_period.py,sha256=TRMgv5Ya2RaS2im7oQ6BLC_5JL9EJYNYR62ApxIuHvg,1211
@@ -37,22 +38,23 @@ cehrgpt/omop/queries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 cehrgpt/omop/queries/condition_era.py,sha256=LFB6vBAvshHJxtYIRkl7cfrF0kf7ay0piBKpmHBwrpE,2578
 cehrgpt/omop/queries/observation_period.py,sha256=fpzr5DMNw-QLoSwp2Iatfch88E3hyhZ75usiIdG3A0U,6410
 cehrgpt/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cehrgpt/runners/data_utils.py,sha256=ScZZnfXwgXKaMvKgFzdb4vtQ7F_lw97O5uNsFbfsyP4,10620
+cehrgpt/runners/data_utils.py,sha256=I6k1TkiiZR8ggw3eVO16g2lVPY-Hu3b-nbrIOKlFIO0,15528
 cehrgpt/runners/gpt_runner_util.py,sha256=YJQSRW9Mo4TjXSOUOTf6BUFcs1MGFiXU5T4ztKZcYhU,3485
-cehrgpt/runners/hf_cehrgpt_finetune_runner.py,sha256=bkPl30Y9CSXBlmMkH-3cA3-aW8XJK36Q-adx___WjkE,26921
-cehrgpt/runners/hf_cehrgpt_pretrain_runner.py,sha256=ViVa_flEGdk_SO0psMR7ho-o79igsz_l1x80u81WJ3A,23875
-cehrgpt/runners/hf_gpt_runner_argument_dataclass.py,sha256=VrqgDSiAMfGyHEIodoOg_8LU5O0ndWf9EE0YOKDFKKA,7019
-cehrgpt/runners/hyperparameter_search_util.py,sha256=pWFmGo9Ezju4YmuZ-ohbAbYB0GGMfIDVUCyvcTxS1iU,9153
-cehrgpt/runners/sample_packing_trainer.py,sha256=aezX30vxpP1DDcH5hO-yn395NqBKi2Xhb0mFNHi9OBs,7340
+cehrgpt/runners/hf_cehrgpt_finetune_runner.py,sha256=GVbHHqf5TWGbVWlQG-XurgYH8pKRjTk8ug_ib9L9U7E,28118
+cehrgpt/runners/hf_cehrgpt_pretrain_runner.py,sha256=ERSnvB38fPYVghtKQeNTZ8VfeXnoRcCHB0cWISWaZ84,26523
+cehrgpt/runners/hf_gpt_runner_argument_dataclass.py,sha256=ejAFLM9g765p1fyeF5MITsiIeWHKkz9wTeFDeVgxSto,8851
+cehrgpt/runners/hyperparameter_search_util.py,sha256=YWdFQ1igQs-G_wqWUrUzYraGiz8OSpSYyvid-I5nhWA,9262
+cehrgpt/runners/sample_packing_trainer.py,sha256=Zb7Aqwnk8-VqrjEKUVeg5XzZWmHxXOU2sDn1YURS-FU,7960
 cehrgpt/simulations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cehrgpt/simulations/generate_plots.py,sha256=BTZ71r8Kah0PMorkiO3vw55_p_9U1Z8KiD3GsPfaV0s,2520
 cehrgpt/simulations/run_simulation.sh,sha256=DcJ6B19jIteUO0pZ0Tc21876lB9XxQHFAxlre7MtAzk,795
 cehrgpt/simulations/time_embedding_simulation.py,sha256=HZ-imXH-bN-QYZN1PAfcERmNtaWIwKjbf0UrZduwCiA,8687
 cehrgpt/simulations/time_token_simulation.py,sha256=sLg8vVXydvR_zk3BbqyrlA7sDIdhFnS-s5pSKcCilSc,6057
 cehrgpt/time_to_event/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cehrgpt/time_to_event/time_to_event_model.py,sha256=tfXa24l_0q1TBZ68BPRrHRC_3KRWYxrWGIv4myJlIb8,8497
-cehrgpt/time_to_event/time_to_event_prediction.py,sha256=Ajesq2gSsILghWHCTLiiBhcyOCa7m6JPPMdi_xvBlR4,12624
+cehrgpt/time_to_event/time_to_event_model.py,sha256=Plm0bZxvlAbnMl82DTBXWvaXLvrqcdkzcP_celX8WC4,8055
+cehrgpt/time_to_event/time_to_event_prediction.py,sha256=W2e7UqIV7ELdfTy997HS66vggjnhdncCKt840knI0Dw,13183
 cehrgpt/time_to_event/time_to_event_utils.py,sha256=KN4hwGgxy2nJtO7osbYQBF3-HpmGUWefNfexzPYiEwc,1937
+cehrgpt/time_to_event/config/1_year_cabg.yaml,sha256=SFF2-F5D02pDSMRddDrEUoERBCd0t2Hzln_xC-Mo2hA,407
 cehrgpt/time_to_event/config/30_day_readmission.yaml,sha256=Hn5KnEXMtSV_CtCpmAU4wjkc0-gTXvniaH991TSbUXA,234
 cehrgpt/time_to_event/config/next_visit_type_prediction.yaml,sha256=WMj2ZutEvHKIMyGG51xtXaL6MyRANKvpg9xT8ouctLc,319
 cehrgpt/time_to_event/config/t2dm_hf.yaml,sha256=_oMQzh2eJTYzEaMOpmhAzbX-qmdsKlkORELL6HxOxHo,202
@@ -63,10 +65,10 @@ cehrgpt/tools/generate_pretrained_embeddings.py,sha256=lhFSacGv8bMld6qigKZN8Op8e
 cehrgpt/tools/merge_synthetic_real_dataasets.py,sha256=O1dbQ32Le0t15fwymwAh9mfNVLEWuFwW53DNvESrWbY,7589
 cehrgpt/tools/upload_omop_tables.py,sha256=vdBAbkeAsGPA4NsyhNjelPVj3gS8yzmS1sKNM1Qk96g,3791
 cehrgpt/tools/linear_prob/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cehrgpt/tools/linear_prob/compute_cehrgpt_features.py,sha256=jVgAmBrZKp7ABfqKkzwV5Vl_G9jDCjPl98NSVmSwHpE,19291
+cehrgpt/tools/linear_prob/compute_cehrgpt_features.py,sha256=q0rmlBWDDEkjHjwcTouGUhCYa32a1vRicaDOAMsdW0I,20741
 cehrgpt/tools/linear_prob/train_with_cehrgpt_features.py,sha256=w0UvzMKYGenN_KDVnbzutmy8IPLUxW5hPvpKKxDSL5U,5820
-cehrgpt-0.1.0.dist-info/licenses/LICENSE,sha256=LOfC32zkfUIdGm8e_098jPbt8OHKtNWymDzxn2pA9Zk,1093
-cehrgpt-0.1.0.dist-info/METADATA,sha256=V02vsptjJRD_bybXVRFXPrJa-By9CX4j-oAA3EfXFq4,4933
-cehrgpt-0.1.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-cehrgpt-0.1.0.dist-info/top_level.txt,sha256=akNCJBbMSLV8nkOzdVzdy13hMJ5CIQURnAS_YYEDVwA,17
-cehrgpt-0.1.0.dist-info/RECORD,,
+cehrgpt-0.1.1.dist-info/licenses/LICENSE,sha256=LOfC32zkfUIdGm8e_098jPbt8OHKtNWymDzxn2pA9Zk,1093
+cehrgpt-0.1.1.dist-info/METADATA,sha256=VnXH74vJQZaV7VxGiIvJnFhQA0jzJQNx86yHFkygobM,4922
+cehrgpt-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cehrgpt-0.1.1.dist-info/top_level.txt,sha256=akNCJBbMSLV8nkOzdVzdy13hMJ5CIQURnAS_YYEDVwA,17
+cehrgpt-0.1.1.dist-info/RECORD,,

{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.7.1)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{cehrgpt-0.1.0.dist-info → cehrgpt-0.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

cehrgpt 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

cehrgpt 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl