PyPI - genhpf - Versions diffs - 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

genhpf 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of genhpf might be problematic. Click here for more details.

Files changed (10)

genhpf/models/genhpf.py CHANGED Viewed

@@ -73,6 +73,7 @@ class GenHPFConfig(BaseConfig):
     )
     vocab_size: int = II("dataset.vocab_size")
+    debug: bool = II("common.debug")
 class GenHPF(nn.Module):
@@ -80,6 +81,16 @@ class GenHPF(nn.Module):
         super().__init__()
         self.cfg = cfg
+        if cfg.debug:
+            cfg.encoder_layers = 1
+            cfg.encoder_embed_dim = 32
+            cfg.encoder_ffn_embed_dim = 128
+            cfg.encoder_attention_heads = 2
+            cfg.agg_layers = 1
+            cfg.agg_embed_dim = 32
+            cfg.agg_ffn_embed_dim = 128
+            cfg.agg_attention_heads = 2
         self.structure = cfg.structure
         assert self.structure in GENHPF_MODEL_ARCH_CHOICES

genhpf/scripts/preprocess/preprocess_meds.py CHANGED Viewed

@@ -210,9 +210,16 @@ def main():
         data_path = Path(data_path)
         subdir = data_path.relative_to(root_path).parent
         if data_path.suffix == ".csv":
-            data = pl.scan_csv(data_path)
+            data = pl.scan_csv(
+                data_path,
+                low_memory=True if args.debug else False,
+            )
         elif data_path.suffix == ".parquet":
-            data = pl.scan_parquet(data_path)
+            data = pl.scan_parquet(
+                data_path,
+                parallel="none" if args.debug else "auto",
+                low_memory=True if args.debug else False,
+            )
         else:
             raise ValueError(f"Unsupported file format: {data_path.suffix}")
@@ -312,6 +319,9 @@ def main():
             pl.col("time").list.sample(n=pl.col("code").list.len(), with_replacement=True)
         )
+        if args.debug:
+            data = data[:5000]
         if str(subdir) != ".":
             output_name = str(subdir)
         else:
@@ -348,6 +358,7 @@ def main():
                 d_labitems,
                 warned_codes,
                 max_event_length,
+                args.debug,
             )
             # meds --> remed
@@ -403,6 +414,7 @@ def meds_to_remed(
     d_labitems,
     warned_codes,
     max_event_length,
+    debug,
     df_chunk,
 ):
     code_matching_pattern = re.compile(r"\d+")
@@ -591,6 +603,14 @@ def meds_to_remed(
         maintain_order=True,
     ).agg(pl.all())
+    if debug:
+        df_chunk = df_chunk.with_columns(
+            [
+            pl.col("time").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(str))),
+            pl.col("data_index").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(int)))
+            ]
+        )
     df_chunk = df_chunk.sort(by=["subject_id", "cohort_end"])
     # regard {subject_id} as {cohort_id}: {subject_id}_{cohort_number}
     df_chunk = df_chunk.with_columns(pl.col("subject_id").cum_count().over("subject_id").alias("suffix"))
@@ -616,11 +636,15 @@ def meds_to_remed(
             sample_result = result.create_group(sample[0])
+            times = np.concatenate(sample[2])
             data_indices = np.concatenate(sample[3])
+            if debug:
+                data_indices = data_indices[-100:]
+                times = times[-100:]
             data = events_data[data_indices]
             sample_result.create_dataset("hi", data=data, dtype="i2", compression="lzf", shuffle=True)
-            times = np.concatenate(sample[2])
             times = [datetime.strptime(x, "%Y-%m-%d %H:%M:%S") for x in times]
             times = np.cumsum(np.diff(times))
             times = list(map(lambda x: round(x.total_seconds() / 60), times))

genhpf/scripts/train.py CHANGED Viewed

@@ -36,6 +36,13 @@ def main(cfg: Config) -> None:
         # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126)
         logging.config.dictConfig(OmegaConf.to_container(cfg.job_logging_cfg))
+    if cfg.common.debug:
+        os.environ["OMP_NUM_THREADS"] = "4"
+        os.environ["MKL_NUM_THREADS"] = "4"
+        torch.set_num_threads(4)
+        torch.set_num_interop_threads(4)
+        cfg.optimization.max_epoch = 1
     assert cfg.dataset.batch_size is not None, "batch_size must be specified"
     metrics.reset()

genhpf/trainer.py CHANGED Viewed

@@ -202,7 +202,7 @@ class Trainer(object):
             dataset,
             batch_size=self.cfg.dataset.batch_size,
             shuffle=True if not dist.is_initialized() else False,
-            num_workers=self.cfg.dataset.num_workers,
+            num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
             collate_fn=dataset.collator,
             sampler=batch_sampler,
         )
@@ -220,7 +220,7 @@ class Trainer(object):
             dataset,
             batch_size=self.cfg.dataset.batch_size,
             shuffle=False,
-            num_workers=self.cfg.dataset.num_workers,
+            num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
             collate_fn=dataset.collator,
             sampler=batch_sampler,
         )

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: genhpf
-Version: 1.0.7
+Version: 1.0.9
 Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
 Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
 License: MIT license

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 genhpf/__init__.py,sha256=uh6oTFMxEX_AwRqlfDmNeS3kU4QhY-KXG6nsQ2kjWNo,219
-genhpf/trainer.py,sha256=TXertjuaRNPVxvgrwI1PTJBHvRFYgNVeaZ65R1tFPmI,13267
+genhpf/trainer.py,sha256=v8wadlwI_HCopbCyEkaHw_abu2MscPibJjBWMg5pFw0,13339
 genhpf/configs/__init__.py,sha256=L0heECTJaH5SyESeCWxbnpjAnJAIh8z05M8--DlQI8k,393
 genhpf/configs/config.yaml,sha256=0Y8eL7b8lh3ZVSO8h7JhTPHi_CcPQ69zBv-2iTocjAg,63
 genhpf/configs/configs.py,sha256=WpO_EzUoM32sKVtiVV4ynKrMGSt1Crdjf1C0Sc9Rhfg,10723
@@ -23,7 +23,7 @@ genhpf/loggings/meters.py,sha256=ECdJTwFHx_4D22iNbv9VRxlh9iibX8aU9QeHPkqNmXQ,107
 genhpf/loggings/metrics.py,sha256=3CSBA5C3bd-G-zNer7BeOqSZj-tn6twbpLqAlt-FQ_A,3935
 genhpf/loggings/progress_bar.py,sha256=9-24WAFDsp6WSS-JncnQtQMwo7DnNEYakAt7a8pkhF0,14140
 genhpf/models/__init__.py,sha256=EG4YnL8Uiem8iUNm72euHJlim0IZj3inzFVFCFOvPCE,2223
-genhpf/models/genhpf.py,sha256=vqyJn9wv3eXLaxi0RTkqwuYTVzH3vTAVR1e5LOGUlhc,9344
+genhpf/models/genhpf.py,sha256=Y9f8H3fgUm1H-QWTnRzcQMu1Pkl6i0ZNNRuSmZZ6Zh0,9712
 genhpf/models/genhpf_mlm.py,sha256=rExPpm1HDjljAjgFbYx2bgS6VSaIKF6-P7VJcq6YLB0,1882
 genhpf/models/genhpf_predictor.py,sha256=i-XIh7S3ozpB_r4JZI27sfdnbANyQYpBIOrDDgsiWvc,2163
 genhpf/models/genhpf_simclr.py,sha256=Iuqx0fy0AQurkTk0e5hEv12eJyeGGGiQJiRKXGgOTnI,1629
@@ -37,10 +37,10 @@ genhpf/modules/layer_norm.py,sha256=-aVKThi1pWvVMbMAzyQG1co6MHPBCUZgxWJKYzIqsPQ,
 genhpf/modules/positional_encoding.py,sha256=Rf_qHdQArljEggRO4EHufc_JHq9-i44Oog1w9Bh51DQ,754
 genhpf/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/test.py,sha256=DZPiZa-Tm6kKLcK3R1EH82gjq4Hbl098IAY4kA3fQxg,10288
-genhpf/scripts/train.py,sha256=V4abCZ0r6qWxeJZdXyk4uXSVFXscqH_dhzk7CZuWrBA,12872
+genhpf/scripts/train.py,sha256=juUgfSVLAXhtBPzIEG09W5lkLlKIv2GHIbMn7IgBJjc,13099
 genhpf/scripts/preprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/preprocess/manifest.py,sha256=ZIK16e4vs_cS2K_tM1GaT38hc1nBHk6JB9Uga6OjgU4,2711
-genhpf/scripts/preprocess/preprocess_meds.py,sha256=KvWDSdK13HMetGWUyY47sXHRazeeMVulmbUtQzgWsAk,25120
+genhpf/scripts/preprocess/preprocess_meds.py,sha256=mch8Zl9Ht28fx7nsYfuFb0sc_PN6l1kBQ5iCeEEcrFw,25856
 genhpf/scripts/preprocess/genhpf/README.md,sha256=qtpM_ABJk5yI8xbsUj1sZ71yX5bybx9ZvAymo0Lh5Vc,2877
 genhpf/scripts/preprocess/genhpf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/preprocess/genhpf/main.py,sha256=EF3sce0ltowMHIGK7zLEQEOnzOWQ_WJxoBowknHV3mQ,6161
@@ -59,9 +59,9 @@ genhpf/utils/distributed_utils.py,sha256=000xKlw8SLoSH16o6n2bB3eueGR0aVD_DufPYES
 genhpf/utils/file_io.py,sha256=hnZXdMtAibfFDoIfn-SDusl-v7ZImeUEh0eD2MIxbG4,4919
 genhpf/utils/pdb.py,sha256=400rk1pVfOpVpzKIFHnTRlZ2VCtBqRh9G-pRRwu2Oqo,930
 genhpf/utils/utils.py,sha256=BoC_7Gz8uCHbUBCpcXGBMD-5irApi_6xM7nU-2ac4aA,6176
-genhpf-1.0.7.dist-info/LICENSE,sha256=VK_rvhY2Xi_DAIZHtauni5O9-1_do5SNWjrskv4amg8,1065
-genhpf-1.0.7.dist-info/METADATA,sha256=wvTqvgGe0phIuSht6jnB070jehnVvhAIu1Dff6CKqy8,10589
-genhpf-1.0.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-genhpf-1.0.7.dist-info/entry_points.txt,sha256=Wp94VV2w9KasBDLaluLM5EnjLgjNOAQVu44wKRDAwmQ,288
-genhpf-1.0.7.dist-info/top_level.txt,sha256=lk846Vmnvydb6UZn8xmowj60nkrZYexNOGGnPM-IbhA,7
-genhpf-1.0.7.dist-info/RECORD,,
+genhpf-1.0.9.dist-info/LICENSE,sha256=VK_rvhY2Xi_DAIZHtauni5O9-1_do5SNWjrskv4amg8,1065
+genhpf-1.0.9.dist-info/METADATA,sha256=0YRTk9CjFLdEVayQOm7mvdDUi1oBVTLv-v-GANBbuaY,10589
+genhpf-1.0.9.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+genhpf-1.0.9.dist-info/entry_points.txt,sha256=Wp94VV2w9KasBDLaluLM5EnjLgjNOAQVu44wKRDAwmQ,288
+genhpf-1.0.9.dist-info/top_level.txt,sha256=lk846Vmnvydb6UZn8xmowj60nkrZYexNOGGnPM-IbhA,7
+genhpf-1.0.9.dist-info/RECORD,,

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/LICENSE RENAMED Viewed

File without changes

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{genhpf-1.0.7.dist-info → genhpf-1.0.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

genhpf 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

Potentially problematic release.

genhpf 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl