PyPI - genhpf - Versions diffs - 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

genhpf 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of genhpf might be problematic. Click here for more details.

Files changed (9)

genhpf/scripts/preprocess/preprocess_meds.py CHANGED Viewed

@@ -129,6 +129,7 @@ def main():
     num_workers = max(args.workers, 1)
     if args.debug:
         num_workers = 1
+        os.environ["RAYON_RS_NUM_CPUS"] = "1"
     else:
         cpu_count = multiprocessing.cpu_count()
         if num_workers > cpu_count:
@@ -209,9 +210,16 @@ def main():
         data_path = Path(data_path)
         subdir = data_path.relative_to(root_path).parent
         if data_path.suffix == ".csv":
-            data = pl.scan_csv(data_path)
+            data = pl.scan_csv(
+                data_path,
+                low_memory=True if args.debug else False,
+            )
         elif data_path.suffix == ".parquet":
-            data = pl.scan_parquet(data_path)
+            data = pl.scan_parquet(
+                data_path,
+                parallel="none" if args.debug else "auto",
+                low_memory=True if args.debug else False,
+            )
         else:
             raise ValueError(f"Unsupported file format: {data_path.suffix}")
@@ -311,6 +319,9 @@ def main():
             pl.col("time").list.sample(n=pl.col("code").list.len(), with_replacement=True)
         )
+        if args.debug:
+            data = data[:5000]
         if str(subdir) != ".":
             output_name = str(subdir)
         else:
@@ -347,6 +358,7 @@ def main():
                 d_labitems,
                 warned_codes,
                 max_event_length,
+                args.debug,
             )
             # meds --> remed
@@ -402,6 +414,7 @@ def meds_to_remed(
     d_labitems,
     warned_codes,
     max_event_length,
+    debug,
     df_chunk,
 ):
     code_matching_pattern = re.compile(r"\d+")
@@ -590,6 +603,14 @@ def meds_to_remed(
         maintain_order=True,
     ).agg(pl.all())
+    if debug:
+        df_chunk = df_chunk.with_columns(
+            [
+            pl.col("time").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(str))),
+            pl.col("data_index").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(int)))
+            ]
+        )
     df_chunk = df_chunk.sort(by=["subject_id", "cohort_end"])
     # regard {subject_id} as {cohort_id}: {subject_id}_{cohort_number}
     df_chunk = df_chunk.with_columns(pl.col("subject_id").cum_count().over("subject_id").alias("suffix"))
@@ -615,11 +636,15 @@ def meds_to_remed(
             sample_result = result.create_group(sample[0])
+            times = np.concatenate(sample[2])
             data_indices = np.concatenate(sample[3])
+            if debug:
+                data_indices = data_indices[-100:]
+                times = times[-100:]
             data = events_data[data_indices]
             sample_result.create_dataset("hi", data=data, dtype="i2", compression="lzf", shuffle=True)
-            times = np.concatenate(sample[2])
             times = [datetime.strptime(x, "%Y-%m-%d %H:%M:%S") for x in times]
             times = np.cumsum(np.diff(times))
             times = list(map(lambda x: round(x.total_seconds() / 60), times))

genhpf/scripts/train.py CHANGED Viewed

@@ -36,6 +36,9 @@ def main(cfg: Config) -> None:
         # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126)
         logging.config.dictConfig(OmegaConf.to_container(cfg.job_logging_cfg))
+    if cfg.common.debug:
+        os.environ["OMP_NUM_THREADS"] = "1"
     assert cfg.dataset.batch_size is not None, "batch_size must be specified"
     metrics.reset()

genhpf/trainer.py CHANGED Viewed

@@ -202,7 +202,7 @@ class Trainer(object):
             dataset,
             batch_size=self.cfg.dataset.batch_size,
             shuffle=True if not dist.is_initialized() else False,
-            num_workers=self.cfg.dataset.num_workers,
+            num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
             collate_fn=dataset.collator,
             sampler=batch_sampler,
         )
@@ -220,7 +220,7 @@ class Trainer(object):
             dataset,
             batch_size=self.cfg.dataset.batch_size,
             shuffle=False,
-            num_workers=self.cfg.dataset.num_workers,
+            num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
             collate_fn=dataset.collator,
             sampler=batch_sampler,
         )

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: genhpf
-Version: 1.0.6
+Version: 1.0.8
 Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
 Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
 License: MIT license

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 genhpf/__init__.py,sha256=uh6oTFMxEX_AwRqlfDmNeS3kU4QhY-KXG6nsQ2kjWNo,219
-genhpf/trainer.py,sha256=TXertjuaRNPVxvgrwI1PTJBHvRFYgNVeaZ65R1tFPmI,13267
+genhpf/trainer.py,sha256=v8wadlwI_HCopbCyEkaHw_abu2MscPibJjBWMg5pFw0,13339
 genhpf/configs/__init__.py,sha256=L0heECTJaH5SyESeCWxbnpjAnJAIh8z05M8--DlQI8k,393
 genhpf/configs/config.yaml,sha256=0Y8eL7b8lh3ZVSO8h7JhTPHi_CcPQ69zBv-2iTocjAg,63
 genhpf/configs/configs.py,sha256=WpO_EzUoM32sKVtiVV4ynKrMGSt1Crdjf1C0Sc9Rhfg,10723
@@ -37,10 +37,10 @@ genhpf/modules/layer_norm.py,sha256=-aVKThi1pWvVMbMAzyQG1co6MHPBCUZgxWJKYzIqsPQ,
 genhpf/modules/positional_encoding.py,sha256=Rf_qHdQArljEggRO4EHufc_JHq9-i44Oog1w9Bh51DQ,754
 genhpf/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/test.py,sha256=DZPiZa-Tm6kKLcK3R1EH82gjq4Hbl098IAY4kA3fQxg,10288
-genhpf/scripts/train.py,sha256=V4abCZ0r6qWxeJZdXyk4uXSVFXscqH_dhzk7CZuWrBA,12872
+genhpf/scripts/train.py,sha256=-CY_OLRAX3wbthmH3fzkzSuZEEjHGKg0J4jzbbr9HoU,12942
 genhpf/scripts/preprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/preprocess/manifest.py,sha256=ZIK16e4vs_cS2K_tM1GaT38hc1nBHk6JB9Uga6OjgU4,2711
-genhpf/scripts/preprocess/preprocess_meds.py,sha256=4GIK-_sQwB5A11FSbr_VnABY2MHxxNbFVmzSo71KpgQ,25074
+genhpf/scripts/preprocess/preprocess_meds.py,sha256=mch8Zl9Ht28fx7nsYfuFb0sc_PN6l1kBQ5iCeEEcrFw,25856
 genhpf/scripts/preprocess/genhpf/README.md,sha256=qtpM_ABJk5yI8xbsUj1sZ71yX5bybx9ZvAymo0Lh5Vc,2877
 genhpf/scripts/preprocess/genhpf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 genhpf/scripts/preprocess/genhpf/main.py,sha256=EF3sce0ltowMHIGK7zLEQEOnzOWQ_WJxoBowknHV3mQ,6161
@@ -59,9 +59,9 @@ genhpf/utils/distributed_utils.py,sha256=000xKlw8SLoSH16o6n2bB3eueGR0aVD_DufPYES
 genhpf/utils/file_io.py,sha256=hnZXdMtAibfFDoIfn-SDusl-v7ZImeUEh0eD2MIxbG4,4919
 genhpf/utils/pdb.py,sha256=400rk1pVfOpVpzKIFHnTRlZ2VCtBqRh9G-pRRwu2Oqo,930
 genhpf/utils/utils.py,sha256=BoC_7Gz8uCHbUBCpcXGBMD-5irApi_6xM7nU-2ac4aA,6176
-genhpf-1.0.6.dist-info/LICENSE,sha256=VK_rvhY2Xi_DAIZHtauni5O9-1_do5SNWjrskv4amg8,1065
-genhpf-1.0.6.dist-info/METADATA,sha256=jNN97lqcfOLt3fbpmwd643IcTT-PlVof7IlkCur9zQs,10589
-genhpf-1.0.6.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-genhpf-1.0.6.dist-info/entry_points.txt,sha256=Wp94VV2w9KasBDLaluLM5EnjLgjNOAQVu44wKRDAwmQ,288
-genhpf-1.0.6.dist-info/top_level.txt,sha256=lk846Vmnvydb6UZn8xmowj60nkrZYexNOGGnPM-IbhA,7
-genhpf-1.0.6.dist-info/RECORD,,
+genhpf-1.0.8.dist-info/LICENSE,sha256=VK_rvhY2Xi_DAIZHtauni5O9-1_do5SNWjrskv4amg8,1065
+genhpf-1.0.8.dist-info/METADATA,sha256=k5-iE6UYfJ0rx_NJTuHVM4uw5IdhuJvztoORAtpc_6Q,10589
+genhpf-1.0.8.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+genhpf-1.0.8.dist-info/entry_points.txt,sha256=Wp94VV2w9KasBDLaluLM5EnjLgjNOAQVu44wKRDAwmQ,288
+genhpf-1.0.8.dist-info/top_level.txt,sha256=lk846Vmnvydb6UZn8xmowj60nkrZYexNOGGnPM-IbhA,7
+genhpf-1.0.8.dist-info/RECORD,,

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (76.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/LICENSE RENAMED Viewed

File without changes

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{genhpf-1.0.6.dist-info → genhpf-1.0.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

genhpf 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

Potentially problematic release.

genhpf 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl