genhpf 1.0.7__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of genhpf might be problematic. Click here for more details.

Files changed (84)
  1. {genhpf-1.0.7/src/genhpf.egg-info → genhpf-1.0.8}/PKG-INFO +1 -1
  2. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/preprocess_meds.py +27 -3
  3. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/train.py +3 -0
  4. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/trainer.py +2 -2
  5. {genhpf-1.0.7 → genhpf-1.0.8/src/genhpf.egg-info}/PKG-INFO +1 -1
  6. {genhpf-1.0.7 → genhpf-1.0.8}/.gitignore +0 -0
  7. {genhpf-1.0.7 → genhpf-1.0.8}/.pre-commit-config.yaml +0 -0
  8. {genhpf-1.0.7 → genhpf-1.0.8}/LICENSE +0 -0
  9. {genhpf-1.0.7 → genhpf-1.0.8}/README.md +0 -0
  10. {genhpf-1.0.7 → genhpf-1.0.8}/examples/pretrain/mlm/genhpf/flattened_pt.yaml +0 -0
  11. {genhpf-1.0.7 → genhpf-1.0.8}/examples/pretrain/simclr/genhpf/genhpf_hierarchical_pt.yaml +0 -0
  12. {genhpf-1.0.7 → genhpf-1.0.8}/examples/pretrain/wav2vec2/genhpf/hierarchical_pt.yaml +0 -0
  13. {genhpf-1.0.7 → genhpf-1.0.8}/examples/test/genhpf/genhpf_flattened.yaml +0 -0
  14. {genhpf-1.0.7 → genhpf-1.0.8}/examples/test/genhpf/genhpf_hierarchical.yaml +0 -0
  15. {genhpf-1.0.7 → genhpf-1.0.8}/examples/test/genhpf/meds_hierarchical.yaml +0 -0
  16. {genhpf-1.0.7 → genhpf-1.0.8}/examples/train/genhpf/genhpf_flattened_ft.yaml +0 -0
  17. {genhpf-1.0.7 → genhpf-1.0.8}/examples/train/genhpf/genhpf_hierarchical_ft.yaml +0 -0
  18. {genhpf-1.0.7 → genhpf-1.0.8}/examples/train/genhpf/genhpf_hierarchical_scr.yaml +0 -0
  19. {genhpf-1.0.7 → genhpf-1.0.8}/examples/train/genhpf/meds_hierarchical_scr.yaml +0 -0
  20. {genhpf-1.0.7 → genhpf-1.0.8}/pyproject.toml +0 -0
  21. {genhpf-1.0.7 → genhpf-1.0.8}/setup.cfg +0 -0
  22. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/__init__.py +0 -0
  23. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/__init__.py +0 -0
  24. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/config.yaml +0 -0
  25. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/configs.py +0 -0
  26. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/constants.py +0 -0
  27. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/initialize.py +0 -0
  28. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/configs/utils.py +0 -0
  29. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/__init__.py +0 -0
  30. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/binary_cross_entropy.py +0 -0
  31. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/binary_cross_entropy_with_logits.py +0 -0
  32. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/criterion.py +0 -0
  33. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/cross_entropy.py +0 -0
  34. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/multi_task_criterion.py +0 -0
  35. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/simclr_criterion.py +0 -0
  36. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/criterions/wav2vec2_criterion.py +0 -0
  37. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/datasets/__init__.py +0 -0
  38. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/datasets/dataset.py +0 -0
  39. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/datasets/genhpf_dataset.py +0 -0
  40. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/datasets/meds_dataset.py +0 -0
  41. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/loggings/__init__.py +0 -0
  42. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/loggings/meters.py +0 -0
  43. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/loggings/metrics.py +0 -0
  44. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/loggings/progress_bar.py +0 -0
  45. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/__init__.py +0 -0
  46. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/genhpf.py +0 -0
  47. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/genhpf_mlm.py +0 -0
  48. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/genhpf_predictor.py +0 -0
  49. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/genhpf_simclr.py +0 -0
  50. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/models/genhpf_wav2vec2.py +0 -0
  51. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/__init__.py +0 -0
  52. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/gather_layer.py +0 -0
  53. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/grad_multiply.py +0 -0
  54. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/gumbel_vector_quantizer.py +0 -0
  55. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/identity_layer.py +0 -0
  56. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/layer_norm.py +0 -0
  57. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/modules/positional_encoding.py +0 -0
  58. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/__init__.py +0 -0
  59. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/__init__.py +0 -0
  60. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/README.md +0 -0
  61. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/__init__.py +0 -0
  62. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/ehrs/__init__.py +0 -0
  63. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/ehrs/ehr.py +0 -0
  64. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/ehrs/eicu.py +0 -0
  65. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/ehrs/mimiciii.py +0 -0
  66. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/ehrs/mimiciv.py +0 -0
  67. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/main.py +0 -0
  68. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/manifest.py +0 -0
  69. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/sample_dataset.py +0 -0
  70. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/utils/__init__.py +0 -0
  71. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/genhpf/utils/utils.py +0 -0
  72. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/preprocess/manifest.py +0 -0
  73. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/scripts/test.py +0 -0
  74. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/checkpoint_utils.py +0 -0
  75. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/data_utils.py +0 -0
  76. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/distributed_utils.py +0 -0
  77. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/file_io.py +0 -0
  78. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/pdb.py +0 -0
  79. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf/utils/utils.py +0 -0
  80. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf.egg-info/SOURCES.txt +0 -0
  81. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf.egg-info/dependency_links.txt +0 -0
  82. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf.egg-info/entry_points.txt +0 -0
  83. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf.egg-info/requires.txt +0 -0
  84. {genhpf-1.0.7 → genhpf-1.0.8}/src/genhpf.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: genhpf
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
5
5
  Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
6
6
  License: MIT license
@@ -210,9 +210,16 @@ def main():
210
210
  data_path = Path(data_path)
211
211
  subdir = data_path.relative_to(root_path).parent
212
212
  if data_path.suffix == ".csv":
213
- data = pl.scan_csv(data_path)
213
+ data = pl.scan_csv(
214
+ data_path,
215
+ low_memory=True if args.debug else False,
216
+ )
214
217
  elif data_path.suffix == ".parquet":
215
- data = pl.scan_parquet(data_path)
218
+ data = pl.scan_parquet(
219
+ data_path,
220
+ parallel="none" if args.debug else "auto",
221
+ low_memory=True if args.debug else False,
222
+ )
216
223
  else:
217
224
  raise ValueError(f"Unsupported file format: {data_path.suffix}")
218
225
 
@@ -312,6 +319,9 @@ def main():
312
319
  pl.col("time").list.sample(n=pl.col("code").list.len(), with_replacement=True)
313
320
  )
314
321
 
322
+ if args.debug:
323
+ data = data[:5000]
324
+
315
325
  if str(subdir) != ".":
316
326
  output_name = str(subdir)
317
327
  else:
@@ -348,6 +358,7 @@ def main():
348
358
  d_labitems,
349
359
  warned_codes,
350
360
  max_event_length,
361
+ args.debug,
351
362
  )
352
363
 
353
364
  # meds --> remed
@@ -403,6 +414,7 @@ def meds_to_remed(
403
414
  d_labitems,
404
415
  warned_codes,
405
416
  max_event_length,
417
+ debug,
406
418
  df_chunk,
407
419
  ):
408
420
  code_matching_pattern = re.compile(r"\d+")
@@ -591,6 +603,14 @@ def meds_to_remed(
591
603
  maintain_order=True,
592
604
  ).agg(pl.all())
593
605
 
606
+ if debug:
607
+ df_chunk = df_chunk.with_columns(
608
+ [
609
+ pl.col("time").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(str))),
610
+ pl.col("data_index").map_elements(lambda x: x[-100:], return_dtype=pl.List(pl.List(int)))
611
+ ]
612
+ )
613
+
594
614
  df_chunk = df_chunk.sort(by=["subject_id", "cohort_end"])
595
615
  # regard {subject_id} as {cohort_id}: {subject_id}_{cohort_number}
596
616
  df_chunk = df_chunk.with_columns(pl.col("subject_id").cum_count().over("subject_id").alias("suffix"))
@@ -616,11 +636,15 @@ def meds_to_remed(
616
636
 
617
637
  sample_result = result.create_group(sample[0])
618
638
 
639
+ times = np.concatenate(sample[2])
619
640
  data_indices = np.concatenate(sample[3])
641
+ if debug:
642
+ data_indices = data_indices[-100:]
643
+ times = times[-100:]
644
+
620
645
  data = events_data[data_indices]
621
646
  sample_result.create_dataset("hi", data=data, dtype="i2", compression="lzf", shuffle=True)
622
647
 
623
- times = np.concatenate(sample[2])
624
648
  times = [datetime.strptime(x, "%Y-%m-%d %H:%M:%S") for x in times]
625
649
  times = np.cumsum(np.diff(times))
626
650
  times = list(map(lambda x: round(x.total_seconds() / 60), times))
@@ -36,6 +36,9 @@ def main(cfg: Config) -> None:
36
36
  # make hydra logging work with ddp (see https://github.com/facebookresearch/hydra/issues/1126)
37
37
  logging.config.dictConfig(OmegaConf.to_container(cfg.job_logging_cfg))
38
38
 
39
+ if cfg.common.debug:
40
+ os.environ["OMP_NUM_THREADS"] = "1"
41
+
39
42
  assert cfg.dataset.batch_size is not None, "batch_size must be specified"
40
43
  metrics.reset()
41
44
 
@@ -202,7 +202,7 @@ class Trainer(object):
202
202
  dataset,
203
203
  batch_size=self.cfg.dataset.batch_size,
204
204
  shuffle=True if not dist.is_initialized() else False,
205
- num_workers=self.cfg.dataset.num_workers,
205
+ num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
206
206
  collate_fn=dataset.collator,
207
207
  sampler=batch_sampler,
208
208
  )
@@ -220,7 +220,7 @@ class Trainer(object):
220
220
  dataset,
221
221
  batch_size=self.cfg.dataset.batch_size,
222
222
  shuffle=False,
223
- num_workers=self.cfg.dataset.num_workers,
223
+ num_workers=self.cfg.dataset.num_workers if not self.cfg.common.debug else 0,
224
224
  collate_fn=dataset.collator,
225
225
  sampler=batch_sampler,
226
226
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: genhpf
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
5
5
  Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
6
6
  License: MIT license
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes