genhpf 1.0.2__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genhpf might be problematic. Click here for more details.
- {genhpf-1.0.2/src/genhpf.egg-info → genhpf-1.0.3}/PKG-INFO +1 -1
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/main.py +5 -4
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/preprocess_meds.py +45 -20
- {genhpf-1.0.2 → genhpf-1.0.3/src/genhpf.egg-info}/PKG-INFO +1 -1
- {genhpf-1.0.2 → genhpf-1.0.3}/.gitignore +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/.pre-commit-config.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/LICENSE +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/README.md +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/pretrain/mlm/genhpf/flattened_pt.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/pretrain/simclr/genhpf/genhpf_hierarchical_pt.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/pretrain/wav2vec2/genhpf/hierarchical_pt.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/test/genhpf/genhpf_flattened.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/test/genhpf/genhpf_hierarchical.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/test/genhpf/meds_hierarchical.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/train/genhpf/genhpf_flattened_ft.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/train/genhpf/genhpf_hierarchical_ft.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/train/genhpf/genhpf_hierarchical_scr.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/examples/train/genhpf/meds_hierarchical_scr.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/pyproject.toml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/requirements.txt +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/setup.cfg +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/config.yaml +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/configs.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/constants.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/initialize.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/configs/utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/binary_cross_entropy.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/binary_cross_entropy_with_logits.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/criterion.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/cross_entropy.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/multi_task_criterion.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/simclr_criterion.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/criterions/wav2vec2_criterion.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/datasets/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/datasets/dataset.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/datasets/genhpf_dataset.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/datasets/meds_dataset.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/loggings/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/loggings/meters.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/loggings/metrics.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/loggings/progress_bar.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/genhpf.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/genhpf_mlm.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/genhpf_predictor.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/genhpf_simclr.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/models/genhpf_wav2vec2.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/gather_layer.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/grad_multiply.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/gumbel_vector_quantizer.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/identity_layer.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/layer_norm.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/modules/positional_encoding.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/README.md +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/ehrs/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/ehrs/ehr.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/ehrs/eicu.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/ehrs/mimiciii.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/ehrs/mimiciv.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/manifest.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/sample_dataset.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/utils/__init__.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/genhpf/utils/utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/preprocess/manifest.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/test.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/scripts/train.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/trainer.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/checkpoint_utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/data_utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/distributed_utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/file_io.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/pdb.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf/utils/utils.py +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf.egg-info/SOURCES.txt +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf.egg-info/dependency_links.txt +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf.egg-info/entry_points.txt +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf.egg-info/requires.txt +0 -0
- {genhpf-1.0.2 → genhpf-1.0.3}/src/genhpf.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: genhpf
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
|
|
5
5
|
Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
|
|
6
6
|
License: MIT license
|
|
@@ -151,7 +151,10 @@ def get_parser():
|
|
|
151
151
|
return parser
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
def main(args):
|
|
154
|
+
def main():
|
|
155
|
+
parser = get_parser()
|
|
156
|
+
args = parser.parse_args()
|
|
157
|
+
|
|
155
158
|
if not os.path.exists(args.dest):
|
|
156
159
|
os.makedirs(args.dest)
|
|
157
160
|
|
|
@@ -169,6 +172,4 @@ def main(args):
|
|
|
169
172
|
|
|
170
173
|
|
|
171
174
|
if __name__ == "__main__":
|
|
172
|
-
|
|
173
|
-
args = parser.parse_args()
|
|
174
|
-
main(args)
|
|
175
|
+
main()
|
|
@@ -71,6 +71,12 @@ def get_parser():
|
|
|
71
71
|
default="outputs",
|
|
72
72
|
help="directory to save processed outputs.",
|
|
73
73
|
)
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
"--skip-if-exists",
|
|
76
|
+
action="store_true",
|
|
77
|
+
help="whether or not to skip the processing if the output directory already "
|
|
78
|
+
"exists.",
|
|
79
|
+
)
|
|
74
80
|
parser.add_argument(
|
|
75
81
|
"--rebase",
|
|
76
82
|
action="store_true",
|
|
@@ -101,23 +107,16 @@ def get_parser():
|
|
|
101
107
|
return parser
|
|
102
108
|
|
|
103
109
|
|
|
104
|
-
def main(args):
|
|
110
|
+
def main():
|
|
111
|
+
parser = get_parser()
|
|
112
|
+
args = parser.parse_args()
|
|
113
|
+
|
|
105
114
|
root_path = Path(args.root)
|
|
106
115
|
output_dir = Path(args.output_dir)
|
|
107
116
|
metadata_dir = Path(args.metadata_dir)
|
|
108
117
|
mimic_dir = Path(args.mimic_dir) if args.mimic_dir is not None else None
|
|
109
118
|
|
|
110
|
-
|
|
111
|
-
output_dir.mkdir()
|
|
112
|
-
else:
|
|
113
|
-
if args.rebase:
|
|
114
|
-
shutil.rmtree(output_dir)
|
|
115
|
-
if output_dir.exists():
|
|
116
|
-
raise ValueError(
|
|
117
|
-
f"File exists: '{str(output_dir.resolve())}'. If you want to rebase the "
|
|
118
|
-
"directory, please run the script with --rebase."
|
|
119
|
-
)
|
|
120
|
-
output_dir.mkdir()
|
|
119
|
+
num_workers = max(args.workers, 1)
|
|
121
120
|
|
|
122
121
|
if root_path.is_dir():
|
|
123
122
|
data_paths = glob.glob(str(root_path / "**/*.csv"), recursive=True)
|
|
@@ -128,6 +127,34 @@ def main(args):
|
|
|
128
127
|
else:
|
|
129
128
|
data_paths = [root_path]
|
|
130
129
|
|
|
130
|
+
if not output_dir.exists():
|
|
131
|
+
output_dir.mkdir()
|
|
132
|
+
else:
|
|
133
|
+
if args.rebase:
|
|
134
|
+
shutil.rmtree(output_dir)
|
|
135
|
+
if output_dir.exists():
|
|
136
|
+
if args.skip_if_exists:
|
|
137
|
+
ls = glob.glob(str(output_dir / "**/*"), recursive=True)
|
|
138
|
+
expected_files = []
|
|
139
|
+
for subset in set(os.path.dirname(x) for x in data_paths):
|
|
140
|
+
expected_files.extend([
|
|
141
|
+
os.path.join(str(output_dir), os.path.basename(subset), f"{i}.h5")
|
|
142
|
+
for i in range(num_workers)
|
|
143
|
+
])
|
|
144
|
+
if set(expected_files).issubset(set(ls)):
|
|
145
|
+
print(
|
|
146
|
+
f"Output directory already contains the expected files. Skipping the "
|
|
147
|
+
"processing as --skip-if-exists is set. If you want to rebase the directory, "
|
|
148
|
+
"please run the script with --rebase."
|
|
149
|
+
)
|
|
150
|
+
return
|
|
151
|
+
else:
|
|
152
|
+
raise ValueError(
|
|
153
|
+
f"File exists: '{str(output_dir.resolve())}'. If you want to rebase the "
|
|
154
|
+
"directory, please run the script with --rebase."
|
|
155
|
+
)
|
|
156
|
+
output_dir.mkdir()
|
|
157
|
+
|
|
131
158
|
label_col_name = args.cohort_label_name
|
|
132
159
|
|
|
133
160
|
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
|
|
@@ -295,7 +322,7 @@ def main(args):
|
|
|
295
322
|
codes_metadata,
|
|
296
323
|
output_dir,
|
|
297
324
|
output_name,
|
|
298
|
-
|
|
325
|
+
num_workers,
|
|
299
326
|
d_items,
|
|
300
327
|
d_labitems,
|
|
301
328
|
warned_codes,
|
|
@@ -304,25 +331,25 @@ def main(args):
|
|
|
304
331
|
|
|
305
332
|
# meds --> remed
|
|
306
333
|
print("Processing...")
|
|
307
|
-
if args.workers <= 1:
|
|
334
|
+
if num_workers <= 1:
|
|
308
335
|
length_per_subject_gathered = [meds_to_remed_partial(data)]
|
|
309
336
|
del data
|
|
310
337
|
else:
|
|
311
338
|
subject_ids = data["subject_id"].unique().to_list()
|
|
312
|
-
n = args.workers
|
|
339
|
+
n = num_workers
|
|
313
340
|
subject_id_chunks = [subject_ids[i::n] for i in range(n)]
|
|
314
341
|
data_chunks = []
|
|
315
342
|
for subject_id_chunk in subject_id_chunks:
|
|
316
343
|
data_chunks.append(data.filter(pl.col("subject_id").is_in(subject_id_chunk)))
|
|
317
344
|
del data
|
|
318
|
-
pool = multiprocessing.get_context("spawn").Pool(processes=args.workers)
|
|
345
|
+
pool = multiprocessing.get_context("spawn").Pool(processes=num_workers)
|
|
319
346
|
# the order is preserved
|
|
320
347
|
length_per_subject_gathered = pool.map(meds_to_remed_partial, data_chunks)
|
|
321
348
|
pool.close()
|
|
322
349
|
pool.join()
|
|
323
350
|
del data_chunks
|
|
324
351
|
|
|
325
|
-
if len(length_per_subject_gathered) != args.workers:
|
|
352
|
+
if len(length_per_subject_gathered) != num_workers:
|
|
326
353
|
print(
|
|
327
354
|
"Number of processed workers were smaller than the specified num workers "
|
|
328
355
|
"(--workers) due to the small size of data. Consider reducing the number of "
|
|
@@ -579,6 +606,4 @@ def meds_to_remed(
|
|
|
579
606
|
|
|
580
607
|
|
|
581
608
|
if __name__ == "__main__":
|
|
582
|
-
|
|
583
|
-
args = parser.parse_args()
|
|
584
|
-
main(args)
|
|
609
|
+
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: genhpf
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: GenHPF: General Healthcare Predictive Framework with Multi-task Multi-source Learning
|
|
5
5
|
Author-email: Jungwoo Oh <ojw0123@kaist.ac.kr>, Kyunghoon Hur <pacesun@kaist.ac.kr>
|
|
6
6
|
License: MIT license
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|