cehrgpt 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/.github/workflows/tests.yaml +1 -0
  2. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/PKG-INFO +6 -4
  3. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/README.md +3 -1
  4. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/pyproject.toml +2 -2
  5. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/cehrgpt_data_processor.py +6 -5
  6. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/hf_cehrgpt_dataset_collator.py +14 -0
  7. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt.egg-info/PKG-INFO +6 -4
  8. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt.egg-info/requires.txt +2 -2
  9. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/.github/workflows/build-python.yaml +0 -0
  10. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/.gitignore +0 -0
  11. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/.pre-commit-config.yaml +0 -0
  12. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/LICENSE +0 -0
  13. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/constraints.txt +0 -0
  14. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/data_generation.md +0 -0
  15. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/feature_representation.md +0 -0
  16. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_configs/cehrgpt_pretrain_sample_config.yaml +0 -0
  17. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_configs/credential_file_sample.ini +0 -0
  18. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_data/omop_vocab/concept/concept.parquet +0 -0
  19. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_data/pretrain/patient_sequence.parquet +0 -0
  20. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_data/pretrained_embeddings/pretrained_embedding_concepts.pkl +0 -0
  21. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/sample_data/pretrained_embeddings/pretrained_embedding_vectors.npy +0 -0
  22. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/create_cehrgpt_pretraining_data.sh +0 -0
  23. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/extract_features_gpt.sh +0 -0
  24. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/level_three_evaluation.sh +0 -0
  25. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/omop_pipeline.sh +0 -0
  26. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/pool_generated_sequences.sh +0 -0
  27. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/scripts/run_linear_prob.sh +0 -0
  28. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/setup.cfg +0 -0
  29. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/spark_setup.md +0 -0
  30. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/__init__.py +0 -0
  31. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/__init__.py +0 -0
  32. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/__init__.py +0 -0
  33. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/htn_treatment_pathway.py +0 -0
  34. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/irregularity.py +0 -0
  35. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/__init__.py +0 -0
  36. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/attribute_inference.py +0 -0
  37. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/attribute_inference_config.yml +0 -0
  38. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/member_inference.py +0 -0
  39. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/nearest_neighbor_inference.py +0 -0
  40. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/reid_inference.py +0 -0
  41. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/privacy/utils.py +0 -0
  42. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/treatment_pathway/__init__.py +0 -0
  43. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/treatment_pathway/depression_treatment_pathway.py +0 -0
  44. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/treatment_pathway/diabetes_treatment_pathway.py +0 -0
  45. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/treatment_pathway/htn_treatment_pathway.py +0 -0
  46. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/analysis/treatment_pathway/treatment_pathway.py +0 -0
  47. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/cehrgpt_args.py +0 -0
  48. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/__init__.py +0 -0
  49. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/hf_cehrgpt_dataset.py +0 -0
  50. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/hf_cehrgpt_dataset_mapping.py +0 -0
  51. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/data/sample_packing_sampler.py +0 -0
  52. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/__init__.py +0 -0
  53. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/cehrgpt_conditional_generation.py +0 -0
  54. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/chatgpt_generation.py +0 -0
  55. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/generate_batch_hf_gpt_sequence.py +0 -0
  56. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/omop_converter_batch.py +0 -0
  57. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/generation/omop_entity.py +0 -0
  58. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/gpt_utils.py +0 -0
  59. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/__init__.py +0 -0
  60. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/activations.py +0 -0
  61. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/config.py +0 -0
  62. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/gpt2.py +0 -0
  63. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/hf_cehrgpt.py +0 -0
  64. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/hf_modeling_outputs.py +0 -0
  65. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/pretrained_embeddings.py +0 -0
  66. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/special_tokens.py +0 -0
  67. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/models/tokenization_hf_cehrgpt.py +0 -0
  68. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/__init__.py +0 -0
  69. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/condition_era.py +0 -0
  70. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/observation_period.py +0 -0
  71. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/omop_argparse.py +0 -0
  72. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/omop_table_builder.py +0 -0
  73. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/ontology.py +0 -0
  74. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/queries/__init__.py +0 -0
  75. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/queries/condition_era.py +0 -0
  76. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/queries/observation_period.py +0 -0
  77. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/omop/sample_omop_tables.py +0 -0
  78. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/__init__.py +0 -0
  79. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/data_utils.py +0 -0
  80. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/gpt_runner_util.py +0 -0
  81. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/hf_cehrgpt_finetune_runner.py +0 -0
  82. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/hf_cehrgpt_pretrain_runner.py +0 -0
  83. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/hf_gpt_runner_argument_dataclass.py +0 -0
  84. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/hyperparameter_search_util.py +0 -0
  85. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/runners/sample_packing_trainer.py +0 -0
  86. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/simulations/__init__.py +0 -0
  87. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/simulations/generate_plots.py +0 -0
  88. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/simulations/run_simulation.sh +0 -0
  89. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/simulations/time_embedding_simulation.py +0 -0
  90. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/simulations/time_token_simulation.py +0 -0
  91. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/__init__.py +0 -0
  92. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/config/1_year_cabg.yaml +0 -0
  93. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/config/30_day_readmission.yaml +0 -0
  94. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/config/next_visit_type_prediction.yaml +0 -0
  95. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/config/t2dm_hf.yaml +0 -0
  96. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/time_to_event_model.py +0 -0
  97. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/time_to_event_prediction.py +0 -0
  98. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/time_to_event/time_to_event_utils.py +0 -0
  99. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/__init__.py +0 -0
  100. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/ehrshot_benchmark.py +0 -0
  101. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/generate_causal_patient_split_by_age.py +0 -0
  102. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/generate_pretrained_embeddings.py +0 -0
  103. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/linear_prob/__init__.py +0 -0
  104. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/linear_prob/compute_cehrgpt_features.py +0 -0
  105. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/linear_prob/train_with_cehrgpt_features.py +0 -0
  106. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/merge_synthetic_real_datasets.py +0 -0
  107. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt/tools/upload_omop_tables.py +0 -0
  108. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt.egg-info/SOURCES.txt +0 -0
  109. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt.egg-info/dependency_links.txt +0 -0
  110. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/src/cehrgpt.egg-info/top_level.txt +0 -0
  111. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/synthetic_data_generation.md +0 -0
  112. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/__init__.py +0 -0
  113. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/integration_tests/__init__.py +0 -0
  114. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/integration_tests/runners/__init__.py +0 -0
  115. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/integration_tests/runners/hf_cehrgpt_pretrain_runner_test.py +0 -0
  116. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/integration_tests/runners/hf_cehrgpt_pretrain_sample_packing_runner_test.py +0 -0
  117. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/integration_tests/runners/hf_cehrgpt_pretrain_sfm_runner_test.py +0 -0
  118. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/__init__.py +0 -0
  119. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/gpt_utils_test.py +0 -0
  120. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/__init__.py +0 -0
  121. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/model_utils_test.py +0 -0
  122. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/rotary_embedding_test.py +0 -0
  123. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/tokenization/__init__.py +0 -0
  124. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/tokenization/create_bins_with_spline_test.py +0 -0
  125. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/models/tokenization/create_sample_from_bins_test.py +0 -0
  126. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/numeric_concept_statistics_test.py +0 -0
  127. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/runners/__init__.py +0 -0
  128. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/runners/hf_cehrgpt_finetune_runner_test.py +0 -0
  129. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/tokenization_test.py +0 -0
  130. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/tools/__init__.py +0 -0
  131. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/tests/unit_tests/tools/upload_omop_tables_test.py +0 -0
  132. {cehrgpt-0.1.4 → cehrgpt-0.1.6}/zero_shot_prediction.md +0 -0
@@ -27,6 +27,7 @@ jobs:
27
27
  run: |
28
28
  python -m pip install --upgrade pip
29
29
  pip install flake8 pytest
30
+ pip install tensorflow==2.15.0 tensorflow-datasets==4.5.2
30
31
  pip install -e .
31
32
  - name: Lint with flake8
32
33
  run: |
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cehrgpt
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: CEHR-GPT: Generating Electronic Health Records with Chronological Patient Timelines
5
5
  Author-email: Chao Pang <chaopang229@gmail.com>, Xinzhuo Jiang <xj2193@cumc.columbia.edu>, Krishna Kalluri <kk3326@cumc.columbia.edu>, Elise Minto <em3697@cumc.columbia.edu>, Jason Patterson <jp3477@cumc.columbia.edu>, Nishanth Parameshwar Pavinkurve <np2689@cumc.columbia.edu>, Karthik Natarajan <kn2174@cumc.columbia.edu>
6
6
  License: MIT License
@@ -12,8 +12,8 @@ Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.10.0
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
- Requires-Dist: cehrbert>=1.4.8
16
- Requires-Dist: cehrbert_data>=0.1.1
15
+ Requires-Dist: cehrbert==1.4.8
16
+ Requires-Dist: cehrbert_data==0.1.1
17
17
  Requires-Dist: openai==1.54.3
18
18
  Requires-Dist: optuna==4.0.0
19
19
  Requires-Dist: transformers==4.44.1
@@ -104,7 +104,9 @@ python -u -m cehrgpt.runners.hf_cehrgpt_pretrain_runner \
104
104
  --sample_packing --max_tokens_per_batch 16384 \
105
105
  --warmup_ratio 0.01 --weight_decay 0.01 \
106
106
  --num_train_epochs 50 --learning_rate 0.0002 \
107
- --use_early_stopping --early_stopping_threshold 0.001
107
+ --use_early_stopping \
108
+ --load_best_model_at_end true \
109
+ --early_stopping_threshold 0.001
108
110
  ```
109
111
 
110
112
  > **Tip**: Increase `max_position_embeddings` for longer context windows based on your use case.
@@ -69,7 +69,9 @@ python -u -m cehrgpt.runners.hf_cehrgpt_pretrain_runner \
69
69
  --sample_packing --max_tokens_per_batch 16384 \
70
70
  --warmup_ratio 0.01 --weight_decay 0.01 \
71
71
  --num_train_epochs 50 --learning_rate 0.0002 \
72
- --use_early_stopping --early_stopping_threshold 0.001
72
+ --use_early_stopping \
73
+ --load_best_model_at_end true \
74
+ --early_stopping_threshold 0.001
73
75
  ```
74
76
 
75
77
  > **Tip**: Increase `max_position_embeddings` for longer context windows based on your use case.
@@ -28,8 +28,8 @@ classifiers = [
28
28
  ]
29
29
 
30
30
  dependencies = [
31
- "cehrbert>=1.4.8",
32
- "cehrbert_data>=0.1.1",
31
+ "cehrbert==1.4.8",
32
+ "cehrbert_data==0.1.1",
33
33
  "openai==1.54.3",
34
34
  "optuna==4.0.0",
35
35
  "transformers==4.44.1",
@@ -275,11 +275,7 @@ class CehrGptDataProcessor(DatasetMapping):
275
275
  if demographic_tokens is not None
276
276
  else self.empty_array
277
277
  ),
278
- np.asarray(
279
- self._convert_time_to_event(
280
- record["concept_ids"][start_index:end_index]
281
- )
282
- ),
278
+ np.asarray(record["time_to_visits"][start_index:end_index]),
283
279
  np.asarray([-100.0]) if add_last_token else self.empty_array,
284
280
  ]
285
281
  ).astype(np.float32)
@@ -303,6 +299,11 @@ class CehrGptDataProcessor(DatasetMapping):
303
299
  record["concept_ids"], record["epoch_times"]
304
300
  )
305
301
 
302
+ if self.include_ttv_prediction:
303
+ record["time_to_visits"] = np.asarray(
304
+ self._convert_time_to_event(record["concept_ids"])
305
+ )
306
+
306
307
  # Return the record directly if the actual sequence length is less than the max sequence
307
308
  if seq_length <= new_max_length:
308
309
  # We only add [END] to the end of the sequence in pre-training
@@ -528,6 +528,7 @@ class SamplePackingCehrGptDataCollator(CehrGptDataCollator):
528
528
  current_epoch_times = []
529
529
  current_value_indicators = []
530
530
  current_values = []
531
+ current_time_to_visits = []
531
532
 
532
533
  # MOTOR inputs
533
534
  current_motor_censor_times = []
@@ -567,6 +568,16 @@ class SamplePackingCehrGptDataCollator(CehrGptDataCollator):
567
568
  )
568
569
  current_epoch_times.extend(epoch_times + [max(epoch_times)])
569
570
 
571
+ if self.include_ttv_prediction:
572
+ current_time_to_visits.extend(
573
+ (
574
+ example["time_to_visits"].tolist()
575
+ if isinstance(example["time_to_visits"], torch.Tensor)
576
+ else list(example["time_to_visits"])
577
+ )
578
+ + [-100]
579
+ )
580
+
570
581
  if self.include_values:
571
582
  current_value_indicators.extend(
572
583
  (
@@ -649,6 +660,9 @@ class SamplePackingCehrGptDataCollator(CehrGptDataCollator):
649
660
  "epoch_times": current_epoch_times,
650
661
  }
651
662
 
663
+ if self.include_ttv_prediction:
664
+ packed_example.update({"time_to_visits": current_time_to_visits})
665
+
652
666
  if self.include_values:
653
667
  packed_example.update(
654
668
  {"value_indicators": current_value_indicators, "values": current_values}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cehrgpt
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: CEHR-GPT: Generating Electronic Health Records with Chronological Patient Timelines
5
5
  Author-email: Chao Pang <chaopang229@gmail.com>, Xinzhuo Jiang <xj2193@cumc.columbia.edu>, Krishna Kalluri <kk3326@cumc.columbia.edu>, Elise Minto <em3697@cumc.columbia.edu>, Jason Patterson <jp3477@cumc.columbia.edu>, Nishanth Parameshwar Pavinkurve <np2689@cumc.columbia.edu>, Karthik Natarajan <kn2174@cumc.columbia.edu>
6
6
  License: MIT License
@@ -12,8 +12,8 @@ Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.10.0
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
- Requires-Dist: cehrbert>=1.4.8
16
- Requires-Dist: cehrbert_data>=0.1.1
15
+ Requires-Dist: cehrbert==1.4.8
16
+ Requires-Dist: cehrbert_data==0.1.1
17
17
  Requires-Dist: openai==1.54.3
18
18
  Requires-Dist: optuna==4.0.0
19
19
  Requires-Dist: transformers==4.44.1
@@ -104,7 +104,9 @@ python -u -m cehrgpt.runners.hf_cehrgpt_pretrain_runner \
104
104
  --sample_packing --max_tokens_per_batch 16384 \
105
105
  --warmup_ratio 0.01 --weight_decay 0.01 \
106
106
  --num_train_epochs 50 --learning_rate 0.0002 \
107
- --use_early_stopping --early_stopping_threshold 0.001
107
+ --use_early_stopping \
108
+ --load_best_model_at_end true \
109
+ --early_stopping_threshold 0.001
108
110
  ```
109
111
 
110
112
  > **Tip**: Increase `max_position_embeddings` for longer context windows based on your use case.
@@ -1,5 +1,5 @@
1
- cehrbert>=1.4.8
2
- cehrbert_data>=0.1.1
1
+ cehrbert==1.4.8
2
+ cehrbert_data==0.1.1
3
3
  openai==1.54.3
4
4
  optuna==4.0.0
5
5
  transformers==4.44.1
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes