polymon 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. polymon-1.0.2/PKG-INFO +23 -0
  2. polymon-1.0.2/README.md +42 -0
  3. polymon-1.0.2/polymon/__init__.py +1 -0
  4. polymon-1.0.2/polymon/cli/__init__.py +0 -0
  5. polymon-1.0.2/polymon/cli/main.py +330 -0
  6. polymon-1.0.2/polymon/cli/merge.py +368 -0
  7. polymon-1.0.2/polymon/cli/predict.py +38 -0
  8. polymon-1.0.2/polymon/cli/train_dl.py +194 -0
  9. polymon-1.0.2/polymon/cli/train_ml.py +282 -0
  10. polymon-1.0.2/polymon/data/__init__.py +2 -0
  11. polymon-1.0.2/polymon/data/_dmpnn_transform.py +223 -0
  12. polymon-1.0.2/polymon/data/_pretrained.py +146 -0
  13. polymon-1.0.2/polymon/data/cgcnn.json +1 -0
  14. polymon-1.0.2/polymon/data/dataset.py +316 -0
  15. polymon-1.0.2/polymon/data/dedup.py +215 -0
  16. polymon-1.0.2/polymon/data/featurizer.py +905 -0
  17. polymon-1.0.2/polymon/data/mordred_unstable.txt +909 -0
  18. polymon-1.0.2/polymon/data/polymer.py +157 -0
  19. polymon-1.0.2/polymon/data/utils.py +156 -0
  20. polymon-1.0.2/polymon/data/xenonpy_elements.csv +96 -0
  21. polymon-1.0.2/polymon/estimator/FFV_atom_contrib.npy +0 -0
  22. polymon-1.0.2/polymon/estimator/Tc_atom_contrib.npy +0 -0
  23. polymon-1.0.2/polymon/estimator/__init__.py +35 -0
  24. polymon-1.0.2/polymon/estimator/atom_contrib.py +102 -0
  25. polymon-1.0.2/polymon/estimator/base.py +25 -0
  26. polymon-1.0.2/polymon/estimator/density.py +50 -0
  27. polymon-1.0.2/polymon/estimator/density_Fedors.py +292 -0
  28. polymon-1.0.2/polymon/estimator/density_ibm.py +396 -0
  29. polymon-1.0.2/polymon/estimator/low_fidelity.py +49 -0
  30. polymon-1.0.2/polymon/estimator/ml.py +70 -0
  31. polymon-1.0.2/polymon/estimator/nx_rg.py +124 -0
  32. polymon-1.0.2/polymon/estimator/rg.py +103 -0
  33. polymon-1.0.2/polymon/exp/__init__.py +0 -0
  34. polymon-1.0.2/polymon/exp/pipeline.py +715 -0
  35. polymon-1.0.2/polymon/exp/score.py +85 -0
  36. polymon-1.0.2/polymon/exp/train.py +226 -0
  37. polymon-1.0.2/polymon/exp/utils.py +104 -0
  38. polymon-1.0.2/polymon/hparams.py +534 -0
  39. polymon-1.0.2/polymon/model/__init__.py +265 -0
  40. polymon-1.0.2/polymon/model/base.py +357 -0
  41. polymon-1.0.2/polymon/model/dmpnn.py +458 -0
  42. polymon-1.0.2/polymon/model/ensemble.py +483 -0
  43. polymon-1.0.2/polymon/model/gatv2/__init__.py +0 -0
  44. polymon-1.0.2/polymon/model/gatv2/embed_residual.py +128 -0
  45. polymon-1.0.2/polymon/model/gatv2/fastkan_gatv2.py +108 -0
  46. polymon-1.0.2/polymon/model/gatv2/gat_chain_readout.py +174 -0
  47. polymon-1.0.2/polymon/model/gatv2/gatv2_sage.py +118 -0
  48. polymon-1.0.2/polymon/model/gatv2/kan_gatv2.py +214 -0
  49. polymon-1.0.2/polymon/model/gatv2/lineevo.py +299 -0
  50. polymon-1.0.2/polymon/model/gatv2/multi_fidelity.py +144 -0
  51. polymon-1.0.2/polymon/model/gatv2/position_encoding.py +190 -0
  52. polymon-1.0.2/polymon/model/gnn.py +879 -0
  53. polymon-1.0.2/polymon/model/gps/__init__.py +0 -0
  54. polymon-1.0.2/polymon/model/gps/conv.py +201 -0
  55. polymon-1.0.2/polymon/model/gps/gps.py +160 -0
  56. polymon-1.0.2/polymon/model/gvp.py +451 -0
  57. polymon-1.0.2/polymon/model/kan/__init__.py +0 -0
  58. polymon-1.0.2/polymon/model/kan/dmpnn.py +452 -0
  59. polymon-1.0.2/polymon/model/kan/efficient_kan.py +363 -0
  60. polymon-1.0.2/polymon/model/kan/fast_kan.py +261 -0
  61. polymon-1.0.2/polymon/model/kan/fourier_kan.py +66 -0
  62. polymon-1.0.2/polymon/model/kan/gcn.py +349 -0
  63. polymon-1.0.2/polymon/model/kan/gin.py +158 -0
  64. polymon-1.0.2/polymon/model/kan/vanilla.py +32 -0
  65. polymon-1.0.2/polymon/model/mlp.py +37 -0
  66. polymon-1.0.2/polymon/model/polycl/__init__.py +0 -0
  67. polymon-1.0.2/polymon/model/polycl/polycl.py +210 -0
  68. polymon-1.0.2/polymon/model/register.py +31 -0
  69. polymon-1.0.2/polymon/model/utils.py +218 -0
  70. polymon-1.0.2/polymon/setting.py +52 -0
  71. polymon-1.0.2/polymon.egg-info/PKG-INFO +23 -0
  72. polymon-1.0.2/polymon.egg-info/SOURCES.txt +76 -0
  73. polymon-1.0.2/polymon.egg-info/dependency_links.txt +1 -0
  74. polymon-1.0.2/polymon.egg-info/entry_points.txt +2 -0
  75. polymon-1.0.2/polymon.egg-info/requires.txt +15 -0
  76. polymon-1.0.2/polymon.egg-info/top_level.txt +1 -0
  77. polymon-1.0.2/setup.cfg +4 -0
  78. polymon-1.0.2/setup.py +39 -0
polymon-1.0.2/PKG-INFO ADDED
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.4
2
+ Name: polymon
3
+ Version: 1.0.2
4
+ Summary: PolyMon
5
+ Author: PolyMon Group
6
+ Requires-Dist: mordredcommunity
7
+ Requires-Dist: mordred==1.2.0
8
+ Requires-Dist: rdkit==2023.09.6
9
+ Requires-Dist: xenonpy
10
+ Requires-Dist: xgboost
11
+ Requires-Dist: catboost
12
+ Requires-Dist: lightgbm
13
+ Requires-Dist: loguru
14
+ Requires-Dist: scikit-learn
15
+ Requires-Dist: tabpfn
16
+ Requires-Dist: torchensemble
17
+ Requires-Dist: optuna
18
+ Requires-Dist: lightning
19
+ Requires-Dist: torch_geometric
20
+ Requires-Dist: pykan
21
+ Dynamic: author
22
+ Dynamic: requires-dist
23
+ Dynamic: summary
@@ -0,0 +1,42 @@
1
+ <img src="assets/polymon.png" alt="Polymon Icon">
2
+
3
+ `PolyMon` is a unified framework for polymer property prediction. It is designed to be flexible and easy to use. The framework includes various models, featurizers, and different training strategies.
4
+
5
+ <p align="center">
6
+ <img src="assets/framework.png" alt="framework" width="400">
7
+ </p>
8
+
9
+
10
+ ## Installation
11
+ This package requires `torch>=2.2.2` and `torch_geometric>=2.5.3`. We recommend installing these packages manually before installing this package.
12
+
13
+ ```bash
14
+ conda install -y pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 \
15
+ pytorch-cuda=11.8 -c pytorch -c nvidia
16
+ pip install torch_geometric
17
+ pip install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.3.0+cu118.html
18
+
19
+ pip install polymon
20
+ ```
21
+
22
+ ## Usage
23
+ ### Train
24
+ Train a tabular model or a GNN model for polymer property prediction.
25
+ ```bash
26
+ polymon train --help
27
+ ```
28
+
29
+ ### Merge
30
+ Merge two datasets into one based on different acquisition functions.
31
+ ```bash
32
+ polymon merge --help
33
+ ```
34
+
35
+ ### Predict
36
+ Predict labels for a given dataset.
37
+ ```bash
38
+ polymon predict --help
39
+ ```
40
+
41
+ ## Citation
42
+ To be updated.
@@ -0,0 +1 @@
1
+ __version__ = '1.0.2'
File without changes
@@ -0,0 +1,330 @@
1
+ import argparse
2
+
3
+ from polymon.cli.merge import main as main_merge
4
+ from polymon.cli.train_dl import main as main_dl
5
+ from polymon.cli.train_ml import MODELS
6
+ from polymon.cli.train_ml import main as main_ml
7
+ from polymon.cli.predict import main as main_predict
8
+
9
+
10
+
11
def parse_args(argv=None):
    """Parse the ``polymon`` command line.

    Parameters:
        argv: optional list of argument strings. When None (the default)
            argparse falls back to ``sys.argv[1:]``, so existing callers
            that invoke ``parse_args()`` with no arguments are unaffected.

    Returns:
        argparse.Namespace with ``mode`` set to 'train', 'merge' or
        'predict', plus the options of the selected sub-command.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode', required=True)
    _add_train_args(subparsers.add_parser('train', help='Train a ML/DL model'))
    _add_merge_args(subparsers.add_parser('merge', help='Merge two datasets'))
    _add_predict_args(subparsers.add_parser('predict', help='Predict labels'))
    return parser.parse_args(argv)


def _add_train_args(p):
    """Register every option of the ``train`` sub-command on parser *p*."""
    # Data selection and bookkeeping.
    p.add_argument('--raw-csv', type=str, default='database/database.csv',
                   help='Path to the raw csv file')
    p.add_argument('--sources', type=str, nargs='+', default=['Kaggle'],
                   help='Sources to use for training')
    p.add_argument('--tag', type=str, default='debug',
                   help='Tag to use for training')
    p.add_argument('--labels', nargs='+', required=True,
                   help='Labels to use for training')
    p.add_argument('--feature-names', type=str, nargs='+', default=['rdkit2d'],
                   help='Feature names to use for training')
    p.add_argument('--n-trials', type=int, default=None,
                   help='Number of trials to run for hyperparameter optimization.')
    p.add_argument('--out-dir', type=str, default='./results',
                   help='Path to the output directory')
    p.add_argument('--hparams-from', type=str, default=None,
                   help='Path to the hparams file. Allowed formats: .json, .pt, .pkl')
    p.add_argument('--n-fold', type=int, default=1,
                   help='Number of folds to use for cross-validation')
    p.add_argument('--split-mode', type=str, default='random',
                   help='Mode to split the data into training, validation, and test sets')
    p.add_argument('--seed', type=int, default=42,
                   help='Seed to use for training')
    p.add_argument('--remove-hydrogens', action='store_true',
                   help='Whether to remove hydrogens from the molecules')
    p.add_argument('--descriptors', type=str, nargs='+', default=None,
                   help='Descriptors to use for training. For ML models, this must be specified.')
    p.add_argument('--model', type=str, default='rf',
                   help='Model to use for training')
    # Architecture options; only consumed when `--model` is not a key of
    # ``MODELS`` (i.e. the DL training path — see main()).
    p.add_argument('--hidden-dim', type=int, default=32,
                   help='Hidden dimension of the model')
    p.add_argument('--num-layers', type=int, default=3,
                   help='Number of layers of the model')
    # DL training-loop options.
    p.add_argument('--batch-size', type=int, default=128,
                   help='Batch size to use for training')
    p.add_argument('--lr', type=float, default=1e-3,
                   help='Learning rate to use for training')
    p.add_argument('--num-epochs', type=int, default=2500,
                   help='Number of epochs to use for training')
    p.add_argument('--early-stopping-patience', type=int, default=250,
                   help='Number of epochs to wait before early stopping')
    p.add_argument('--device', type=str, default='cuda',
                   help='Device to use for training')
    p.add_argument('--run-production', action='store_true',
                   help=('Whether to run the training in production mode, which means '
                         'train:val:test splits will be forced to 0.95:0.05:0.0'))
    p.add_argument('--finetune', action='store_true',
                   help='Whether to finetune the model')
    p.add_argument('--finetune-csv-path', type=str, default=None,
                   help='Path to the csv file to finetune the model on')
    p.add_argument('--pretrained-model', type=str, default=None,
                   help='Path to the pretrained model')
    p.add_argument('--n-estimator', type=int, default=1,
                   help='Number of estimators to use for training')
    p.add_argument('--additional-features', type=str, nargs='+', default=None,
                   help='Additional features to use for training')
    p.add_argument('--skip-train', action='store_true',
                   help='Whether to skip the training step')
    p.add_argument('--low-fidelity-model', type=str, default=None,
                   help='Path to the low fidelity model')
    p.add_argument('--estimator-name', type=str, default=None,
                   help='Name of the estimator to give base predictions')
    p.add_argument('--emb-model', type=str, default=None,
                   help='Name of the embedding model for base graph embeddings')
    p.add_argument('--ensemble-type', type=str, default='voting',
                   help='Type of ensemble to use for training')
    p.add_argument('--train-residual', action='store_true',
                   help='Whether to train the residual of the model')
    p.add_argument('--normalizer-type', type=str, default='normalizer',
                   choices=['normalizer', 'log_normalizer', 'none'],
                   help='Type of normalizer to use for training')
    p.add_argument('--augmentation', action='store_true',
                   help='Whether to use data augmentation')


def _add_merge_args(p):
    """Register every option of the ``merge`` sub-command on parser *p*."""
    p.add_argument('--sources', type=str, required=True, nargs='+',
                   help='Sources to merge')
    p.add_argument('--label', type=str, required=True,
                   help='Label to merge')
    p.add_argument('--hparams-from', type=str, required=True,
                   help='Path to the hparams file')
    p.add_argument('--acquisition', type=str, required=True,
                   choices=['epig', 'uncertainty', 'difference'],
                   help='Acquisition function to use for merging')
    p.add_argument('--sample-size', type=int, default=20,
                   help='Sample size to use for merging')
    p.add_argument('--uncertainty-threshold', type=float, default=0.1,
                   help='Uncertainty threshold to use for merging')
    p.add_argument('--difference-threshold', type=float, default=0.1,
                   help='Difference threshold to use for merging')
    p.add_argument('--target-size', type=int, default=1000,
                   help='Target size to use for merging')
    p.add_argument('--base-csv', type=str, default=None,
                   help='Path to the base csv file')


def _add_predict_args(p):
    """Register every option of the ``predict`` sub-command on parser *p*."""
    p.add_argument('--model-path', type=str, required=True,
                   help='Path to the model')
    p.add_argument('--csv-path', type=str, required=True,
                   help='Path to the csv file')
    p.add_argument('--smiles-column', type=str, required=True,
                   help='Name of the smiles column')
314
+
315
+
316
def main():
    """Entry point of the ``polymon`` CLI: route to the selected sub-command."""
    args = parse_args()
    if args.mode == 'train':
        # Tabular models registered in MODELS take the ML path;
        # any other model name is treated as a deep-learning model.
        run = main_ml if args.model in MODELS else main_dl
        run(args)
    elif args.mode == 'merge':
        main_merge(args)
    elif args.mode == 'predict':
        main_predict(args)
327
+
328
+
329
# Script entry point when the module is executed directly.
if __name__ == '__main__':
    main()