polymon 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polymon-1.0.2/PKG-INFO +23 -0
- polymon-1.0.2/README.md +42 -0
- polymon-1.0.2/polymon/__init__.py +1 -0
- polymon-1.0.2/polymon/cli/__init__.py +0 -0
- polymon-1.0.2/polymon/cli/main.py +330 -0
- polymon-1.0.2/polymon/cli/merge.py +368 -0
- polymon-1.0.2/polymon/cli/predict.py +38 -0
- polymon-1.0.2/polymon/cli/train_dl.py +194 -0
- polymon-1.0.2/polymon/cli/train_ml.py +282 -0
- polymon-1.0.2/polymon/data/__init__.py +2 -0
- polymon-1.0.2/polymon/data/_dmpnn_transform.py +223 -0
- polymon-1.0.2/polymon/data/_pretrained.py +146 -0
- polymon-1.0.2/polymon/data/cgcnn.json +1 -0
- polymon-1.0.2/polymon/data/dataset.py +316 -0
- polymon-1.0.2/polymon/data/dedup.py +215 -0
- polymon-1.0.2/polymon/data/featurizer.py +905 -0
- polymon-1.0.2/polymon/data/mordred_unstable.txt +909 -0
- polymon-1.0.2/polymon/data/polymer.py +157 -0
- polymon-1.0.2/polymon/data/utils.py +156 -0
- polymon-1.0.2/polymon/data/xenonpy_elements.csv +96 -0
- polymon-1.0.2/polymon/estimator/FFV_atom_contrib.npy +0 -0
- polymon-1.0.2/polymon/estimator/Tc_atom_contrib.npy +0 -0
- polymon-1.0.2/polymon/estimator/__init__.py +35 -0
- polymon-1.0.2/polymon/estimator/atom_contrib.py +102 -0
- polymon-1.0.2/polymon/estimator/base.py +25 -0
- polymon-1.0.2/polymon/estimator/density.py +50 -0
- polymon-1.0.2/polymon/estimator/density_Fedors.py +292 -0
- polymon-1.0.2/polymon/estimator/density_ibm.py +396 -0
- polymon-1.0.2/polymon/estimator/low_fidelity.py +49 -0
- polymon-1.0.2/polymon/estimator/ml.py +70 -0
- polymon-1.0.2/polymon/estimator/nx_rg.py +124 -0
- polymon-1.0.2/polymon/estimator/rg.py +103 -0
- polymon-1.0.2/polymon/exp/__init__.py +0 -0
- polymon-1.0.2/polymon/exp/pipeline.py +715 -0
- polymon-1.0.2/polymon/exp/score.py +85 -0
- polymon-1.0.2/polymon/exp/train.py +226 -0
- polymon-1.0.2/polymon/exp/utils.py +104 -0
- polymon-1.0.2/polymon/hparams.py +534 -0
- polymon-1.0.2/polymon/model/__init__.py +265 -0
- polymon-1.0.2/polymon/model/base.py +357 -0
- polymon-1.0.2/polymon/model/dmpnn.py +458 -0
- polymon-1.0.2/polymon/model/ensemble.py +483 -0
- polymon-1.0.2/polymon/model/gatv2/__init__.py +0 -0
- polymon-1.0.2/polymon/model/gatv2/embed_residual.py +128 -0
- polymon-1.0.2/polymon/model/gatv2/fastkan_gatv2.py +108 -0
- polymon-1.0.2/polymon/model/gatv2/gat_chain_readout.py +174 -0
- polymon-1.0.2/polymon/model/gatv2/gatv2_sage.py +118 -0
- polymon-1.0.2/polymon/model/gatv2/kan_gatv2.py +214 -0
- polymon-1.0.2/polymon/model/gatv2/lineevo.py +299 -0
- polymon-1.0.2/polymon/model/gatv2/multi_fidelity.py +144 -0
- polymon-1.0.2/polymon/model/gatv2/position_encoding.py +190 -0
- polymon-1.0.2/polymon/model/gnn.py +879 -0
- polymon-1.0.2/polymon/model/gps/__init__.py +0 -0
- polymon-1.0.2/polymon/model/gps/conv.py +201 -0
- polymon-1.0.2/polymon/model/gps/gps.py +160 -0
- polymon-1.0.2/polymon/model/gvp.py +451 -0
- polymon-1.0.2/polymon/model/kan/__init__.py +0 -0
- polymon-1.0.2/polymon/model/kan/dmpnn.py +452 -0
- polymon-1.0.2/polymon/model/kan/efficient_kan.py +363 -0
- polymon-1.0.2/polymon/model/kan/fast_kan.py +261 -0
- polymon-1.0.2/polymon/model/kan/fourier_kan.py +66 -0
- polymon-1.0.2/polymon/model/kan/gcn.py +349 -0
- polymon-1.0.2/polymon/model/kan/gin.py +158 -0
- polymon-1.0.2/polymon/model/kan/vanilla.py +32 -0
- polymon-1.0.2/polymon/model/mlp.py +37 -0
- polymon-1.0.2/polymon/model/polycl/__init__.py +0 -0
- polymon-1.0.2/polymon/model/polycl/polycl.py +210 -0
- polymon-1.0.2/polymon/model/register.py +31 -0
- polymon-1.0.2/polymon/model/utils.py +218 -0
- polymon-1.0.2/polymon/setting.py +52 -0
- polymon-1.0.2/polymon.egg-info/PKG-INFO +23 -0
- polymon-1.0.2/polymon.egg-info/SOURCES.txt +76 -0
- polymon-1.0.2/polymon.egg-info/dependency_links.txt +1 -0
- polymon-1.0.2/polymon.egg-info/entry_points.txt +2 -0
- polymon-1.0.2/polymon.egg-info/requires.txt +15 -0
- polymon-1.0.2/polymon.egg-info/top_level.txt +1 -0
- polymon-1.0.2/setup.cfg +4 -0
- polymon-1.0.2/setup.py +39 -0
polymon-1.0.2/PKG-INFO
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: polymon
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: PolyMon
|
|
5
|
+
Author: PolyMon Group
|
|
6
|
+
Requires-Dist: mordredcommunity
|
|
7
|
+
Requires-Dist: mordred==1.2.0
|
|
8
|
+
Requires-Dist: rdkit==2023.09.6
|
|
9
|
+
Requires-Dist: xenonpy
|
|
10
|
+
Requires-Dist: xgboost
|
|
11
|
+
Requires-Dist: catboost
|
|
12
|
+
Requires-Dist: lightgbm
|
|
13
|
+
Requires-Dist: loguru
|
|
14
|
+
Requires-Dist: scikit-learn
|
|
15
|
+
Requires-Dist: tabpfn
|
|
16
|
+
Requires-Dist: torchensemble
|
|
17
|
+
Requires-Dist: optuna
|
|
18
|
+
Requires-Dist: lightning
|
|
19
|
+
Requires-Dist: torch_geometric
|
|
20
|
+
Requires-Dist: pykan
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: requires-dist
|
|
23
|
+
Dynamic: summary
|
polymon-1.0.2/README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
<img src="assets/polymon.png" alt="Polymon Icon">
|
|
2
|
+
|
|
3
|
+
`PolyMon` is a unified framework for polymer property prediction. It is designed to be flexible and easy to use. The framework includes various models, featurizers, and different training strategies.
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<img src="assets/framework.png" alt="framework" width="400">
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
This package requires `torch>=2.2.2` and `torch_geometric>=2.5.3`. We recommend installing these packages manually before installing this package.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
conda install -y pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 \
|
|
15
|
+
pytorch-cuda=11.8 -c pytorch -c nvidia
|
|
16
|
+
pip install torch_geometric
|
|
17
|
+
pip install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.3.0+cu118.html
|
|
18
|
+
|
|
19
|
+
pip install polymon
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
### Train
|
|
24
|
+
Train a tabular model or a GNN model for polymer property prediction.
|
|
25
|
+
```bash
|
|
26
|
+
polymon train --help
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Merge
|
|
30
|
+
Merge two datasets into one based on different acquisition functions.
|
|
31
|
+
```bash
|
|
32
|
+
polymon merge --help
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Predict
|
|
36
|
+
Predict labels for a given dataset.
|
|
37
|
+
```bash
|
|
38
|
+
polymon predict --help
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Citation
|
|
42
|
+
To be updated.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '1.0.2'
|
|
File without changes
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from polymon.cli.merge import main as main_merge
|
|
4
|
+
from polymon.cli.train_dl import main as main_dl
|
|
5
|
+
from polymon.cli.train_ml import MODELS
|
|
6
|
+
from polymon.cli.train_ml import main as main_ml
|
|
7
|
+
from polymon.cli.predict import main as main_predict
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_args():
    """Parse command-line arguments for the ``polymon`` CLI.

    Three sub-commands are exposed (selected via ``args.mode``):

    * ``train``   -- train an ML (tabular) or DL (GNN) model.
    * ``merge``   -- merge two datasets based on an acquisition function.
    * ``predict`` -- predict labels for a given dataset.

    Returns:
        argparse.Namespace: parsed arguments for the chosen sub-command.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode', required=True)
    _add_train_args(subparsers.add_parser('train', help='Train a ML/DL model'))
    _add_merge_args(subparsers.add_parser('merge', help='Merge two datasets'))
    _add_predict_args(subparsers.add_parser('predict', help='Predict labels'))
    return parser.parse_args()


def _add_train_args(p):
    """Register the options of the ``train`` sub-command on parser ``p``."""
    # Data selection and experiment bookkeeping.
    p.add_argument('--raw-csv', type=str, default='database/database.csv',
                   help='Path to the raw csv file')
    p.add_argument('--sources', type=str, nargs='+', default=['Kaggle'],
                   help='Sources to use for training')
    p.add_argument('--tag', type=str, default='debug',
                   help='Tag to use for training')
    p.add_argument('--labels', nargs='+', required=True,
                   help='Labels to use for training')
    p.add_argument('--feature-names', type=str, nargs='+', default=['rdkit2d'],
                   help='Feature names to use for training')
    p.add_argument('--n-trials', type=int, default=None,
                   help='Number of trials to run for hyperparameter optimization.')
    p.add_argument('--out-dir', type=str, default='./results',
                   help='Path to the output directory')
    p.add_argument('--hparams-from', type=str, default=None,
                   help='Path to the hparams file. Allowed formats: .json, .pt, .pkl')
    p.add_argument('--n-fold', type=int, default=1,
                   help='Number of folds to use for cross-validation')
    p.add_argument('--split-mode', type=str, default='random',
                   help='Mode to split the data into training, validation, and test sets')
    p.add_argument('--seed', type=int, default=42,
                   help='Seed to use for training')
    p.add_argument('--remove-hydrogens', action='store_true',
                   help='Whether to remove hydrogens from the molecules')
    p.add_argument('--descriptors', type=str, nargs='+', default=None,
                   help='Descriptors to use for training. For ML models, this must be specified.')
    p.add_argument('--model', type=str, default='rf',
                   help='Model to use for training')
    # Model-architecture options, only consumed when --model is not one of
    # the tabular MODELS keys (i.e. a DL model is trained).
    p.add_argument('--hidden-dim', type=int, default=32,
                   help='Hidden dimension of the model')
    p.add_argument('--num-layers', type=int, default=3,
                   help='Number of layers of the model')
    # DL training arguments.
    p.add_argument('--batch-size', type=int, default=128,
                   help='Batch size to use for training')
    p.add_argument('--lr', type=float, default=1e-3,
                   help='Learning rate to use for training')
    p.add_argument('--num-epochs', type=int, default=2500,
                   help='Number of epochs to use for training')
    p.add_argument('--early-stopping-patience', type=int, default=250,
                   help='Number of epochs to wait before early stopping')
    p.add_argument('--device', type=str, default='cuda',
                   help='Device to use for training')
    p.add_argument('--run-production', action='store_true',
                   help=('Whether to run the training in production mode, which means '
                         'train:val:test splits will be forced to 0.95:0.05:0.0'))
    p.add_argument('--finetune', action='store_true',
                   help='Whether to finetune the model')
    p.add_argument('--finetune-csv-path', type=str, default=None,
                   help='Path to the csv file to finetune the model on')
    p.add_argument('--pretrained-model', type=str, default=None,
                   help='Path to the pretrained model')
    p.add_argument('--n-estimator', type=int, default=1,
                   help='Number of estimators to use for training')
    p.add_argument('--additional-features', type=str, nargs='+', default=None,
                   help='Additional features to use for training')
    p.add_argument('--skip-train', action='store_true',
                   help='Whether to skip the training step')
    p.add_argument('--low-fidelity-model', type=str, default=None,
                   help='Path to the low fidelity model')
    p.add_argument('--estimator-name', type=str, default=None,
                   help='Name of the estimator to give base predictions')
    p.add_argument('--emb-model', type=str, default=None,
                   help='Name of the embedding model for base graph embeddings')
    p.add_argument('--ensemble-type', type=str, default='voting',
                   help='Type of ensemble to use for training')
    p.add_argument('--train-residual', action='store_true',
                   help='Whether to train the residual of the model')
    p.add_argument('--normalizer-type', type=str, default='normalizer',
                   choices=['normalizer', 'log_normalizer', 'none'],
                   help='Type of normalizer to use for training')
    p.add_argument('--augmentation', action='store_true',
                   help='Whether to use data augmentation')


def _add_merge_args(p):
    """Register the options of the ``merge`` sub-command on parser ``p``."""
    p.add_argument('--sources', type=str, required=True, nargs='+',
                   help='Sources to merge')
    p.add_argument('--label', type=str, required=True,
                   help='Label to merge')
    p.add_argument('--hparams-from', type=str, required=True,
                   help='Path to the hparams file')
    p.add_argument('--acquisition', type=str, required=True,
                   choices=['epig', 'uncertainty', 'difference'],
                   help='Acquisition function to use for merging')
    p.add_argument('--sample-size', type=int, default=20,
                   help='Sample size to use for merging')
    p.add_argument('--uncertainty-threshold', type=float, default=0.1,
                   help='Uncertainty threshold to use for merging')
    p.add_argument('--difference-threshold', type=float, default=0.1,
                   help='Difference threshold to use for merging')
    p.add_argument('--target-size', type=int, default=1000,
                   help='Target size to use for merging')
    p.add_argument('--base-csv', type=str, default=None,
                   help='Path to the base csv file')


def _add_predict_args(p):
    """Register the options of the ``predict`` sub-command on parser ``p``."""
    p.add_argument('--model-path', type=str, required=True,
                   help='Path to the model')
    p.add_argument('--csv-path', type=str, required=True,
                   help='Path to the csv file')
    p.add_argument('--smiles-column', type=str, required=True,
                   help='Name of the smiles column')
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def main():
    """CLI entry point: parse arguments and dispatch to the chosen sub-command."""
    args = parse_args()
    mode = args.mode
    if mode == 'train':
        # Models registered in MODELS are tabular (ML); everything else
        # is handled by the deep-learning training path.
        trainer = main_ml if args.model in MODELS.keys() else main_dl
        trainer(args)
    elif mode == 'merge':
        main_merge(args)
    elif mode == 'predict':
        main_predict(args)


if __name__ == '__main__':
    main()
|