atom-hifi 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atom_hifi-0.3.0/LICENSE +21 -0
- atom_hifi-0.3.0/PKG-INFO +228 -0
- atom_hifi-0.3.0/README.md +175 -0
- atom_hifi-0.3.0/atom_hifi/__init__.py +11 -0
- atom_hifi-0.3.0/atom_hifi/soap_ML/__init__.py +1 -0
- atom_hifi-0.3.0/atom_hifi/soap_ML/soap.py +125 -0
- atom_hifi-0.3.0/atom_hifi/training/__init__.py +3 -0
- atom_hifi-0.3.0/atom_hifi/training/fr_workflow.py +3414 -0
- atom_hifi-0.3.0/atom_hifi/training/judge.py +1761 -0
- atom_hifi-0.3.0/atom_hifi/training/optimizer.py +519 -0
- atom_hifi-0.3.0/atom_hifi.egg-info/PKG-INFO +228 -0
- atom_hifi-0.3.0/atom_hifi.egg-info/SOURCES.txt +18 -0
- atom_hifi-0.3.0/atom_hifi.egg-info/dependency_links.txt +1 -0
- atom_hifi-0.3.0/atom_hifi.egg-info/requires.txt +8 -0
- atom_hifi-0.3.0/atom_hifi.egg-info/top_level.txt +1 -0
- atom_hifi-0.3.0/pyproject.toml +43 -0
- atom_hifi-0.3.0/setup.cfg +4 -0
- atom_hifi-0.3.0/tests/test_optimizer.py +93 -0
- atom_hifi-0.3.0/tests/test_per_element.py +502 -0
- atom_hifi-0.3.0/tests/test_suite14.py +365 -0
atom_hifi-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Yihua Song
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
atom_hifi-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: atom-hifi
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Atom-HiFi: atomistic high-fidelity representative-set selection framework
|
|
5
|
+
Author-email: Yihua Song <mothinesong@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Yihua Song
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://gitlab.mpcdf.mpg.de/yhsong/atom-hifi
|
|
29
|
+
Project-URL: Source, https://gitlab.mpcdf.mpg.de/yhsong/atom-hifi
|
|
30
|
+
Project-URL: Issue Tracker, https://gitlab.mpcdf.mpg.de/yhsong/atom-hifi/-/issues
|
|
31
|
+
Keywords: machine learning,interatomic potentials,training set,SOAP,atomic environments,active learning
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Science/Research
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
42
|
+
Requires-Python: >=3.9
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: numpy
|
|
46
|
+
Requires-Dist: ase
|
|
47
|
+
Requires-Dist: matplotlib
|
|
48
|
+
Requires-Dist: pandas
|
|
49
|
+
Requires-Dist: scipy
|
|
50
|
+
Requires-Dist: scikit-learn
|
|
51
|
+
Requires-Dist: dscribe
|
|
52
|
+
Requires-Dist: pymoo>=0.6
|
|
53
|
+
|
|
54
|
+
# Atom-HiFi
|
|
55
|
+
|
|
56
|
+
**Atom**istic **Hi**gh-**Fi**delity representative-set selection framework.
|
|
57
|
+
|
|
58
|
+
Applications include:
|
|
59
|
+
- MLIP training-set curation and active-learning loops
|
|
60
|
+
- Chemical motif identification and distribution analysis
|
|
61
|
+
- Diversity-aware structure sampling from large databases
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## What is Atom-HiFi?
|
|
66
|
+
|
|
67
|
+
Atom-HiFi selects the smallest subset **S** of structures such that the
|
|
68
|
+
atomic-environment distribution of **S** covers the full library with
|
|
69
|
+
user-specified fidelity **F**. It works with any per-atom descriptor
|
|
70
|
+
(ED-SOAP built-in; MACE ACE supported) and is agnostic to the downstream
|
|
71
|
+
task — training-set curation, motif analysis, or database sampling.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Key concepts
|
|
76
|
+
|
|
77
|
+
### Fidelity / Redundancy (F/R)
|
|
78
|
+
|
|
79
|
+
Each atom is assigned to a **microstate** (a Voronoi cell in the whitened
|
|
80
|
+
descriptor space produced by k-means). **Fidelity** F measures how uniformly
|
|
81
|
+
the selected set's microstate population matches the full library; **Redundancy**
|
|
82
|
+
R measures how many atoms are packed per occupied microstate.
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
F = H(S) / H(L)
|
|
86
|
+
H = -Σ p_i ln p_i (Shannon entropy over microstate populations)
|
|
87
|
+
|
|
88
|
+
R = (N_S / k_occ^S) / (N_L / k_occ^L)
|
|
89
|
+
N_S, N_L = total atoms in selected set / full library
|
|
90
|
+
k_occ^S, k_occ^L = occupied microstates in S / L
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The scan sweeps a bandwidth parameter **c** (scaling factor on ε_noise) and
|
|
94
|
+
finds the operating point **c*** that maximises F/R subject to F ≥ F_TOL
|
|
95
|
+
(default 0.90).
|
|
96
|
+
|
|
97
|
+
### ED-SOAP descriptor
|
|
98
|
+
|
|
99
|
+
Two concatenated SOAP power-spectrum vectors per atom — one short-range
|
|
100
|
+
(bonding geometry) and one long-range (coordination shell) — normalised by a
|
|
101
|
+
system-specific `lengthscale`. No GPU required. The full parameter set is
|
|
102
|
+
exposed in `fr_workflow_tutorial.py` under the `EDS_*` variables.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Installation
|
|
107
|
+
|
|
108
|
+
**Step 1 — install `decaf`** (required; not on PyPI):
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install git+https://gitlab.mpcdf.mpg.de/klai/decaf.git
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Step 2 — install Atom-HiFi**:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pip install atom-hifi
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
> Python ≥ 3.9 required.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Quick start
|
|
125
|
+
|
|
126
|
+
Copy the tutorial script to your working directory and set the four variables at
|
|
127
|
+
the top:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
cp fr_workflow_tutorial.py ./my_run.py
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Edit `my_run.py`:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
LIB_PATH = 'train_structs.xyz' # ASE-readable structure library
|
|
137
|
+
FOCUS_ELEMENTS = ['Ni', 'O'] # elements to cluster on
|
|
138
|
+
DESCRIPTOR = 'eds' # 'eds', 'ace', or 'custom'
|
|
139
|
+
OUTPUT_DIR = 'fr_results'
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Run:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
mkdir -p fr_results && python -u my_run.py 2>&1 | tee fr_results/run.out
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Output files
|
|
151
|
+
|
|
152
|
+
| File | Description |
|
|
153
|
+
|---|---|
|
|
154
|
+
| `representatives.xyz` | Selected representative structures |
|
|
155
|
+
| `fine_scan.out` | F, R, FR, \|S\|, atoms for every fine-scan point |
|
|
156
|
+
| `FR_final.png` | Coarse + fine F/R scan diagnostic plot |
|
|
157
|
+
| `learning_curve.png` | AL loop convergence (only with `RUN_LOOP=True`) |
|
|
158
|
+
| `eps_noise_raw.npz` | Cached per-element ε_noise values |
|
|
159
|
+
| `desc_lib.pkl` | Cached per-structure descriptors |
|
|
160
|
+
| `surroundings_{el}.xyz` | Per-group coordination spheres (`EXTRACT_SURROUNDINGS=True`) |
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Configuration reference
|
|
165
|
+
|
|
166
|
+
All settings live at the top of `fr_workflow_tutorial.py`.
|
|
167
|
+
|
|
168
|
+
| Group | Variables |
|
|
169
|
+
|---|---|
|
|
170
|
+
| **Paths** | `LIB_PATH`, `PATIENT_PATH`, `FOCUS_ELEMENTS`, `OUTPUT_DIR` |
|
|
171
|
+
| **Descriptor** | `DESCRIPTOR`, `EDS_LENGTHSCALE`, `EDS_S_CUT`, `EDS_S_NMAX`, `EDS_S_LMAX`, `EDS_L_CUT`, `EDS_L_NMAX`, `EDS_L_LMAX`, `EDS_PERIODIC`, `EDS_R_CUT` |
|
|
172
|
+
| **Scan** | `F_TOL`, `N_COARSE`, `N_FINE`, `N_JOBS`, `C_FACTOR_RANGE` |
|
|
173
|
+
| **Refit** | `REFIT_DELTA`, `REFIT_GRID_POINT` |
|
|
174
|
+
| **Optional stages** | `RUN_LOOP`, `RUN_GRID_SCAN`, `RUN_NSGA2`, `EXTRACT_SURROUNDINGS` |
|
|
175
|
+
|
|
176
|
+
Full inline documentation for every variable is in the tutorial script.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Advanced usage
|
|
181
|
+
|
|
182
|
+
<details>
|
|
183
|
+
<summary>Active-learning loop (<code>RUN_LOOP=True</code>)</summary>
|
|
184
|
+
|
|
185
|
+
Iteratively expands the training pool by sampling batches from the full library.
|
|
186
|
+
Inner iterations use a coarse scan only; one final fine scan runs at the end.
|
|
187
|
+
Set `INITIAL_SAMPLE` and `LOOP_SKIP_FINE_SCAN` to control the initial pool size
|
|
188
|
+
and inner-scan resolution.
|
|
189
|
+
|
|
190
|
+
</details>
|
|
191
|
+
|
|
192
|
+
<details>
|
|
193
|
+
<summary>Per-element ND grid scan (<code>RUN_GRID_SCAN=True</code>)</summary>
|
|
194
|
+
|
|
195
|
+
Sweeps independent c-factors per focus element on a Cartesian grid, reusing
|
|
196
|
+
cached per-element DECAF fits from the 1-D scan. Cost is O(n^N_el) cover
|
|
197
|
+
evaluations instead of O(n^N_el × N_el) DECAF fits — tractable for N_el ≤ 3–4.
|
|
198
|
+
Results in `scan_grid.csv` and `scan_grid_report.png`.
|
|
199
|
+
|
|
200
|
+
</details>
|
|
201
|
+
|
|
202
|
+
<details>
|
|
203
|
+
<summary>NSGA-II Pareto optimisation (<code>RUN_NSGA2=True</code>)</summary>
|
|
204
|
+
|
|
205
|
+
Stochastic multi-objective optimisation of per-element c-factors via NSGA-II
|
|
206
|
+
(requires `pymoo`). Use when the grid is too large (N_el ≥ 4) or you want a
|
|
207
|
+
continuous Pareto front. Results in `pareto_front.csv` and three diagnostic
|
|
208
|
+
PNGs.
|
|
209
|
+
|
|
210
|
+
</details>
|
|
211
|
+
|
|
212
|
+
<details>
|
|
213
|
+
<summary>Representative environment extraction (<code>EXTRACT_SURROUNDINGS=True</code>)</summary>
|
|
214
|
+
|
|
215
|
+
Exports the local coordination sphere around the centroid-closest atom of each
|
|
216
|
+
DECAF group. Two modes: `'sphere'` (non-periodic ASE Atoms cluster) and
|
|
217
|
+
`'full_structure'` (original cell with center/neighbour/rest tags). Output:
|
|
218
|
+
`surroundings_{el}.xyz` per focus element.
|
|
219
|
+
|
|
220
|
+
</details>
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Citation
|
|
225
|
+
|
|
226
|
+
If you use Atom-HiFi in your research, please cite:
|
|
227
|
+
|
|
228
|
+
> [paper in preparation — citation will be added upon publication]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Atom-HiFi
|
|
2
|
+
|
|
3
|
+
**Atom**istic **Hi**gh-**Fi**delity representative-set selection framework.
|
|
4
|
+
|
|
5
|
+
Applications include:
|
|
6
|
+
- MLIP training-set curation and active-learning loops
|
|
7
|
+
- Chemical motif identification and distribution analysis
|
|
8
|
+
- Diversity-aware structure sampling from large databases
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## What is Atom-HiFi?
|
|
13
|
+
|
|
14
|
+
Atom-HiFi selects the smallest subset **S** of structures such that the
|
|
15
|
+
atomic-environment distribution of **S** covers the full library with
|
|
16
|
+
user-specified fidelity **F**. It works with any per-atom descriptor
|
|
17
|
+
(ED-SOAP built-in; MACE ACE supported) and is agnostic to the downstream
|
|
18
|
+
task — training-set curation, motif analysis, or database sampling.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Key concepts
|
|
23
|
+
|
|
24
|
+
### Fidelity / Redundancy (F/R)
|
|
25
|
+
|
|
26
|
+
Each atom is assigned to a **microstate** (a Voronoi cell in the whitened
|
|
27
|
+
descriptor space produced by k-means). **Fidelity** F measures how uniformly
|
|
28
|
+
the selected set's microstate population matches the full library; **Redundancy**
|
|
29
|
+
R measures how many atoms are packed per occupied microstate.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
F = H(S) / H(L)
|
|
33
|
+
H = -Σ p_i ln p_i (Shannon entropy over microstate populations)
|
|
34
|
+
|
|
35
|
+
R = (N_S / k_occ^S) / (N_L / k_occ^L)
|
|
36
|
+
N_S, N_L = total atoms in selected set / full library
|
|
37
|
+
k_occ^S, k_occ^L = occupied microstates in S / L
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The scan sweeps a bandwidth parameter **c** (scaling factor on ε_noise) and
|
|
41
|
+
finds the operating point **c*** that maximises F/R subject to F ≥ F_TOL
|
|
42
|
+
(default 0.90).
|
|
43
|
+
|
|
44
|
+
### ED-SOAP descriptor
|
|
45
|
+
|
|
46
|
+
Two concatenated SOAP power-spectrum vectors per atom — one short-range
|
|
47
|
+
(bonding geometry) and one long-range (coordination shell) — normalised by a
|
|
48
|
+
system-specific `lengthscale`. No GPU required. The full parameter set is
|
|
49
|
+
exposed in `fr_workflow_tutorial.py` under the `EDS_*` variables.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
**Step 1 — install `decaf`** (required; not on PyPI):
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install git+https://gitlab.mpcdf.mpg.de/klai/decaf.git
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Step 2 — install Atom-HiFi**:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install atom-hifi
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
> Python ≥ 3.9 required.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Quick start
|
|
72
|
+
|
|
73
|
+
Copy the tutorial script to your working directory and set the four variables at
|
|
74
|
+
the top:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
cp fr_workflow_tutorial.py ./my_run.py
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Edit `my_run.py`:
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
LIB_PATH = 'train_structs.xyz' # ASE-readable structure library
|
|
84
|
+
FOCUS_ELEMENTS = ['Ni', 'O'] # elements to cluster on
|
|
85
|
+
DESCRIPTOR = 'eds' # 'eds', 'ace', or 'custom'
|
|
86
|
+
OUTPUT_DIR = 'fr_results'
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Run:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
mkdir -p fr_results && python -u my_run.py 2>&1 | tee fr_results/run.out
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Output files
|
|
98
|
+
|
|
99
|
+
| File | Description |
|
|
100
|
+
|---|---|
|
|
101
|
+
| `representatives.xyz` | Selected representative structures |
|
|
102
|
+
| `fine_scan.out` | F, R, FR, \|S\|, atoms for every fine-scan point |
|
|
103
|
+
| `FR_final.png` | Coarse + fine F/R scan diagnostic plot |
|
|
104
|
+
| `learning_curve.png` | AL loop convergence (only with `RUN_LOOP=True`) |
|
|
105
|
+
| `eps_noise_raw.npz` | Cached per-element ε_noise values |
|
|
106
|
+
| `desc_lib.pkl` | Cached per-structure descriptors |
|
|
107
|
+
| `surroundings_{el}.xyz` | Per-group coordination spheres (`EXTRACT_SURROUNDINGS=True`) |
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Configuration reference
|
|
112
|
+
|
|
113
|
+
All settings live at the top of `fr_workflow_tutorial.py`.
|
|
114
|
+
|
|
115
|
+
| Group | Variables |
|
|
116
|
+
|---|---|
|
|
117
|
+
| **Paths** | `LIB_PATH`, `PATIENT_PATH`, `FOCUS_ELEMENTS`, `OUTPUT_DIR` |
|
|
118
|
+
| **Descriptor** | `DESCRIPTOR`, `EDS_LENGTHSCALE`, `EDS_S_CUT`, `EDS_S_NMAX`, `EDS_S_LMAX`, `EDS_L_CUT`, `EDS_L_NMAX`, `EDS_L_LMAX`, `EDS_PERIODIC`, `EDS_R_CUT` |
|
|
119
|
+
| **Scan** | `F_TOL`, `N_COARSE`, `N_FINE`, `N_JOBS`, `C_FACTOR_RANGE` |
|
|
120
|
+
| **Refit** | `REFIT_DELTA`, `REFIT_GRID_POINT` |
|
|
121
|
+
| **Optional stages** | `RUN_LOOP`, `RUN_GRID_SCAN`, `RUN_NSGA2`, `EXTRACT_SURROUNDINGS` |
|
|
122
|
+
|
|
123
|
+
Full inline documentation for every variable is in the tutorial script.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Advanced usage
|
|
128
|
+
|
|
129
|
+
<details>
|
|
130
|
+
<summary>Active-learning loop (<code>RUN_LOOP=True</code>)</summary>
|
|
131
|
+
|
|
132
|
+
Iteratively expands the training pool by sampling batches from the full library.
|
|
133
|
+
Inner iterations use a coarse scan only; one final fine scan runs at the end.
|
|
134
|
+
Set `INITIAL_SAMPLE` and `LOOP_SKIP_FINE_SCAN` to control the initial pool size
|
|
135
|
+
and inner-scan resolution.
|
|
136
|
+
|
|
137
|
+
</details>
|
|
138
|
+
|
|
139
|
+
<details>
|
|
140
|
+
<summary>Per-element ND grid scan (<code>RUN_GRID_SCAN=True</code>)</summary>
|
|
141
|
+
|
|
142
|
+
Sweeps independent c-factors per focus element on a Cartesian grid, reusing
|
|
143
|
+
cached per-element DECAF fits from the 1-D scan. Cost is O(n^N_el) cover
|
|
144
|
+
evaluations instead of O(n^N_el × N_el) DECAF fits — tractable for N_el ≤ 3–4.
|
|
145
|
+
Results in `scan_grid.csv` and `scan_grid_report.png`.
|
|
146
|
+
|
|
147
|
+
</details>
|
|
148
|
+
|
|
149
|
+
<details>
|
|
150
|
+
<summary>NSGA-II Pareto optimisation (<code>RUN_NSGA2=True</code>)</summary>
|
|
151
|
+
|
|
152
|
+
Stochastic multi-objective optimisation of per-element c-factors via NSGA-II
|
|
153
|
+
(requires `pymoo`). Use when the grid is too large (N_el ≥ 4) or you want a
|
|
154
|
+
continuous Pareto front. Results in `pareto_front.csv` and three diagnostic
|
|
155
|
+
PNGs.
|
|
156
|
+
|
|
157
|
+
</details>
|
|
158
|
+
|
|
159
|
+
<details>
|
|
160
|
+
<summary>Representative environment extraction (<code>EXTRACT_SURROUNDINGS=True</code>)</summary>
|
|
161
|
+
|
|
162
|
+
Exports the local coordination sphere around the centroid-closest atom of each
|
|
163
|
+
DECAF group. Two modes: `'sphere'` (non-periodic ASE Atoms cluster) and
|
|
164
|
+
`'full_structure'` (original cell with center/neighbour/rest tags). Output:
|
|
165
|
+
`surroundings_{el}.xyz` per focus element.
|
|
166
|
+
|
|
167
|
+
</details>
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Citation
|
|
172
|
+
|
|
173
|
+
If you use Atom-HiFi in your research, please cite:
|
|
174
|
+
|
|
175
|
+
> [paper in preparation — citation will be added upon publication]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
__version__ = "0.3.0"

# Probe for the 'decaf' dependency at import time so that a missing install
# produces an actionable message instead of a bare ModuleNotFoundError.
try:
    import decaf  # noqa: F401
except ModuleNotFoundError as exc:
    # Only translate the case where 'decaf' itself is absent.  If decaf is
    # installed but one of *its* imports is missing, exc.name names that other
    # module; re-raise untouched so the real cause is not hidden behind a
    # misleading "install decaf" hint.
    if exc.name != "decaf":
        raise
    raise ImportError(
        "atom-hifi requires 'decaf', which is not on PyPI.\n"
        "Install it first:\n\n"
        "    pip install git+https://gitlab.mpcdf.mpg.de/klai/decaf.git\n\n"
        "Then re-import atom_hifi."
    ) from None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from atom_hifi.soap_ML.soap import generate_ED_SOAP
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from dscribe.descriptors import SOAP
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
from scipy.spatial.distance import pdist, squareform
|
|
4
|
+
import numpy as np
|
|
5
|
+
import decaf
|
|
6
|
+
|
|
7
|
+
class SOAP_Conts:
    """Build SOAP descriptors for systems and keep a running history of them.

    The constructor configures one DScribe ``SOAP`` descriptor.  Each call to
    :meth:`create` stores the input system in ``systems`` and the descriptor
    computed for it in ``systems_soap`` at the same index.
    """

    # Declared for type checkers only.  The lists are created per instance in
    # __init__: a class-level ``= []`` would be one list shared by every
    # SOAP_Conts object, mixing the histories of unrelated containers.
    systems_soap: List[SOAP]
    systems: List

    def __init__(self, species, periodic=True, r_cut=4.0, n_max=8, l_max=6, sparse=False):
        """Store the settings and construct the underlying ``SOAP`` descriptor.

        All arguments are kept as attributes of the same name and forwarded to
        ``SOAP(...)`` under the same keyword names.
        """
        self.species = species
        self.periodic = periodic
        self.r_cut = r_cut
        self.n_max = n_max
        self.l_max = l_max
        self.sparse = sparse

        # Per-instance history (see the note on the class-level declarations).
        self.systems = []
        self.systems_soap = []

        self.soap = SOAP(
            species=species,
            periodic=periodic,
            r_cut=r_cut,
            n_max=n_max,
            l_max=l_max,
            sparse=sparse,
        )

    def create(self, system, use_ape: bool = False, **soap_params):
        """Compute the descriptor for ``system`` and record both in the history.

        Args:
            system: The structure to describe; passed to ``self.soap.create``
                or, wrapped in a one-element list, to ``generate_ED_SOAP``.
            use_ape: If true, compute an ED-SOAP descriptor with
                ``generate_ED_SOAP`` (using ``self.species``) instead of the
                plain SOAP descriptor configured in the constructor.
            **soap_params: Extra keyword arguments forwarded to
                ``generate_ED_SOAP``.  Unused when ``use_ape`` is false.

        Returns:
            A ``(descriptor, species)`` tuple: the descriptor just computed
            and ``self.species``.
        """
        if use_ape:
            descriptor = generate_ED_SOAP([system], species=self.species, **soap_params)
        else:
            descriptor = self.soap.create(system)

        # Record only after the descriptor was computed successfully, so that
        # `systems` and `systems_soap` always stay index-aligned even when the
        # descriptor computation raises.
        self.systems.append(system)
        self.systems_soap.append(descriptor)
        return descriptor, self.species
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_dist(soap_1, soap_2):
    """Return the square pairwise-distance matrix of two stacked descriptor sets.

    The rows of ``soap_1`` and ``soap_2`` are stacked vertically (``soap_1``
    first) and the distance between every pair of rows of the stacked array is
    computed with ``pdist`` using its default metric.  The result therefore
    also contains the distances *within* each input, not only between them.
    """
    stacked_rows = np.vstack([soap_1, soap_2])
    condensed = pdist(stacked_rows)
    # Expand the condensed vector into the full symmetric matrix.
    full_matrix = squareform(condensed)
    return full_matrix
|
|
44
|
+
|
|
45
|
+
def generate_ED_SOAP(lib: List,
                     lengthscale=1.2,
                     SOAP_S_cut=(1+np.sqrt(3))/2,
                     SOAP_S_nmax=8,
                     SOAP_S_lmax=4,
                     SOAP_L_cut=(2+3)/2,
                     SOAP_L_nmax=4,
                     SOAP_L_lmax=3,
                     species=None,
                     periodic=True,
                     ):
    """Embedded Doubled SOAP (ED-SOAP): concatenates a short-range and a
    long-range SOAP per atom (hence 'doubled'), scaled by `lengthscale`.
    Designed for use with DECAF's PCA embedding + MSC clustering
    (`decaf.embed_cluster`).

    Args:
        lib: Structures to describe.  Each one must expose ``.numbers`` (used
            to derive ``species``) and is passed to ``decaf.get_SOAP``.
        lengthscale: Length scale in angstrom used to normalise the cut-off
            radii and sigmas; a typical nearest-neighbour separation of the
            system is appropriate (~1.42 angstrom for carbon-carbon bonding).
        SOAP_S_cut: Short-range cut-off in units of ``lengthscale``.  The
            default lies between the 1st and 2nd coordination shell of carbon
            in graphene.
        SOAP_S_nmax, SOAP_S_lmax: Maximum expansion degrees of the short-range
            SOAP.
        SOAP_L_cut: Long-range cut-off in units of ``lengthscale``.  The
            default lies between the 3rd and 4th coordination shell of carbon
            in graphene.
        SOAP_L_nmax, SOAP_L_lmax: Maximum expansion degrees of the long-range
            SOAP.
        species: Chemical species (atomic numbers) included in the SOAP.  If
            ``None``, all species present in ``lib`` are used.
        periodic: Whether the structures are treated as periodic; forwarded to
            ``decaf.get_SOAP``.

    Returns:
        The per-structure outputs of ``decaf.get_SOAP`` concatenated along the
        first axis (reported by the log line as ``[atoms, soap_shape]``).

    Raises:
        ValueError: If ``lib`` is empty.
    """
    structures = list(lib)
    if not structures:
        # np.concatenate would raise the same exception type, but with a
        # message that does not point at the actual problem.
        raise ValueError('generate_ED_SOAP needs at least one structure in `lib`.')

    # Usually 1/8 of the cut-off is appropriate for the Gaussian width; tune
    # if needed.  Like the cut-offs, the sigmas are given in units of
    # `lengthscale` (decaf.get_SOAP is expected to multiply by it before
    # calling DScribe).
    SOAP_S_sigma = SOAP_S_cut / 8
    SOAP_L_sigma = SOAP_L_cut / 8

    if species is None:
        # Default to every chemical species that occurs in the library.
        all_numbers = np.concatenate([struct.numbers for struct in structures])
        species = np.unique(all_numbers).astype('int')

    # Obtain the full ED-SOAP of every structure.  `periodic` is passed
    # through from the caller; it used to be hard-coded to True here, which
    # silently ignored periodic=False.
    training_set = np.concatenate([
        decaf.get_SOAP(
            struct, lengthscale, species=species,
            SOAP_S_cut=SOAP_S_cut, SOAP_S_sigma=SOAP_S_sigma,
            SOAP_S_nmax=SOAP_S_nmax, SOAP_S_lmax=SOAP_S_lmax,
            SOAP_L_cut=SOAP_L_cut, SOAP_L_sigma=SOAP_L_sigma,
            SOAP_L_nmax=SOAP_L_nmax, SOAP_L_lmax=SOAP_L_lmax,
            periodic=periodic,
        )
        for struct in structures
    ])

    print('The shape of the training set is [atoms,soap_shape]:',training_set.shape)

    return training_set
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def generate_ape(*args, **kwargs):
    """Deprecated shim kept for old call sites; use generate_ED_SOAP.

    Emits a ``DeprecationWarning`` and then forwards every positional and
    keyword argument unchanged to ``generate_ED_SOAP``, returning its result.
    """
    from warnings import warn

    message = 'generate_ape is deprecated; use generate_ED_SOAP instead.'
    # stacklevel=2 attributes the warning to the caller of generate_ape
    # rather than to this wrapper.
    warn(message, category=DeprecationWarning, stacklevel=2)

    result = generate_ED_SOAP(*args, **kwargs)
    return result
|
|
125
|
+
|