geodesic-interpolate 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geodesic_interpolate-1.0.0/LICENSE +21 -0
- geodesic_interpolate-1.0.0/MANIFEST.in +2 -0
- geodesic_interpolate-1.0.0/PKG-INFO +155 -0
- geodesic_interpolate-1.0.0/README.md +116 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/__init__.py +2 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/__main__.py +86 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/coord_utils.py +221 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/fileio.py +49 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/geodesic.py +233 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate/interpolation.py +134 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/PKG-INFO +155 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/SOURCES.txt +17 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/dependency_links.txt +1 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/entry_points.txt +2 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/requires.txt +2 -0
- geodesic_interpolate-1.0.0/geodesic_interpolate.egg-info/top_level.txt +1 -0
- geodesic_interpolate-1.0.0/pyproject.toml +3 -0
- geodesic_interpolate-1.0.0/setup.cfg +4 -0
- geodesic_interpolate-1.0.0/setup.py +49 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Xiaolei Zhu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: geodesic_interpolate
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Interpolation and smoothing of reaction paths with geodesics in redundant internal coordinates.
|
|
5
|
+
Home-page: https://github.com/virtualzx-nad/geodesic-interpolate
|
|
6
|
+
Author: Xiaolei Zhu
|
|
7
|
+
Author-email: virtualzx@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Bug Reports, https://github.com/virtualzx-nad/geodesic-interpolate/issues
|
|
10
|
+
Project-URL: Source, https://github.com/virtualzx-nad/geodesic-interpolate
|
|
11
|
+
Keywords: chemistry,molecular dynamics,reaction paths,geodesics
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.13
|
|
25
|
+
Requires-Dist: scipy>=0.19
|
|
26
|
+
Dynamic: author
|
|
27
|
+
Dynamic: author-email
|
|
28
|
+
Dynamic: classifier
|
|
29
|
+
Dynamic: description
|
|
30
|
+
Dynamic: description-content-type
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: keywords
|
|
33
|
+
Dynamic: license
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
Dynamic: project-url
|
|
36
|
+
Dynamic: requires-dist
|
|
37
|
+
Dynamic: requires-python
|
|
38
|
+
Dynamic: summary
|
|
39
|
+
|
|
40
|
+
Geodesic interpolations of reaction pathways
|
|
41
|
+
====
|
|
42
|
+
Constructing interpolation paths between molecular geometries to obtain reaction path.
|
|
43
|
+
|
|
44
|
+
Traditional interpolation methods encounter difficulty when it comes to redundant internal coordinate spaces, because the feasible physical space composes only a very small and highly curved subspace. In this method, we avoid the problem of feasibility by operating strictly in feasible space, and bring in the benefit of internal coordinates through proper application of the corresponding metric tensor. With this new formulation, we view the configuration space as a Riemannian manifold with a metric generated from a set of internal coordinates. The interpolation paths are defined as geodesic curves on such manifolds. In other words the integrated total coordinate change is minimized. Such a definition ensures that the constructed paths are smooth and well behaved. The package is also used for smoothing discontinuous or noisy trajectories obtained from MD simulations.
|
|
45
|
+
|
|
46
|
+
It has been shown that the method generates smooth paths with reasonable barrier height even for highly complex reactions, such as protein unfolding or concerted cycloaddition reactions with many simultaneous ring formations. The default coordinate system uses Morse scaled pair-wise distances. The lengths in such coordinate systems have the physical meaning of the total number of bond changes along the path.
|
|
47
|
+
|
|
48
|
+
This is a pure python implementation, so it is not optimized for speed, but rather is intended to serve as a reference implementation of the algorithms described in the paper. Still, interpolating systems with ~1000 atoms should not be a problem.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
Directory Structure
|
|
52
|
+
----
|
|
53
|
+
- geodesic_interpolate Python package for interpolation and smoothing by finding geodesic curves with redundant internal metrics
|
|
54
|
+
- `__init__.py` Python package file
|
|
55
|
+
- `__main__.py` Standalone script for performing interpolation and smoothings.
|
|
56
|
+
- `geodesic.py` Computation and minimization of path length in redundant internal metrics. This is used to optimize
|
|
57
|
+
a path way to find a geodesic. Cannot change the number of images during optimization
|
|
58
|
+
- `interpolation.py` Generating approximate interpolation points to be used as starting guess for the geodesic optimizations.
|
|
59
|
+
Recursively attempt to perform bisections on largest segment in internal space. Generally will create path
|
|
60
|
+
with discontinuities, so a smoothing process needs to follow this.
|
|
61
|
+
- `coord_utils.py` Coordinate utilities. A simplified version of Nanoreactor coordinates module which provide a scaled
|
|
62
|
+
interatomic distance coordinate, pair screening based on threshold and connectivity, as well as trans-rotational
|
|
63
|
+
alignment based on Kabsch algorithm.
|
|
64
|
+
- `fileio.py` XYZ file reading and writing
|
|
65
|
+
- setup.py Installation script. This will install both the Python package, which can be imported by name `geodesic_interpolate`
|
|
66
|
+
and the standalone script, which is also named `geodesic_interpolate`
|
|
67
|
+
- test_cases A few test cases used to check the performance of the code. Note that the large ones may take a few minutes
|
|
68
|
+
thanks to Python. Especially need to run them when testing out alternative coordinate scaling methods.
|
|
69
|
+
- `H+CH4_CH3+H2.xyz` A simple test case. Should always work
|
|
70
|
+
- `DielsAlder.xyz` Dehydro-Diels-Alder reaction. This is an important test case because the initial structure is planar symmetric and
|
|
71
|
+
could access both the final structure and its mirror image, which has exactly the same internal coordinates. Proper
|
|
72
|
+
monitoring of geodesic length during the interpolation process is therefore crucial for this to work, without which
|
|
73
|
+
the raw interpolated path will jump between mirror images and the optimized path would contain some flopping.
|
|
74
|
+
Also tests the non-sweeping global path optimization algo in a relatively large system.
|
|
75
|
+
- `TrpCage_unfold.xyz` Unfolding a Trp-cage mini-protein. Needs at least 10 images to work. Folded geometry taken from Stefan's test
|
|
76
|
+
directory which is instead taken from Nathan. Unfolded structure is generated by force navigated MD for 1ns under
|
|
77
|
+
1nN on C and N terminal with ReaxFF, then optimizing without force at 6-31g/b3lyp level. For testing many
|
|
78
|
+
simultaneous large amplitude motions.
|
|
79
|
+
- `collagen.xyz` Interpolate between the solution and crystal structure of collagen Kunitz domain 1kun - 1kth. Solvents removed
|
|
80
|
+
from the PDB structures. Tests collision avoidance for large movements in the core of folded protein. Other
|
|
81
|
+
groups should slightly breathe to create room for the part that changes.
|
|
82
|
+
- `calcium_binding.xyz` Binding two Ca2+ ions to the yeast Calmodulin N terminal domain 1f54 -1f55. The apo structure did not have
|
|
83
|
+
ions, so two were added by hand at random locations away from the protein. It seems to be hard to avoid
|
|
84
|
+
large movements of the Ca2+ cations when they are very far away from the protein but once in contact they should
|
|
85
|
+
move smoothly. This is to test if the interpolator can correctly route and find the entry point and connect the
|
|
86
|
+
path.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
Prerequisites
|
|
90
|
+
----
|
|
91
|
+
|
|
92
|
+
Python : >=3.8
|
|
93
|
+
|
|
94
|
+
Numpy : Tested with 1.13
|
|
95
|
+
|
|
96
|
+
Scipy : Tested with 0.19
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
Installation
|
|
100
|
+
----
|
|
101
|
+
|
|
102
|
+
### From PyPI
|
|
103
|
+
|
|
104
|
+
Once a release has been published to PyPI, install the package with:
|
|
105
|
+
|
|
106
|
+
pip install geodesic_interpolate
|
|
107
|
+
|
|
108
|
+
### From Source
|
|
109
|
+
|
|
110
|
+
The package can be used without installation from the package directory with
|
|
111
|
+
|
|
112
|
+
python -m geodesic_interpolate filename ...
|
|
113
|
+
|
|
114
|
+
To use the script from an arbitrary location or import the Python module, install the package with pip:
|
|
115
|
+
|
|
116
|
+
python -m pip install .
|
|
117
|
+
|
|
118
|
+
This will install a Python package `geodesic_interpolate` and a standalone script `geodesic_interpolate`.
|
|
119
|
+
The package can be invoked from an arbitrary location using the aforementioned command line after installation,
|
|
120
|
+
and a standalone script with the same signature can also be used
|
|
121
|
+
|
|
122
|
+
geodesic_interpolate filename.xyz --output output.xyz --nimages 20 ...
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
Usage
|
|
126
|
+
----
|
|
127
|
+
usage: geodesic_interpolate [-h] [--nimages NIMAGES] [--sweep] [--no-sweep]
|
|
128
|
+
[--output OUTPUT] [--tol TOL] [--maxiter MAXITER]
|
|
129
|
+
[--microiter MICROITER] [--scaling SCALING]
|
|
130
|
+
[--friction FRICTION] [--dist-cutoff DIST_CUTOFF]
|
|
131
|
+
[--logging {DEBUG,INFO,WARNING,ERROR}]
|
|
132
|
+
[--save-raw SAVE_RAW]
|
|
133
|
+
filename
|
|
134
|
+
|
|
135
|
+
Interpolates between two geometries
|
|
136
|
+
|
|
137
|
+
positional arguments:
|
|
138
|
+
* filename XYZ file containing geometries. If the number of images is smaller than the desired number,
|
|
139
|
+
interpolation points will be added. If the number is greater, subsampling will be performed.
|
|
140
|
+
|
|
141
|
+
optional arguments:
|
|
142
|
+
* `-h`, `--help` show this help message and exit
|
|
143
|
+
* `--nimages NIMAGES` Number of images. (default: 17)
|
|
144
|
+
* `--sweep` Sweep across the path optimizing one image at a time, instead of moving all images at the same time.
|
|
145
|
+
Default is to perform sweeping updates if there are more than 35 atoms. (default: None)
|
|
146
|
+
* `--no-sweep` Do not perform sweeping. (default: None)
|
|
147
|
+
* `--output OUTPUT` Output filename. (default: interpolated.xyz)
|
|
148
|
+
* `--tol TOL` Convergence tolerance (default: 0.002)
|
|
149
|
+
* `--maxiter MAXITER` Maximum number of minimization iterations (default: 15)
|
|
150
|
+
* `--microiter MICROITER` Maximum number of micro iterations for sweeping algorithm. (default: 20)
|
|
151
|
+
* `--scaling SCALING` Exponential parameter for morse potential (default: 1.7)
|
|
152
|
+
* `--friction FRICTION` Size of friction term used to prevent very large change of geometry. (default: 0.01)
|
|
153
|
+
* `--dist-cutoff DIST_CUTOFF` Cut-off value for the distance between a pair of atoms to be included in the coordinate system. (default: 3)
|
|
154
|
+
* `--logging {DEBUG,INFO,WARNING,ERROR}` Logging level to adopt [ DEBUG | INFO | WARNING | ERROR ] (default: INFO)
|
|
155
|
+
* `--save-raw SAVE_RAW` When specified, save the raw path after bisections before smoothing. (default: None)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Geodesic interpolations of reaction pathways
|
|
2
|
+
====
|
|
3
|
+
Constructing interpolation paths between molecular geometries to obtain reaction path.
|
|
4
|
+
|
|
5
|
+
Traditional interpolation methods encounter difficulty when it comes to redundant internal coordinate spaces, because the feasible physical space composes only a very small and highly curved subspace. In this method, we avoid the problem of feasibility by operating strictly in feasible space, and bring in the benefit of internal coordinates through proper application of the corresponding metric tensor. With this new formulation, we view the configuration space as a Riemannian manifold with a metric generated from a set of internal coordinates. The interpolation paths are defined as geodesic curves on such manifolds. In other words the integrated total coordinate change is minimized. Such a definition ensures that the constructed paths are smooth and well behaved. The package is also used for smoothing discontinuous or noisy trajectories obtained from MD simulations.
|
|
6
|
+
|
|
7
|
+
It has been shown that the method generates smooth paths with reasonable barrier height even for highly complex reactions, such as protein unfolding or concerted cycloaddition reactions with many simultaneous ring formations. The default coordinate system uses Morse scaled pair-wise distances. The lengths in such coordinate systems have the physical meaning of the total number of bond changes along the path.
|
|
8
|
+
|
|
9
|
+
This is a pure python implementation, so it is not optimized for speed, but rather is intended to serve as a reference implementation of the algorithms described in the paper. Still, interpolating systems with ~1000 atoms should not be a problem.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
Directory Structure
|
|
13
|
+
----
|
|
14
|
+
- geodesic_interpolate Python package for interpolation and smoothing by finding geodesic curves with redundant internal metrics
|
|
15
|
+
- `__init__.py` Python package file
|
|
16
|
+
- `__main__.py` Standalone script for performing interpolation and smoothings.
|
|
17
|
+
- `geodesic.py` Computation and minimization of path length in redundant internal metrics. This is used to optimize
|
|
18
|
+
a path way to find a geodesic. Cannot change the number of images during optimization
|
|
19
|
+
- `interpolation.py` Generating approximate interpolation points to be used as starting guess for the geodesic optimizations.
|
|
20
|
+
Recursively attempt to perform bisections on largest segment in internal space. Generally will create path
|
|
21
|
+
with discontinuities, so a smoothing process needs to follow this.
|
|
22
|
+
- `coord_utils.py` Coordinate utilities. A simplified version of Nanoreactor coordinates module which provide a scaled
|
|
23
|
+
interatomic distance coordinate, pair screening based on threshold and connectivity, as well as trans-rotational
|
|
24
|
+
alignment based on Kabsch algorithm.
|
|
25
|
+
- `fileio.py` XYZ file reading and writing
|
|
26
|
+
- setup.py Installation script. This will install both the Python package, which can be imported by name `geodesic_interpolate`
|
|
27
|
+
and the standalone script, which is also named `geodesic_interpolate`
|
|
28
|
+
- test_cases A few test cases used to check the performance of the code. Note that the large ones may take a few minutes
|
|
29
|
+
thanks to Python. Especially need to run them when testing out alternative coordinate scaling methods.
|
|
30
|
+
- `H+CH4_CH3+H2.xyz` A simple test case. Should always work
|
|
31
|
+
- `DielsAlder.xyz` Dehydro-Diels-Alder reaction. This is an important test case because the initial structure is planar symmetric and
|
|
32
|
+
could access both the final structure and its mirror image, which has exactly the same internal coordinates. Proper
|
|
33
|
+
monitoring of geodesic length during the interpolation process is therefore crucial for this to work, without which
|
|
34
|
+
the raw interpolated path will jump between mirror images and the optimized path would contain some flopping.
|
|
35
|
+
Also tests the non-sweeping global path optimization algo in a relatively large system.
|
|
36
|
+
- `TrpCage_unfold.xyz` Unfolding a Trp-cage mini-protein. Needs at least 10 images to work. Folded geometry taken from Stefan's test
|
|
37
|
+
directory which is instead taken from Nathan. Unfolded structure is generated by force navigated MD for 1ns under
|
|
38
|
+
1nN on C and N terminal with ReaxFF, then optimizing without force at 6-31g/b3lyp level. For testing many
|
|
39
|
+
simultaneous large amplitude motions.
|
|
40
|
+
- `collagen.xyz` Interpolate between the solution and crystal structure of collagen Kunitz domain 1kun - 1kth. Solvents removed
|
|
41
|
+
from the PDB structures. Tests collision avoidance for large movements in the core of folded protein. Other
|
|
42
|
+
groups should slightly breathe to create room for the part that changes.
|
|
43
|
+
- `calcium_binding.xyz` Binding two Ca2+ ions to the yeast Calmodulin N terminal domain 1f54 -1f55. The apo structure did not have
|
|
44
|
+
ions, so two were added by hand at random locations away from the protein. It seems to be hard to avoid
|
|
45
|
+
large movements of the Ca2+ cations when they are very far away from the protein but once in contact they should
|
|
46
|
+
move smoothly. This is to test if the interpolator can correctly route and find the entry point and connect the
|
|
47
|
+
path.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Prerequisites
|
|
51
|
+
----
|
|
52
|
+
|
|
53
|
+
Python : >=3.8
|
|
54
|
+
|
|
55
|
+
Numpy : Tested with 1.13
|
|
56
|
+
|
|
57
|
+
Scipy : Tested with 0.19
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
Installation
|
|
61
|
+
----
|
|
62
|
+
|
|
63
|
+
### From PyPI
|
|
64
|
+
|
|
65
|
+
Once a release has been published to PyPI, install the package with:
|
|
66
|
+
|
|
67
|
+
pip install geodesic_interpolate
|
|
68
|
+
|
|
69
|
+
### From Source
|
|
70
|
+
|
|
71
|
+
The package can be used without installation from the package directory with
|
|
72
|
+
|
|
73
|
+
python -m geodesic_interpolate filename ...
|
|
74
|
+
|
|
75
|
+
To use the script from an arbitrary location or import the Python module, install the package with pip:
|
|
76
|
+
|
|
77
|
+
python -m pip install .
|
|
78
|
+
|
|
79
|
+
This will install a Python package `geodesic_interpolate` and a standalone script `geodesic_interpolate`.
|
|
80
|
+
The package can be invoked from an arbitrary location using the aforementioned command line after installation,
|
|
81
|
+
and a standalone script with the same signature can also be used
|
|
82
|
+
|
|
83
|
+
geodesic_interpolate filename.xyz --output output.xyz --nimages 20 ...
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
Usage
|
|
87
|
+
----
|
|
88
|
+
usage: geodesic_interpolate [-h] [--nimages NIMAGES] [--sweep] [--no-sweep]
|
|
89
|
+
[--output OUTPUT] [--tol TOL] [--maxiter MAXITER]
|
|
90
|
+
[--microiter MICROITER] [--scaling SCALING]
|
|
91
|
+
[--friction FRICTION] [--dist-cutoff DIST_CUTOFF]
|
|
92
|
+
[--logging {DEBUG,INFO,WARNING,ERROR}]
|
|
93
|
+
[--save-raw SAVE_RAW]
|
|
94
|
+
filename
|
|
95
|
+
|
|
96
|
+
Interpolates between two geometries
|
|
97
|
+
|
|
98
|
+
positional arguments:
|
|
99
|
+
* filename XYZ file containing geometries. If the number of images is smaller than the desired number,
|
|
100
|
+
interpolation points will be added. If the number is greater, subsampling will be performed.
|
|
101
|
+
|
|
102
|
+
optional arguments:
|
|
103
|
+
* `-h`, `--help` show this help message and exit
|
|
104
|
+
* `--nimages NIMAGES` Number of images. (default: 17)
|
|
105
|
+
* `--sweep` Sweep across the path optimizing one image at a time, instead of moving all images at the same time.
|
|
106
|
+
Default is to perform sweeping updates if there are more than 35 atoms. (default: None)
|
|
107
|
+
* `--no-sweep` Do not perform sweeping. (default: None)
|
|
108
|
+
* `--output OUTPUT` Output filename. (default: interpolated.xyz)
|
|
109
|
+
* `--tol TOL` Convergence tolerance (default: 0.002)
|
|
110
|
+
* `--maxiter MAXITER` Maximum number of minimization iterations (default: 15)
|
|
111
|
+
* `--microiter MICROITER` Maximum number of micro iterations for sweeping algorithm. (default: 20)
|
|
112
|
+
* `--scaling SCALING` Exponential parameter for morse potential (default: 1.7)
|
|
113
|
+
* `--friction FRICTION` Size of friction term used to prevent very large change of geometry. (default: 0.01)
|
|
114
|
+
* `--dist-cutoff DIST_CUTOFF` Cut-off value for the distance between a pair of atoms to be included in the coordinate system. (default: 3)
|
|
115
|
+
* `--logging {DEBUG,INFO,WARNING,ERROR}` Logging level to adopt [ DEBUG | INFO | WARNING | ERROR ] (default: INFO)
|
|
116
|
+
* `--save-raw SAVE_RAW` When specified, save the raw path after bisections before smoothing. (default: None)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Performing geodesic interpolation or smoothing.
|
|
2
|
+
Optimize reaction path using geometric information by minimizing path length with metrics defined by
|
|
3
|
+
redundant internal coordinates. Avoids the discontinuity and convergence problems of conventional
|
|
4
|
+
interpolation methods by incorporating internal coordinate structure while operating in Cartesian,
|
|
5
|
+
avoiding unfeasibility.
|
|
6
|
+
|
|
7
|
+
Xiaolei Zhu et al, Martinez Group, Stanford University
|
|
8
|
+
"""
|
|
9
|
+
import logging
|
|
10
|
+
import argparse
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from .fileio import read_xyz, write_xyz
|
|
15
|
+
from .interpolation import redistribute
|
|
16
|
+
from .geodesic import Geodesic
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main():
    """Main entry point of the geodesic interpolation package.

    Parses the command line, reads the input geometries, redistributes them to
    the requested number of images, then smooths the resulting path with either
    the sweeping or the all-images-at-once optimizer.  The path held by the
    smoother is always written to the output file, even when the optimization
    is interrupted or fails.

    Raises:
        ValueError: If the input file contains fewer than two geometries.
    """
    ps = argparse.ArgumentParser(description="Interpolates between two geometries",
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ps.add_argument("filename", type=str, help="XYZ file containing geometries. If the number of images "
                    "is smaller than the desired number, interpolation points will be added.  If the "
                    "number is greater, subsampling will be performed.")
    ps.add_argument("--nimages", type=int, default=17, help="Number of images.")
    # The atom count quoted here must match the automatic choice made below.
    ps.add_argument("--sweep", action="store_true", help="Sweep across the path optimizing one image at "
                    "a time, instead of moving all images at the same time.  Default is to perform sweeping "
                    "updates if there are more than 35 atoms.")
    ps.add_argument("--no-sweep", dest='sweep', action="store_false", help="Do not perform sweeping.")
    # `None` means "not specified by the user"; resolved from the atom count later.
    ps.set_defaults(sweep=None)
    # ArgumentDefaultsHelpFormatter appends the real default, so it is not repeated in the text.
    ps.add_argument("--output", default="interpolated.xyz", type=str, help="Output filename.")
    ps.add_argument("--tol", default=2e-3, type=float, help="Convergence tolerance")
    ps.add_argument("--maxiter", default=15, type=int, help="Maximum number of minimization iterations")
    ps.add_argument("--microiter", default=20, type=int, help="Maximum number of micro iterations for "
                    "sweeping algorithm.")
    ps.add_argument("--scaling", default=1.7, type=float, help="Exponential parameter for morse potential")
    ps.add_argument("--friction", default=1e-2, type=float, help="Size of friction term used to prevent "
                    "very large change of geometry.")
    ps.add_argument("--dist-cutoff", dest='dist_cutoff', default=3, type=float, help="Cut-off value for the "
                    "distance between a pair of atoms to be included in the coordinate system.")
    ps.add_argument("--logging", default="INFO", choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
                    help="Logging level to adopt [ DEBUG | INFO | WARNING | ERROR ]")
    ps.add_argument("--save-raw", dest='save_raw', default=None, type=str, help="When specified, save the "
                    "raw path after bisections before smoothing.")
    args = ps.parse_args()

    # Setup logging based on designated logging level
    logging.basicConfig(format="[%(module)-12s]%(message)s", level=args.logging)

    # Read the initial geometries.
    symbols, X = read_xyz(args.filename)
    logger.info('Loaded %d geometries from %s', len(X), args.filename)
    if len(X) < 2:
        raise ValueError("Need at least two initial geometries.")

    # First redistribute number of images.  Perform interpolation if too few and subsampling if too many
    # images are given.  The redistribution uses a tolerance five times looser than the final smoothing.
    raw = redistribute(symbols, X, args.nimages, tol=args.tol * 5)
    if args.save_raw is not None:
        write_xyz(args.save_raw, symbols, raw)

    # Perform smoothing by minimizing distance in Cartesian coordinates with redundant internal metric
    # to find the appropriate geodesic curve on the hyperspace.
    smoother = Geodesic(symbols, raw, args.scaling, threshold=args.dist_cutoff, friction=args.friction)
    if args.sweep is None:
        # Neither --sweep nor --no-sweep was given: decide from the number of atoms.
        args.sweep = len(symbols) > 35
    try:
        if args.sweep:
            smoother.sweep(tol=args.tol, max_iter=args.maxiter, micro_iter=args.microiter)
        else:
            smoother.smooth(tol=args.tol, max_iter=args.maxiter)
    finally:
        # Save the smoothed path to output file.  try block is to ensure output is saved if one ^C the
        # process, or there is an error
        logger.info('Saving final path to file %s', args.output)
        write_xyz(args.output, symbols, smoother.path)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
if __name__ == "__main__":
|
|
86
|
+
main()
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Coordinate utilities used by the interpolation program"""
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from scipy.spatial import KDTree
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def align_path(path):
    """Align every image of a path onto the image that precedes it.

    The first image is shifted so that its geometric center sits at the origin,
    then each following image is rotated and translated with `align_geom` onto
    the already-aligned previous image.

    Args:
        path: Sequence of geometries.  It is copied into a new array, so the
            caller's data is not modified.

    Returns:
        max_rmsd: Largest RMSD reported by `align_geom` between neighbouring
            images (0 when the path has a single image).
        path: The aligned copy of the path.
    """
    aligned = np.array(path)
    aligned[0] -= np.mean(aligned[0], axis=0)
    worst = 0
    # Walk forward so that each image is matched against an already-aligned one.
    for idx in range(1, len(aligned)):
        rmsd, aligned[idx] = align_geom(aligned[idx - 1], aligned[idx])
        worst = max(worst, rmsd)
    return worst, aligned
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def align_geom(refgeom, geom):
    """Superimpose a geometry onto a reference with the Kabsch algorithm.

    Both geometries are centered, the rotation is obtained from the SVD of
    their covariance matrix, and the rotated geometry is placed at the
    geometric center of the reference.

    Args:
        refgeom: The reference geometry to be rotated to
        geom:    The geometry to be rotated and shifted

    Returns:
        RMSD:     Root of the mean squared element-wise difference between the
                  rotated geometry and the reference
        new_geom: The rotated geometry that maximally overlaps with the reference
    """
    ref_center = np.mean(refgeom, axis=0)
    ref_centered = refgeom - ref_center
    mobile_centered = geom - np.mean(geom, axis=0)

    left, _, right = np.linalg.svd(np.dot(mobile_centered.T, ref_centered))
    # Flip the last singular direction when the product would be an improper
    # rotation (reflection).
    if np.linalg.det(left) * np.linalg.det(right) < 0:
        left[:, -1] = -left[:, -1]
    rotation = np.dot(left, right)

    new_geom = np.dot(mobile_centered, rotation) + ref_center
    deviation = new_geom - refgeom
    rmsd = np.sqrt(np.mean(deviation ** 2))
    return rmsd, new_geom
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Per-element radii for H through Ar, keyed by capitalized element symbol.
# NOTE(review): values look like covalent radii in Angstrom - confirm the source.
ATOMIC_RADIUS = {
    "H": 0.31, "He": 0.28,
    "Li": 1.28, "Be": 0.96, "B": 0.84, "C": 0.76, "N": 0.71, "O": 0.66, "F": 0.57, "Ne": 0.58,
    "Na": 1.66, "Mg": 1.41, "Al": 1.21, "Si": 1.11, "P": 1.07, "S": 1.05, "Cl": 1.02, "Ar": 1.06,
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_bond_list(geom, atoms=None, threshold=4, min_neighbors=4, snapshots=30, bond_threshold=1.8,
                  enforce=()):
    """Get the list of all the important atom pairs.

    Samples a number of snapshots from a list of geometries to generate all
    distances that are below a given threshold in any of them.  In every
    sampled snapshot, pairs formed by the neighbors of two bonded atoms are
    included as well.

    Args:
        geom:      One or a list of geometries to check for pairs
        atoms:     Symbols for each atoms.  When None, every pair gets a
                   reference length of 2.0.
        threshold: Threshold for including a bond in the bond list
        min_neighbors: Minimum number of neighbors to include for each atom.
                   If an atom has smaller than this number of bonds, additional
                   distances will be added to reach this number, using the
                   nearest atoms in the last geometry.
        snapshots: Number of snapshots to be used in the generation, useful
                   for speeding up the process if the path is long and
                   atoms numerous.  The first and last geometry are always
                   used; the rest are drawn at random from the interior images.
        bond_threshold: Distance below which two atoms are treated as bonded
                   when collecting neighbor-of-neighbor pairs.
        enforce:   Pairs that are always included in the list.

    Returns:
        rijlist: List of all the included interatomic distance pairs.
        re:      Array with one reference length per pair: the sum of the two
                 `ATOMIC_RADIUS` entries (1.5 for unknown symbols), or 2.0 when
                 `atoms` is None.
    """
    # Type casting and value checks on input parameters
    geom = np.asarray(geom)
    if geom.ndim < 3:
        # If there is only one geometry or it is flattened, promote to 3d
        geom = geom.reshape(1, -1, 3)
    nimages, natoms = geom.shape[0], geom.shape[1]
    min_neighbors = min(min_neighbors, natoms - 1)

    # Determine which images to be used to determine distances.  End points are
    # always kept; the remaining snapshots are drawn from ALL interior images,
    # not only from the first few, so long paths are sampled along their length.
    snapshots = min(nimages, snapshots)
    images = sorted({0, nimages - 1})
    if snapshots > 2:
        images.extend(np.random.choice(np.arange(1, nimages - 1), snapshots - 2, replace=False))

    # Get neighbor list for included geometry and merge them
    rijset = set(enforce)
    for image in images:
        tree = KDTree(geom[image])
        rijset.update(tree.query_pairs(threshold))
        bonded = tree.query_pairs(bond_threshold)
        # Each atom counts as its own neighbor so that the bonded pair itself and
        # the pairs between one atom and the other's neighbors are all generated.
        neighbors = {i: {i} for i in range(natoms)}
        for i, j in bonded:
            neighbors[i].add(j)
            neighbors[j].add(i)
        for i, j in bonded:
            for ni in neighbors[i]:
                for nj in neighbors[j]:
                    if ni != nj:
                        rijset.add((ni, nj) if ni < nj else (nj, ni))
    rijlist = sorted(rijset)

    # Check neighbor count to make sure `min_neighbors` is satisfied
    count = np.zeros(natoms, dtype=int)
    for i, j in rijlist:
        count[i] += 1
        count[j] += 1
    if np.any(count < min_neighbors):
        # The query points come from the last geometry, so the tree must be built
        # from that same geometry rather than whichever snapshot was visited last.
        last_tree = KDTree(geom[-1])
        for idx in range(natoms):
            if count[idx] >= min_neighbors:
                continue
            # k includes the atom itself, hence the +1.
            _, nearest = last_tree.query(geom[-1, idx], k=min_neighbors + 1)
            for i in np.atleast_1d(nearest):
                i = int(i)
                if i == idx:
                    continue
                pair = (i, idx) if i < idx else (idx, i)
                if pair in rijset:
                    continue
                rijset.add(pair)
                rijlist.append(pair)
                count[i] += 1
                count[idx] += 1

    if atoms is None:
        re = np.full(len(rijlist), 2.0)
    else:
        radius = np.array([ATOMIC_RADIUS.get(atom.capitalize(), 1.5) for atom in atoms])
        re = np.array([radius[i] + radius[j] for i, j in rijlist])
    logger.debug("Pair list contain %d pairs", len(rijlist))
    return rijlist, re
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def compute_rij(geom, rij_list):
    """Calculate a list of interatomic distances and their Cartesian derivatives.

    Args:
        geom: Cartesian geometry of a single structure.  2d numpy array or
            nested list with shape (natoms, 3).
        rij_list: Sequence of `(i, j)` atom index pairs.

    Returns:
        rij (array): Distances, one per pair, shape (npairs,).
        bmat (3d array): Cartesian gradients of all the distances, shape
            (npairs, natoms, 3).  For pair `(i, j)` the row holds the unit
            vector `(geom[i] - geom[j]) / r` at atom `i`, its negative at
            atom `j`, and zeros for every other atom.

    Note:
        The division by the distance is not guarded: coincident atoms produce
        non-finite gradient entries.
    """
    # Accept nested lists as promised by the docstring.
    geom = np.asarray(geom, dtype=float)
    pairs = np.asarray(rij_list, dtype=int).reshape(-1, 2)
    nrij = len(pairs)
    first, second = pairs[:, 0], pairs[:, 1]
    dvec = geom[first] - geom[second]
    # Same summation order as a per-pair evaluation, done for all pairs at once.
    rij = np.sqrt(dvec[:, 0] * dvec[:, 0] +
                  dvec[:, 1] * dvec[:, 1] + dvec[:, 2] * dvec[:, 2])
    grad = dvec / rij[:, np.newaxis]
    bmat = np.zeros((nrij, len(geom), 3))
    rows = np.arange(nrij)
    bmat[rows, first] = grad
    bmat[rows, second] = -grad
    return rij, bmat
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def compute_wij(geom, rij_list, func):
    """Calculate a list of scaled distances and their Cartesian derivatives.

    The selected interatomic distances are computed with `compute_rij` and then
    passed through a scaling function; the gradients are scaled accordingly by
    the chain rule.

    Args:
        geom: Cartesian geometry of a single structure.  Any array-like that can
            be reshaped to (natoms, 3); a flattened coordinate vector is accepted.
        rij_list: Sequence of `(i, j)` atom index pairs.
        func: A scaling function that takes the array of distances and returns a
            tuple `(value, derivative)` evaluated element-wise.

    Returns:
        wij (array): Array of all the scaled distances.
        bmat (2d array): Cartesian gradients of all the scaled distances, with
            shape (npairs, 3 * natoms), i.e. the atom and xyz dimensions are
            flattened (need this to be used in scipy.optimize).
    """
    geom = np.asarray(geom).reshape(-1, 3)
    nrij = len(rij_list)
    rij, bmat = compute_rij(geom, rij_list)
    wij, dwdr = func(rij)
    # Chain rule: d(w)/dX = dw/dr * dr/dX, applied to every pair in one broadcast.
    bmat *= np.asarray(dwdr).reshape(-1, 1, 1)
    return wij, bmat.reshape(nrij, -1)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def morse_scaler(re=1.5, alpha=1.7, beta=0.01):
    """Build a Morse-type scaling function for interatomic distances.

    The returned callable maps a distance `x` to
    `exp(alpha * (1 - x / re)) + beta * re / x` and also reports the derivative
    of that value with respect to `x`.

    Args:
        re: Reference distance.
        alpha: Exponent of the exponential term.
        beta: Weight of the inverse-distance term.

    Returns:
        A function `scaler(x) -> (value, derivative)`.
    """
    def scaler(x):
        reduced = x / re
        exp_term = np.exp(alpha * (1 - reduced))
        inverse_term = beta / reduced
        # d/dx of the exponential term plus d/dx of the inverse-distance term.
        slope = -alpha / re * exp_term - inverse_term / x
        return exp_term + inverse_term, slope
    return scaler
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def elu_scaler(re=2, alpha=2, beta=0.01):
    """Return a scaling function with a linear short-range and exponential
    long-range form (ELU-like), plus an inverse-distance term.

    The returned callable maps a distance `x` to

        alpha * (1 - x / re) + 1 + beta * re / x        for x <= re
        exp(alpha * (1 - x / re)) + beta * re / x       for x > re

    and also returns the derivative of that value with respect to `x`.

    Args:
        re: Reference distance.  May be a scalar or an array that broadcasts
            against the distances (e.g. one value per atom pair).
        alpha: Slope / exponent parameter.
        beta: Weight of the inverse-distance term.

    Returns:
        A function `scaler(x) -> (value, derivative)`.  `x` may be a scalar or
        an array; the results are numpy arrays of the broadcast shape.
    """
    def scaler(x):
        x = np.asarray(x, dtype=float)
        reduced = 1 - x / re
        decay = np.exp(alpha * reduced)
        large = x > re
        # Select branch-wise with np.where so that an array-valued `re` lines up
        # element by element with `x` (mask indexing of `x` alone would not).
        val1 = np.where(large, decay, reduced * alpha + 1)
        dval = np.where(large, -alpha / re * decay, -alpha / re)
        val2 = beta * re / x
        return val1 + val2, dval - val2 / x
    return scaler
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""File IO utilities"""
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def read_xyz(filename):
    """Read XYZ file and return atom names and coordinates

    Each frame consists of a line with the number of atoms, one comment line,
    and then one line per atom holding the element symbol followed by the x, y
    and z coordinates.  Blank lines between frames and at the end of the file
    are ignored.

    Args:
        filename: Name of xyz data file

    Returns:
        atom_names: Element symbols of all the atoms (taken from the last frame)
        coords: List with one (natoms, 3) float array per frame.

    Raises:
        ValueError: If the file contains no frames or a frame is malformed.
    """
    coords = []
    atom_names = []
    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank separator lines and trailing newlines.
                continue
            try:
                natm = int(line)        # Read number of atoms
                next(f)                 # Skip over comments
                frame_atoms = []
                geom = np.zeros((natm, 3), float)
                for i in range(natm):
                    fields = next(f).split()
                    if len(fields) < 4:
                        raise ValueError('Atom line needs a symbol and three coordinates')
                    frame_atoms.append(fields[0])
                    geom[i] = [float(value) for value in fields[1:4]]
            except (TypeError, ValueError, IOError, IndexError, StopIteration) as err:
                # Report every kind of malformed frame with the same message.
                raise ValueError('Incorrect XYZ file format') from err
            atom_names = frame_atoms
            coords.append(geom)
    if not coords:
        raise ValueError("File is empty")
    return atom_names, coords
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def write_xyz(filename, atoms, coords):
    """Write atom names and coordinate data to XYZ file

    Every frame is written as the atom count, a `Frame <index>` comment line and
    one line per atom.

    Args:
        filename: Name of xyz data file
        atoms: Sequence of atom names
        coords: Coordinates, either a path of shape nimages*natoms*3 or a single
            frame of shape natoms*3.

    Raises:
        ValueError: If `coords` is neither 2- nor 3-dimensional.
    """
    natoms = len(atoms)
    frames = np.asarray(coords, dtype=float)
    if frames.ndim == 2:
        # A single geometry: add the leading image axis.  (np.atleast_3d would
        # append a trailing axis instead, giving shape (natoms, 3, 1).)
        frames = frames[np.newaxis]
    elif frames.ndim != 3:
        raise ValueError('Coordinates must have shape (natoms, 3) or (nimages, natoms, 3)')
    with open(filename, 'w') as f:
        for i, X in enumerate(frames):
            f.write("%d\n" % natoms)
            f.write("Frame %d\n" % i)
            for a, Xa in zip(atoms, X):
                f.write(" {:3} {:21.12f} {:21.12f} {:21.12f}\n".format(a, *Xa))
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Geodesic smoothing. Minimize the path length using redundant internal coordinate
|
|
2
|
+
metric to find geodesics directly in Cartesian, to avoid feasibility problems associated
|
|
3
|
+
with redundant internals.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy.optimize import least_squares
|
|
9
|
+
|
|
10
|
+
from .coord_utils import align_path, get_bond_list, morse_scaler, compute_wij
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Geodesic(object):
    """Optimizer to obtain geodesic in redundant internal coordinates.  Core part is the calculation
    of the path length in the internal metric.

    The path length is approximated by summing, for every pair of neighboring images, the lengths
    of the two displacement vectors (in scaled internal coordinates) from each image to the
    Cartesian midpoint of the pair.  Internal coordinate values and derivatives are cached per
    image and per midpoint; a `None` entry marks a value that must be recomputed.
    """

    def __init__(self, atoms, path, scaler=1.7, threshold=3.0, min_neighbors=4, log_level=logging.INFO,
                 friction=1e-3):
        """Initialize the interpolater

        Args:
            atoms:      Atom symbols, used to lookup radii
            path:       Initial geometries of the path, must be of dimension `nimage * natoms * 3`
            scaler:     Either the alpha parameter for morse potential, or an explicit scaling function.
                        It is easier to get smoother paths with small number of data points using small
                        scaling factors, as they have large range, but larger values usually give
                        better energetics because they better represent the (sharp) energy landscape.
            threshold:  Distance cut-off for constructing inter-nuclear distance coordinates.  Note that
                        any atoms linked by three or less bonds will also be added.
            min_neighbors:  Minimum number of neighbors an atom must have in the atom pair list.
            log_level:  Logging level to use.
            friction:   Friction term in the target function which regularizes the optimization step
                        size to prevent explosion.

        Raises:
            ValueError: If the aligned path is not 3-dimensional.
        """
        rmsd0, self.path = align_path(path)
        logger.log(log_level, "Maximum RMSD change in initial path: %10.2f", rmsd0)
        if self.path.ndim != 3:
            raise ValueError('The path to be interpolated must have 3 dimensions')
        self.nimages, self.natoms, _ = self.path.shape
        # Construct coordinates
        self.rij_list, self.re = get_bond_list(path, atoms, threshold=threshold, min_neighbors=min_neighbors)
        if callable(scaler):
            self.scaler = scaler
        else:
            # A number is the Morse alpha parameter; honor the value that was passed in.
            self.scaler = morse_scaler(re=self.re, alpha=scaler)
        self.nrij = len(self.rij_list)
        self.friction = friction
        # Initialize internal storages for mid points, internal coordinates and B matrices
        logger.log(log_level, "Performing geodesic smoothing")
        logger.log(log_level, " Images: %4d Atoms %4d Rijs %6d", self.nimages, self.natoms, len(self.rij_list))
        self.neval = 0
        self.w = [None] * len(path)
        self.dwdR = [None] * len(path)
        self.X_mid = [None] * (len(path) - 1)
        self.w_mid = [None] * (len(path) - 1)
        self.dwdR_mid = [None] * (len(path) - 1)
        self.disps = self.grad = self.segment = None
        self.conv_path = []

    def update_intc(self):
        """Adjust unknown locations of mid points and compute missing values of internal coordinates
        and their derivatives.  Any missing values will be marked with None values in internal storage,
        and this routine finds and calculates them.  This is to avoid redundant evaluation of value and
        gradients of internal coordinates."""
        for i, (X, w, dwdR) in enumerate(zip(self.path, self.w, self.dwdR)):
            if w is None:
                self.w[i], self.dwdR[i] = compute_wij(X, self.rij_list, self.scaler)
        for i, (X0, X1, w) in enumerate(zip(self.path, self.path[1:], self.w_mid)):
            if w is None:
                # Midpoints are the Cartesian average of the two neighboring images.
                self.X_mid[i] = Xm = (X0 + X1) / 2
                self.w_mid[i], self.dwdR_mid[i] = compute_wij(Xm, self.rij_list, self.scaler)

    def update_geometry(self, X, start, end):
        """Update the geometry of a segment of the path, then set the corresponding internal
        coordinate, derivatives and midpoint locations to unknown

        Args:
            X: New Cartesian coordinates of images `start` to `end - 1`, in any shape that can be
                reshaped to that slice of the path.
            start, end: Slice bounds of the segment.  A negative `end` counts from the end of
                the path.

        Returns:
            False if `X` equals the stored geometry (nothing is changed), True otherwise.
        """
        if end < 0:
            # Normalize so that the invalidation loop below covers the same images as the slice.
            end += self.nimages
        X = X.reshape(self.path[start:end].shape)
        if np.array_equal(X, self.path[start:end]):
            return False
        self.path[start:end] = X
        for i in range(start, end):
            self.w_mid[i] = self.w[i] = None
        # The midpoint before the first moved image depends on it as well.
        self.w_mid[start - 1] = None
        return True

    def compute_disps(self, start=1, end=-1, dx=None, friction=1e-3):
        """Compute displacement vectors and total length between two images.
        Only recalculate internal coordinates if they are unknown.

        Stores the total length in `self.length`, the concatenated residual vector (left
        displacements, right displacements, friction term) in `self.disps`, and the part of it
        without the friction term in `self.disps0`.

        Args:
            start, end: Segment of the path being considered.  A negative `end` counts from the
                end of the path.
            dx: Displacement of the segment from its reference geometry, flattened.  If None the
                friction term is zero.
            friction: Scale factor applied to `dx`.
        """
        if end < 0:
            end += self.nimages
        self.update_intc()
        # Calculate displacement vectors in each segment, and the total length
        vecs_l = [wm - wl for wl, wm in zip(self.w[start - 1:end], self.w_mid[start - 1:end])]
        vecs_r = [wr - wm for wr, wm in zip(self.w[start:end + 1], self.w_mid[start - 1:end])]
        self.length = np.sum(np.linalg.norm(vecs_l, axis=1)) + np.sum(np.linalg.norm(vecs_r, axis=1))
        if dx is None:
            trans = np.zeros(self.path[start:end].size)
        else:
            trans = friction * dx  # Translation from initial geometry.  friction term
        self.disps = np.concatenate(vecs_l + vecs_r + [trans])
        self.disps0 = self.disps[:len(vecs_l) * 2]

    def compute_disp_grad(self, start, end, friction=1e-3):
        """Compute derivatives of the displacement vectors with respect to the Cartesian coordinates

        Fills `self.grad`, the Jacobian of `self.disps` with respect to the flattened Cartesian
        coordinates of images `start` to `end - 1`.  `self.grad0` is a view of the rows that belong
        to the displacement vectors (i.e. without the friction rows).

        Args:
            start, end: Segment of the path being optimized; `end` must be non-negative here.
            friction: Diagonal value of the friction rows.
        """
        # Calculate derivatives of displacement vectors with respect to image Cartesians
        l = end - start + 1     # Number of path segments that touch a movable image
        self.grad = np.zeros((l * 2 * self.nrij + 3 * (end - start) * self.natoms, (end - start) * 3 * self.natoms))
        self.grad0 = self.grad[:l * 2 * self.nrij]
        grad_shape = (l, self.nrij, end - start, 3 * self.natoms)
        # Reshaped views into self.grad: writing to them fills the Jacobian in place.
        grad_l = self.grad[:l * self.nrij].reshape(grad_shape)
        grad_r = self.grad[l * self.nrij:l * self.nrij * 2].reshape(grad_shape)
        for i, image in enumerate(range(start, end)):
            # A midpoint is the average of two images, hence the factor of 1/2.
            dmid1 = self.dwdR_mid[image - 1] / 2
            dmid2 = self.dwdR_mid[image] / 2
            grad_l[i + 1, :, i, :] = dmid2 - self.dwdR[image]
            grad_l[i, :, i, :] = dmid1
            grad_r[i + 1, :, i, :] = -dmid2
            grad_r[i, :, i, :] = self.dwdR[image] - dmid1
        for idx in range((end - start) * 3 * self.natoms):
            self.grad[l * self.nrij * 2 + idx, idx] = friction

    def compute_target_func(self, X=None, start=1, end=-1, log_level=logging.INFO, x0=None, friction=1e-3):
        """Compute the vectorized target function, which is then used for least
        squares minimization.

        Args:
            X: New coordinates of the segment.  If given and identical to the stored geometry of
                the segment evaluated last, nothing is recomputed.
            start, end: Segment of the path being optimized.
            log_level: Logging level for the per-evaluation message.
            x0: Reference geometry of the segment for the friction term.
            friction: Strength of the friction term.
        """
        if end < 0:
            end += self.nimages
        if X is not None and not self.update_geometry(X, start, end) and self.segment == (start, end):
            return
        self.segment = start, end
        dx = np.zeros(self.path[start:end].size) if x0 is None else self.path[start:end].ravel() - x0.ravel()
        self.compute_disps(start, end, dx=dx, friction=friction)
        self.compute_disp_grad(start, end, friction=friction)
        self.optimality = np.linalg.norm(np.einsum('i,i...', self.disps, self.grad), ord=np.inf)
        logger.log(log_level, " Iteration %3d: Length %10.3f |dL|=%7.3e", self.neval, self.length, self.optimality)
        self.conv_path.append(self.path[1].copy())
        self.neval += 1

    def target_func(self, X, **kwargs):
        """Wrapper around `compute_target_func` to prevent repeated evaluation at
        the same geometry.  Returns the residual vector `self.disps`."""
        self.compute_target_func(X, **kwargs)
        return self.disps

    def target_deriv(self, X, **kwargs):
        """Wrapper around `compute_target_func` to prevent repeated evaluation at
        the same geometry.  Returns the Jacobian `self.grad`."""
        self.compute_target_func(X, **kwargs)
        return self.grad

    def smooth(self, tol=1e-3, max_iter=50, start=1, end=-1, log_level=logging.INFO, friction=None,
               xref=None):
        """Minimize the path length as an overall function of the coordinates of all the images.
        This should in principle be very efficient, but may be quite costly for large systems with
        many images.

        Args:
            tol:        Convergence tolerance of the optimality. (.i.e uniform gradient of target func)
            max_iter:   Maximum number of iterations to run.
            start, end: Specify which section of the path to optimize.
            log_level:  Logging level during the optimization
            friction:   Strength of the friction term.  Defaults to the value given at construction.
            xref:       Reference geometry for the friction term.  Defaults to the starting
                        geometry of the section.

        Returns:
            The optimized path.  This is also stored in self.path
        """
        if end < 0:
            # Use an absolute index everywhere so cache invalidation covers the whole section.
            end += self.nimages
        X0 = np.array(self.path[start:end]).ravel()
        if xref is None:
            xref = X0
        self.disps = self.grad = self.segment = None
        logger.log(log_level, " Degree of freedoms %6d: ", len(X0))
        if friction is None:
            friction = self.friction
        # Configure the keyword arguments that will be sent to the target function.
        kwargs = dict(start=start, end=end, log_level=log_level, x0=xref, friction=friction)
        self.compute_target_func(**kwargs)  # Compute length and optimality
        if self.optimality > tol:
            result = least_squares(self.target_func, X0, self.target_deriv, ftol=tol, gtol=tol,
                                   max_nfev=max_iter, kwargs=kwargs, loss='soft_l1')
            self.update_geometry(result['x'], start, end)
            logger.log(log_level, "Smoothing converged after %d iterations", result['nfev'])
        else:
            logger.log(log_level, "Skipping smoothing: path already optimal.")
        rmsd, self.path = align_path(self.path)
        logger.log(log_level, "Final path length: %12.5f Max RMSD in path: %10.2f", self.length, rmsd)
        return self.path

    def sweep(self, tol=1e-3, max_iter=50, micro_iter=20, start=1, end=-1):
        """Minimize the path length by adjusting one image at a time and sweeping the optimization
        side across the chain.  This is not as efficient, but scales much more friendly with the
        size of the system given the slowness of scipy's optimizers.  Also allows more detailed
        control and easy way of skipping nearly optimal points than the overall case.

        Args:
            tol:        Convergence tolerance of the optimality. (.i.e uniform gradient of target func)
            max_iter:   Maximum number of sweeps through the path.
            micro_iter: Number of micro-iterations to be performed when optimizing each image.
            start, end: Specify which section of the path to optimize.

        Returns:
            The optimized path.  This is also stored in self.path
        """
        if end < 0:
            end = self.nimages + end
        self.neval = 0
        images = range(start, end)
        logger.info(" Degree of freedoms %6d: ", (end - start) * 3 * self.natoms)
        # Microiteration convergence tolerances are adjusted on the fly based on level of convergence.
        curr_tol = tol * 10
        self.compute_disps()    # Compute and print the initial path length
        logger.info(" Initial length: %8.3f", self.length)
        for iteration in range(max_iter):
            max_dL = 0
            X0 = self.path.copy()
            for i in images[:-1]:   # Use self.smooth() to optimize individual images
                xmid = (self.path[i - 1] + self.path[i + 1]) * 0.5
                # The first sweep uses a stronger friction (0.1) than the later ones.
                self.smooth(curr_tol, max_iter=min(micro_iter, iteration + 6),
                            start=i, end=i + 1, log_level=logging.DEBUG,
                            friction=self.friction if iteration else 0.1,
                            xref=xmid)
                max_dL = max(max_dL, self.optimality)
            self.compute_disps()    # Compute final length after sweep
            logger.info("Sweep %3d: L=%7.2f dX=%7.2e tol=%7.3e dL=%7.3e",
                        iteration, self.length, np.linalg.norm(self.path - X0), curr_tol, max_dL)
            if max_dL < tol:    # Check for convergence.
                logger.info("Optimization converged after %d iteartions", iteration)
                break
            curr_tol = max(tol * 0.5, max_dL * 0.2)     # Adjust micro-iteration threshold
            images = list(reversed(images))     # Alternate sweeping direction.
        else:
            logger.info("Optimization not converged after %d iteartions", iteration)
        rmsd, self.path = align_path(self.path)
        logger.info("Final path length: %12.5f Max RMSD in path: %10.2f", self.length, rmsd)
        return self.path
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Simplified geodesic interpolations module, which uses geodesic lengths as criteria
|
|
2
|
+
to add bisection points until point count meet desired number.
|
|
3
|
+
Will need another following geodesic smoothing to get final path.
|
|
4
|
+
"""
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy.optimize import least_squares, minimize
|
|
9
|
+
|
|
10
|
+
from .geodesic import Geodesic
|
|
11
|
+
from .coord_utils import get_bond_list, compute_wij, morse_scaler, align_geom, align_path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def mid_point(atoms, geom1, geom2, tol=1e-2, nudge=0.01, threshold=4):
    """Find the Cartesian geometry that has internal coordinate values closest to the average of
    two geometries.

    Simply perform a least-squares minimization on the difference between the current internal
    and the average of the two end points.  This is done twice, using either end point as the
    starting guess.  DON'T USE THE CARTESIAN AVERAGE AS GUESS, THINGS WILL BLOW UP.

    This is used to generate an initial guess path for the later smoothing routine.
    Generally, the added point may not be continuous with both end points, but
    provides a good enough starting guess.

    Random nudges are added to the initial geometry, so running multiple times may not yield
    the same converged geometry. For larger systems, one will never get the same geometry
    twice.  So one may want to perform multiple runs and check which yields the best result.

    Args:
        atoms:          Atom symbols, passed on to `Geodesic` when the trial path length is evaluated
        geom1, geom2:   Cartesian geometry of the end points
        tol:    Convergence tolerance for the least-squares minimization process
        nudge:  Random nudges added to the initial geometry, which helps to discover different
                solutions.  Also helps in cases where optimal paths break the symmetry.
        threshold:  Threshold for including an atom-pair in the coordinate system

    Returns:
        Optimized mid-point which bisects the two endpoints in internal coordinates
    """
    # Process the initial geometries, construct coordinate system and obtain average internals
    geom1, geom2 = np.array(geom1), np.array(geom2)
    add_pair = set()
    geom_list = [geom1, geom2]
    # This loop is for ensuring a sufficiently large coordinate system.  The interpolated point may
    # have atom pairs in contact that are far away at both end-points, which may cause collision.
    # One can include all atom pairs, but this may blow up for large molecules.  Here the compromise
    # is to use a screened list of atom pairs first, then add more if additional atoms come into
    # contact, then rerun the minimization until the coordinate system is consistent with the
    # interpolated geometry
    while True:
        rijlist, re = get_bond_list(geom_list, threshold=threshold + 1, enforce=add_pair)
        scaler = morse_scaler(alpha=0.7, re=re)
        w1, _ = compute_wij(geom1, rijlist, scaler)
        w2, _ = compute_wij(geom2, rijlist, scaler)
        w = (w1 + w2) / 2
        d_min, x_min = np.inf, None
        friction = 0.1 / np.sqrt(geom1.shape[0])

        # The inner loop performs minimization using either end-point as the starting guess.
        for coef in [0.02, 0.98]:
            x0 = (geom1 * coef + (1 - coef) * geom2).ravel()
            x0 += nudge * np.random.random_sample(x0.shape)
            logger.debug('Starting least-squares minimization of bisection point at %7.2f.', coef)

            def residuals(x, x_ref=x0):
                """Deviation from the average internals, plus friction towards the guess."""
                return np.concatenate([compute_wij(x, rijlist, scaler)[0] - w, (x - x_ref) * friction])

            def jacobian(x):
                """Derivatives of `residuals` with respect to the Cartesian coordinates."""
                return np.vstack([compute_wij(x, rijlist, scaler)[1], np.identity(x.size) * friction])

            result = least_squares(residuals, x0, jacobian, ftol=tol, gtol=tol)
            x_mid = result['x'].reshape(-1, 3)
            # Take the interpolated geometry, construct new pair list and check for new contacts
            new_list = geom_list + [x_mid]
            new_rij, _ = get_bond_list(new_list, threshold=threshold, min_neighbors=0)
            extras = set(new_rij) - set(rijlist)
            if extras:
                logger.info(' Screened pairs came into contact. Adding reference point.')
                # Update pair list then go back to the minimization loop if new contacts are found
                geom_list = new_list
                add_pair |= extras
                break
            # Perform local geodesic optimization for the new image.
            smoother = Geodesic(atoms, [geom1, x_mid, geom2], 0.7, threshold=threshold, log_level=logging.DEBUG, friction=1)
            smoother.compute_disps()
            width = max([np.sqrt(np.mean((g - smoother.path[1]) ** 2)) for g in [geom1, geom2]])
            dist, x_mid = width + smoother.length, smoother.path[1]
            logger.debug(' Trial path length: %8.3f after %d iterations', dist, result['nfev'])
            if dist < d_min:
                d_min, x_min = dist, x_mid
        else:   # Both starting guesses finished without new atom pairs.  Minimization successful
            break
    return x_min
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def redistribute(atoms, geoms, nimages, tol=1e-2):
    """Add or remove images so that the number of images in the path matches the desired number.

    While the path is too short, a new image is inserted into the neighboring pair with the
    largest Cartesian RMSD.  While it is too long, the image whose removal leaves the merged
    segment with the smallest Cartesian RMSD is deleted.  The path is re-aligned after every
    change.

    Args:
        atoms:      Atom symbols, passed on to `mid_point`.
        geoms:      Geometry of the original path.
        nimages:    The desired number of images
        tol:        Convergence tolerance for bisection.

    Returns:
        An aligned and redistributed path which has the correct number of images.
    """
    path = list(align_path(geoms)[1])

    # Too few images: keep bisecting the widest gap.
    while len(path) < nimages:
        gaps = [np.sqrt(np.mean((later - earlier) ** 2)) for later, earlier in zip(path[1:], path)]
        widest = np.argmax(gaps)
        logger.info("Inserting image between %d and %d with Cartesian RMSD %10.3f. New length:%d",
                    widest, widest + 1, gaps[widest], len(path) + 1)
        new_image = mid_point(atoms, path[widest], path[widest + 1], tol)
        new_image = align_geom(path[widest], new_image)[1]
        path.insert(widest + 1, new_image)
        path = list(align_path(path)[1])

    # Too many images: keep dropping the image between the closest next-nearest neighbors.
    while len(path) > nimages:
        spans = [np.sqrt(np.mean((later - earlier) ** 2)) for later, earlier in zip(path[2:], path)]
        tightest = np.argmin(spans)
        logger.info("Removing image %d. Cartesian RMSD of merged section %10.3f",
                    tightest + 1, spans[tightest])
        path.pop(tightest + 1)
        path = list(align_path(path)[1])

    return path
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: geodesic_interpolate
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Interpolation and smoothing of reaction paths with geodesics in redundant internal coordinates.
|
|
5
|
+
Home-page: https://github.com/virtualzx-nad/geodesic-interpolate
|
|
6
|
+
Author: Xiaolei Zhu
|
|
7
|
+
Author-email: virtualzx@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Bug Reports, https://github.com/virtualzx-nad/geodesic-interpolate/issues
|
|
10
|
+
Project-URL: Source, https://github.com/virtualzx-nad/geodesic-interpolate
|
|
11
|
+
Keywords: chemistry,molecular dynamics,reaction paths,geodesics
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.13
|
|
25
|
+
Requires-Dist: scipy>=0.19
|
|
26
|
+
Dynamic: author
|
|
27
|
+
Dynamic: author-email
|
|
28
|
+
Dynamic: classifier
|
|
29
|
+
Dynamic: description
|
|
30
|
+
Dynamic: description-content-type
|
|
31
|
+
Dynamic: home-page
|
|
32
|
+
Dynamic: keywords
|
|
33
|
+
Dynamic: license
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
Dynamic: project-url
|
|
36
|
+
Dynamic: requires-dist
|
|
37
|
+
Dynamic: requires-python
|
|
38
|
+
Dynamic: summary
|
|
39
|
+
|
|
40
|
+
Geodesic interpolations of reaction pathways
|
|
41
|
+
====
|
|
42
|
+
Constructing interpolation paths between molecular geometries to obtain reaction path.
|
|
43
|
+
|
|
44
|
+
Traditional interpolation methods encounter difficulty when it comes to redundant internal coordinate spaces, because the feasible physical space composes only a very small and highly curved subspace. In this method, we avoid the problem of feasibility by operating strictly in feasible space, and bring in the benefit of internal coordinates through proper application of the corresponding metric tensor. With this new formulation, we view the configuration space as a Riemannian manifold with a metric generated from a set of internal coordinates. The interpolation paths are defined as geodesic curves on such manifolds. In other words, the integrated total coordinate change is minimized. Such a definition ensures that the constructed paths are smooth and well behaved. The package is also used for smoothing discontinuous or noisy trajectories obtained from MD simulations.
|
|
45
|
+
|
|
46
|
+
It has been shown that the method generates smooth paths with reasonable barrier heights even for highly complex reactions, such as protein unfolding or concerted cycloaddition reactions with many simultaneous ring formations. The default coordinate system uses Morse scaled pair-wise distances. The lengths in such coordinate systems have the physical meaning of the total number of bond changes along the path.
|
|
47
|
+
|
|
48
|
+
This is a pure python implementation, so it is not optimized for speed, but rather is intended to serve as a reference implementation of the algorithms described in the paper. Still, interpolating systems with ~1000 atoms should not be a problem.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
Directory Structure
|
|
52
|
+
----
|
|
53
|
+
- geodesic_interpolate Python package for interpolation and smoothing by finding geodesic curves with redundant internal metrics
|
|
54
|
+
- `__init__.py` Python package file
|
|
55
|
+
- `__main__.py` Standalone script for performing interpolation and smoothings.
|
|
56
|
+
- `geodesic.py` Computation and minimization of path length in redundant internal metrics. This is used to optimize
|
|
57
|
+
a path way to find a geodesic. Cannot change the number of images during optimization
|
|
58
|
+
- `interpolation.py` Generating approximate interpolation points to be used as starting guess for the geodesic optimizations.
|
|
59
|
+
Recursively attempt to perform bisections on largest segment in internal space. Generally will create path
|
|
60
|
+
with discontinuities, so a smoothing process needs to follow this.
|
|
61
|
+
- `coord_utils.py` Coordinate utilities. A simplified version of Nanoreactor coordinates module which provide a scaled
|
|
62
|
+
interatomic distance coordinate, pair screening based on threshold and connectivity, as well as trans-rotational
|
|
63
|
+
alignment based on Kabsch algorithm.
|
|
64
|
+
- `fileio.py` XYZ file reading and writing
|
|
65
|
+
- setup.py Installation script. This will install both the Python package, which can be imported by name `geodesic_interpolate`
|
|
66
|
+
and the standalone script, which is also named `geodesic_interpolate`
|
|
67
|
+
- test_cases A few test cases used to check the performance of the code. Note that the large ones may take a few minutes
|
|
68
|
+
thanks to Python. Especially need to run them when testing out alternative coordinate scaling methods.
|
|
69
|
+
- `H+CH4_CH3+H2.xyz` A simple test case. Should always work
|
|
70
|
+
- `DielsAlder.xyz` Dehydro-Diels-Alder reaction. This is an important test case because the initial structure is planar symmetric and
|
|
71
|
+
could access both the final structure and its mirror image, which has exactly the same internal coordinates. Proper
|
|
72
|
+
monitoring of geodesic length during the interpolation process is therefore crucial for this to work, without which
|
|
73
|
+
the raw interpolated path will jump between mirror images and the optimized path would contain some flopping.
|
|
74
|
+
Also tests the non-sweeping global path optimization algo in a relatively large system.
|
|
75
|
+
- `TrpCage_unfold.xyz` Unfolding a Trp-cage mini-protein. Needs at least 10 images to work. Folded geometry taken from Stefan's test
|
|
76
|
+
directory which is instead taken from Nathan. Unfolded structure is generated by force navigated MD for 1ns under
|
|
77
|
+
1nN on C and N terminal with ReaxFF, then optimizing without force at 6-31g/b3lyp level. For testing many
|
|
78
|
+
simultaneous large amplitude motions.
|
|
79
|
+
- `collagen.xyz` Interpolate between the solution and crystal structure of collagen Kunitz domain 1kun - 1kth. Solvents removed
|
|
80
|
+
from the PDB structures. Tests collision avoidance for large movements in the core of folded protein. Other
|
|
81
|
+
groups should slightly breathe to create room for the part that changes.
|
|
82
|
+
- `calcium_binding.xyz` Binding two Ca2+ ions to the yeast Calmodulin N terminal domain 1f54 - 1f55. The apo structure did not have
|
|
83
|
+
ions, so two were added by hand at random locations away from the protein. It seems to be hard to avoid
|
|
84
|
+
large movements of the Ca2+ cations when they are very far away from the protein but once in contact they should
|
|
85
|
+
move smoothly. This is to test if the interpolator can correctly route and find the entry point and connect the
|
|
86
|
+
path.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
Prerequisites
|
|
90
|
+
----
|
|
91
|
+
|
|
92
|
+
Python : >=3.8
|
|
93
|
+
|
|
94
|
+
Numpy : Tested with 1.13
|
|
95
|
+
|
|
96
|
+
Scipy : Tested with 0.19
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
Installation
|
|
100
|
+
----
|
|
101
|
+
|
|
102
|
+
### From PyPI
|
|
103
|
+
|
|
104
|
+
Once a release has been published to PyPI, install the package with:
|
|
105
|
+
|
|
106
|
+
pip install geodesic_interpolate
|
|
107
|
+
|
|
108
|
+
### From Source
|
|
109
|
+
|
|
110
|
+
The package can be used without installation from the package directory with
|
|
111
|
+
|
|
112
|
+
python -m geodesic_interpolate filename ...
|
|
113
|
+
|
|
114
|
+
To use the script from an arbitrary location or import the Python module, install the package with pip:
|
|
115
|
+
|
|
116
|
+
python -m pip install .
|
|
117
|
+
|
|
118
|
+
This will install a Python package `geodesic_interpolate` and a standalone script `geodesic_interpolate`.
|
|
119
|
+
The package can be invoked from an arbitrary location using the aforementioned command line after installation,
|
|
120
|
+
and a standalone script with the same signature can also be used
|
|
121
|
+
|
|
122
|
+
geodesic_interpolate filename.xyz --output output.xyz --nimages 20 ...
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
Usage
|
|
126
|
+
----
|
|
127
|
+
usage: geodesic_interpolate [-h] [--nimages NIMAGES] [--sweep] [--no-sweep]
|
|
128
|
+
[--output OUTPUT] [--tol TOL] [--maxiter MAXITER]
|
|
129
|
+
[--microiter MICROITER] [--scaling SCALING]
|
|
130
|
+
[--friction FRICTION] [--dist-cutoff DIST_CUTOFF]
|
|
131
|
+
[--logging {DEBUG,INFO,WARNING,ERROR}]
|
|
132
|
+
[--save-raw SAVE_RAW]
|
|
133
|
+
filename
|
|
134
|
+
|
|
135
|
+
Interpolates between two geometries
|
|
136
|
+
|
|
137
|
+
positional arguments:
|
|
138
|
+
* filename XYZ file containing geometries. If the number of images is smaller than the desired number,
|
|
139
|
+
interpolation points will be added. If the number is greater, subsampling will be performed.
|
|
140
|
+
|
|
141
|
+
optional arguments:
|
|
142
|
+
* `-h`, `--help` show this help message and exit
|
|
143
|
+
* `--nimages NIMAGES` Number of images. (default: 17)
|
|
144
|
+
* `--sweep` Sweep across the path optimizing one image at a time, instead of moving all images at the same time.
|
|
145
|
+
Default is to perform sweeping updates if there are more than 30 atoms. (default: None)
|
|
146
|
+
* `--no-sweep` Do not perform sweeping. (default: None)
|
|
147
|
+
* `--output OUTPUT` Output filename. (default: interpolated.xyz)
|
|
148
|
+
* `--tol TOL` Convergence tolerance (default: 0.002)
|
|
149
|
+
* `--maxiter MAXITER` Maximum number of minimization iterations (default: 15)
|
|
150
|
+
* `--microiter MICROITER` Maximum number of micro iterations for sweeping algorithm. (default: 20)
|
|
151
|
+
* `--scaling SCALING` Exponential parameter for morse potential (default: 1.7)
|
|
152
|
+
* `--friction FRICTION` Size of friction term used to prevent very large change of geometry. (default: 0.01)
|
|
153
|
+
* `--dist-cutoff DIST_CUTOFF` Cut-off value for the distance between a pair of atoms to be included in the coordinate system. (default: 3)
|
|
154
|
+
* `--logging {DEBUG,INFO,WARNING,ERROR}` Logging level to adopt [ DEBUG | INFO | WARNING | ERROR ] (default: INFO)
|
|
155
|
+
* `--save-raw SAVE_RAW` When specified, save the raw path after bisections but before smoothing. (default: None)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
setup.py
|
|
6
|
+
geodesic_interpolate/__init__.py
|
|
7
|
+
geodesic_interpolate/__main__.py
|
|
8
|
+
geodesic_interpolate/coord_utils.py
|
|
9
|
+
geodesic_interpolate/fileio.py
|
|
10
|
+
geodesic_interpolate/geodesic.py
|
|
11
|
+
geodesic_interpolate/interpolation.py
|
|
12
|
+
geodesic_interpolate.egg-info/PKG-INFO
|
|
13
|
+
geodesic_interpolate.egg-info/SOURCES.txt
|
|
14
|
+
geodesic_interpolate.egg-info/dependency_links.txt
|
|
15
|
+
geodesic_interpolate.egg-info/entry_points.txt
|
|
16
|
+
geodesic_interpolate.egg-info/requires.txt
|
|
17
|
+
geodesic_interpolate.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
geodesic_interpolate
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Installer for geodesic interpolation package.
|
|
2
|
+
Install the package into python environment, and provide an entry point for the
|
|
3
|
+
main interpolation script.
|
|
4
|
+
"""
|
|
5
|
+
from setuptools import setup
|
|
6
|
+
import pathlib
|
|
7
|
+
|
|
8
|
+
# Read the contents of README.md
|
|
9
|
+
here = pathlib.Path(__file__).parent.resolve()
|
|
10
|
+
long_description = (here / "README.md").read_text(encoding="utf-8")
|
|
11
|
+
|
|
12
|
+
setup(
|
|
13
|
+
name='geodesic_interpolate',
|
|
14
|
+
version='1.0.0',
|
|
15
|
+
description='Interpolation and smoothing of reaction paths with geodesics in redundant internal coordinates.',
|
|
16
|
+
long_description=long_description,
|
|
17
|
+
long_description_content_type='text/markdown',
|
|
18
|
+
author='Xiaolei Zhu',
|
|
19
|
+
author_email='virtualzx@gmail.com',
|
|
20
|
+
url='https://github.com/virtualzx-nad/geodesic-interpolate',
|
|
21
|
+
license='MIT',
|
|
22
|
+
packages=['geodesic_interpolate'],
|
|
23
|
+
python_requires='>=3.8',
|
|
24
|
+
install_requires=[
|
|
25
|
+
'numpy>=1.13',
|
|
26
|
+
'scipy>=0.19',
|
|
27
|
+
],
|
|
28
|
+
entry_points = {
|
|
29
|
+
'console_scripts': [
|
|
30
|
+
'geodesic_interpolate=geodesic_interpolate.__main__:main',
|
|
31
|
+
],
|
|
32
|
+
},
|
|
33
|
+
classifiers=[
|
|
34
|
+
'Development Status :: 4 - Beta',
|
|
35
|
+
'Intended Audience :: Science/Research',
|
|
36
|
+
'Programming Language :: Python :: 3.8',
|
|
37
|
+
'Programming Language :: Python :: 3.9',
|
|
38
|
+
'Programming Language :: Python :: 3.10',
|
|
39
|
+
'Programming Language :: Python :: 3.11',
|
|
40
|
+
'Programming Language :: Python :: 3.12',
|
|
41
|
+
'Topic :: Scientific/Engineering :: Chemistry',
|
|
42
|
+
'Topic :: Scientific/Engineering :: Physics',
|
|
43
|
+
],
|
|
44
|
+
keywords='chemistry, molecular dynamics, reaction paths, geodesics',
|
|
45
|
+
project_urls={
|
|
46
|
+
'Bug Reports': 'https://github.com/virtualzx-nad/geodesic-interpolate/issues',
|
|
47
|
+
'Source': 'https://github.com/virtualzx-nad/geodesic-interpolate',
|
|
48
|
+
},
|
|
49
|
+
)
|