multioptpy-1.20.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/Calculator/__init__.py +0 -0
- multioptpy/Calculator/ase_calculation_tools.py +424 -0
- multioptpy/Calculator/ase_tools/__init__.py +0 -0
- multioptpy/Calculator/ase_tools/fairchem.py +28 -0
- multioptpy/Calculator/ase_tools/gamess.py +19 -0
- multioptpy/Calculator/ase_tools/gaussian.py +165 -0
- multioptpy/Calculator/ase_tools/mace.py +28 -0
- multioptpy/Calculator/ase_tools/mopac.py +19 -0
- multioptpy/Calculator/ase_tools/nwchem.py +31 -0
- multioptpy/Calculator/ase_tools/orca.py +22 -0
- multioptpy/Calculator/ase_tools/pygfn0.py +37 -0
- multioptpy/Calculator/dxtb_calculation_tools.py +344 -0
- multioptpy/Calculator/emt_calculation_tools.py +458 -0
- multioptpy/Calculator/gpaw_calculation_tools.py +183 -0
- multioptpy/Calculator/lj_calculation_tools.py +314 -0
- multioptpy/Calculator/psi4_calculation_tools.py +334 -0
- multioptpy/Calculator/pwscf_calculation_tools.py +189 -0
- multioptpy/Calculator/pyscf_calculation_tools.py +327 -0
- multioptpy/Calculator/sqm1_calculation_tools.py +611 -0
- multioptpy/Calculator/sqm2_calculation_tools.py +376 -0
- multioptpy/Calculator/tblite_calculation_tools.py +352 -0
- multioptpy/Calculator/tersoff_calculation_tools.py +818 -0
- multioptpy/Constraint/__init__.py +0 -0
- multioptpy/Constraint/constraint_condition.py +834 -0
- multioptpy/Coordinate/__init__.py +0 -0
- multioptpy/Coordinate/polar_coordinate.py +199 -0
- multioptpy/Coordinate/redundant_coordinate.py +638 -0
- multioptpy/IRC/__init__.py +0 -0
- multioptpy/IRC/converge_criteria.py +28 -0
- multioptpy/IRC/dvv.py +544 -0
- multioptpy/IRC/euler.py +439 -0
- multioptpy/IRC/hpc.py +564 -0
- multioptpy/IRC/lqa.py +540 -0
- multioptpy/IRC/modekill.py +662 -0
- multioptpy/IRC/rk4.py +579 -0
- multioptpy/Interpolation/__init__.py +0 -0
- multioptpy/Interpolation/adaptive_interpolation.py +283 -0
- multioptpy/Interpolation/binomial_interpolation.py +179 -0
- multioptpy/Interpolation/geodesic_interpolation.py +785 -0
- multioptpy/Interpolation/interpolation.py +156 -0
- multioptpy/Interpolation/linear_interpolation.py +473 -0
- multioptpy/Interpolation/savitzky_golay_interpolation.py +252 -0
- multioptpy/Interpolation/spline_interpolation.py +353 -0
- multioptpy/MD/__init__.py +0 -0
- multioptpy/MD/thermostat.py +185 -0
- multioptpy/MEP/__init__.py +0 -0
- multioptpy/MEP/pathopt_bneb_force.py +443 -0
- multioptpy/MEP/pathopt_dmf_force.py +448 -0
- multioptpy/MEP/pathopt_dneb_force.py +130 -0
- multioptpy/MEP/pathopt_ewbneb_force.py +207 -0
- multioptpy/MEP/pathopt_gpneb_force.py +512 -0
- multioptpy/MEP/pathopt_lup_force.py +113 -0
- multioptpy/MEP/pathopt_neb_force.py +225 -0
- multioptpy/MEP/pathopt_nesb_force.py +205 -0
- multioptpy/MEP/pathopt_om_force.py +153 -0
- multioptpy/MEP/pathopt_qsm_force.py +174 -0
- multioptpy/MEP/pathopt_qsmv2_force.py +304 -0
- multioptpy/ModelFunction/__init__.py +7 -0
- multioptpy/ModelFunction/avoiding_model_function.py +29 -0
- multioptpy/ModelFunction/binary_image_ts_search_model_function.py +47 -0
- multioptpy/ModelFunction/conical_model_function.py +26 -0
- multioptpy/ModelFunction/opt_meci.py +50 -0
- multioptpy/ModelFunction/opt_mesx.py +47 -0
- multioptpy/ModelFunction/opt_mesx_2.py +49 -0
- multioptpy/ModelFunction/seam_model_function.py +27 -0
- multioptpy/ModelHessian/__init__.py +0 -0
- multioptpy/ModelHessian/approx_hessian.py +147 -0
- multioptpy/ModelHessian/calc_params.py +227 -0
- multioptpy/ModelHessian/fischer.py +236 -0
- multioptpy/ModelHessian/fischerd3.py +360 -0
- multioptpy/ModelHessian/fischerd4.py +398 -0
- multioptpy/ModelHessian/gfn0xtb.py +633 -0
- multioptpy/ModelHessian/gfnff.py +709 -0
- multioptpy/ModelHessian/lindh.py +165 -0
- multioptpy/ModelHessian/lindh2007d2.py +707 -0
- multioptpy/ModelHessian/lindh2007d3.py +822 -0
- multioptpy/ModelHessian/lindh2007d4.py +1030 -0
- multioptpy/ModelHessian/morse.py +106 -0
- multioptpy/ModelHessian/schlegel.py +144 -0
- multioptpy/ModelHessian/schlegeld3.py +322 -0
- multioptpy/ModelHessian/schlegeld4.py +559 -0
- multioptpy/ModelHessian/shortrange.py +346 -0
- multioptpy/ModelHessian/swartd2.py +496 -0
- multioptpy/ModelHessian/swartd3.py +706 -0
- multioptpy/ModelHessian/swartd4.py +918 -0
- multioptpy/ModelHessian/tshess.py +40 -0
- multioptpy/Optimizer/QHAdam.py +61 -0
- multioptpy/Optimizer/__init__.py +0 -0
- multioptpy/Optimizer/abc_fire.py +83 -0
- multioptpy/Optimizer/adabelief.py +58 -0
- multioptpy/Optimizer/adabound.py +68 -0
- multioptpy/Optimizer/adadelta.py +65 -0
- multioptpy/Optimizer/adaderivative.py +56 -0
- multioptpy/Optimizer/adadiff.py +68 -0
- multioptpy/Optimizer/adafactor.py +70 -0
- multioptpy/Optimizer/adam.py +65 -0
- multioptpy/Optimizer/adamax.py +62 -0
- multioptpy/Optimizer/adamod.py +83 -0
- multioptpy/Optimizer/adamw.py +65 -0
- multioptpy/Optimizer/adiis.py +523 -0
- multioptpy/Optimizer/afire_neb.py +282 -0
- multioptpy/Optimizer/block_hessian_update.py +709 -0
- multioptpy/Optimizer/c2diis.py +491 -0
- multioptpy/Optimizer/component_wise_scaling.py +405 -0
- multioptpy/Optimizer/conjugate_gradient.py +82 -0
- multioptpy/Optimizer/conjugate_gradient_neb.py +345 -0
- multioptpy/Optimizer/coordinate_locking.py +405 -0
- multioptpy/Optimizer/dic_rsirfo.py +1015 -0
- multioptpy/Optimizer/ediis.py +417 -0
- multioptpy/Optimizer/eve.py +76 -0
- multioptpy/Optimizer/fastadabelief.py +61 -0
- multioptpy/Optimizer/fire.py +77 -0
- multioptpy/Optimizer/fire2.py +249 -0
- multioptpy/Optimizer/fire_neb.py +92 -0
- multioptpy/Optimizer/gan_step.py +486 -0
- multioptpy/Optimizer/gdiis.py +609 -0
- multioptpy/Optimizer/gediis.py +203 -0
- multioptpy/Optimizer/geodesic_step.py +433 -0
- multioptpy/Optimizer/gpmin.py +633 -0
- multioptpy/Optimizer/gpr_step.py +364 -0
- multioptpy/Optimizer/gradientdescent.py +78 -0
- multioptpy/Optimizer/gradientdescent_neb.py +52 -0
- multioptpy/Optimizer/hessian_update.py +433 -0
- multioptpy/Optimizer/hybrid_rfo.py +998 -0
- multioptpy/Optimizer/kdiis.py +625 -0
- multioptpy/Optimizer/lars.py +21 -0
- multioptpy/Optimizer/lbfgs.py +253 -0
- multioptpy/Optimizer/lbfgs_neb.py +355 -0
- multioptpy/Optimizer/linesearch.py +236 -0
- multioptpy/Optimizer/lookahead.py +40 -0
- multioptpy/Optimizer/nadam.py +64 -0
- multioptpy/Optimizer/newton.py +200 -0
- multioptpy/Optimizer/prodigy.py +70 -0
- multioptpy/Optimizer/purtubation.py +16 -0
- multioptpy/Optimizer/quickmin_neb.py +245 -0
- multioptpy/Optimizer/radam.py +75 -0
- multioptpy/Optimizer/rfo_neb.py +302 -0
- multioptpy/Optimizer/ric_rfo.py +842 -0
- multioptpy/Optimizer/rl_step.py +627 -0
- multioptpy/Optimizer/rmspropgrave.py +65 -0
- multioptpy/Optimizer/rsirfo.py +1647 -0
- multioptpy/Optimizer/rsprfo.py +1056 -0
- multioptpy/Optimizer/sadam.py +60 -0
- multioptpy/Optimizer/samsgrad.py +63 -0
- multioptpy/Optimizer/tr_lbfgs.py +678 -0
- multioptpy/Optimizer/trim.py +273 -0
- multioptpy/Optimizer/trust_radius.py +207 -0
- multioptpy/Optimizer/trust_radius_neb.py +121 -0
- multioptpy/Optimizer/yogi.py +60 -0
- multioptpy/OtherMethod/__init__.py +0 -0
- multioptpy/OtherMethod/addf.py +1150 -0
- multioptpy/OtherMethod/dimer.py +895 -0
- multioptpy/OtherMethod/elastic_image_pair.py +629 -0
- multioptpy/OtherMethod/modelfunction.py +456 -0
- multioptpy/OtherMethod/newton_traj.py +454 -0
- multioptpy/OtherMethod/twopshs.py +1095 -0
- multioptpy/PESAnalyzer/__init__.py +0 -0
- multioptpy/PESAnalyzer/calc_irc_curvature.py +125 -0
- multioptpy/PESAnalyzer/cmds_analysis.py +152 -0
- multioptpy/PESAnalyzer/koopman_analysis.py +268 -0
- multioptpy/PESAnalyzer/pca_analysis.py +314 -0
- multioptpy/Parameters/__init__.py +0 -0
- multioptpy/Parameters/atomic_mass.py +20 -0
- multioptpy/Parameters/atomic_number.py +22 -0
- multioptpy/Parameters/covalent_radii.py +44 -0
- multioptpy/Parameters/d2.py +61 -0
- multioptpy/Parameters/d3.py +63 -0
- multioptpy/Parameters/d4.py +103 -0
- multioptpy/Parameters/dreiding.py +34 -0
- multioptpy/Parameters/gfn0xtb_param.py +137 -0
- multioptpy/Parameters/gfnff_param.py +315 -0
- multioptpy/Parameters/gnb.py +104 -0
- multioptpy/Parameters/parameter.py +22 -0
- multioptpy/Parameters/uff.py +72 -0
- multioptpy/Parameters/unit_values.py +20 -0
- multioptpy/Potential/AFIR_potential.py +55 -0
- multioptpy/Potential/LJ_repulsive_potential.py +345 -0
- multioptpy/Potential/__init__.py +0 -0
- multioptpy/Potential/anharmonic_keep_potential.py +28 -0
- multioptpy/Potential/asym_elllipsoidal_potential.py +718 -0
- multioptpy/Potential/electrostatic_potential.py +69 -0
- multioptpy/Potential/flux_potential.py +30 -0
- multioptpy/Potential/gaussian_potential.py +101 -0
- multioptpy/Potential/idpp.py +516 -0
- multioptpy/Potential/keep_angle_potential.py +146 -0
- multioptpy/Potential/keep_dihedral_angle_potential.py +105 -0
- multioptpy/Potential/keep_outofplain_angle_potential.py +70 -0
- multioptpy/Potential/keep_potential.py +99 -0
- multioptpy/Potential/mechano_force_potential.py +74 -0
- multioptpy/Potential/nanoreactor_potential.py +52 -0
- multioptpy/Potential/potential.py +896 -0
- multioptpy/Potential/spacer_model_potential.py +221 -0
- multioptpy/Potential/switching_potential.py +258 -0
- multioptpy/Potential/universal_potential.py +34 -0
- multioptpy/Potential/value_range_potential.py +36 -0
- multioptpy/Potential/void_point_potential.py +25 -0
- multioptpy/SQM/__init__.py +0 -0
- multioptpy/SQM/sqm1/__init__.py +0 -0
- multioptpy/SQM/sqm1/sqm1_core.py +1792 -0
- multioptpy/SQM/sqm2/__init__.py +0 -0
- multioptpy/SQM/sqm2/calc_tools.py +95 -0
- multioptpy/SQM/sqm2/sqm2_basis.py +850 -0
- multioptpy/SQM/sqm2/sqm2_bond.py +119 -0
- multioptpy/SQM/sqm2/sqm2_core.py +303 -0
- multioptpy/SQM/sqm2/sqm2_data.py +1229 -0
- multioptpy/SQM/sqm2/sqm2_disp.py +65 -0
- multioptpy/SQM/sqm2/sqm2_eeq.py +243 -0
- multioptpy/SQM/sqm2/sqm2_overlapint.py +704 -0
- multioptpy/SQM/sqm2/sqm2_qm.py +578 -0
- multioptpy/SQM/sqm2/sqm2_rep.py +66 -0
- multioptpy/SQM/sqm2/sqm2_srb.py +70 -0
- multioptpy/Thermo/__init__.py +0 -0
- multioptpy/Thermo/normal_mode_analyzer.py +865 -0
- multioptpy/Utils/__init__.py +0 -0
- multioptpy/Utils/bond_connectivity.py +264 -0
- multioptpy/Utils/calc_tools.py +884 -0
- multioptpy/Utils/oniom.py +96 -0
- multioptpy/Utils/pbc.py +48 -0
- multioptpy/Utils/riemann_curvature.py +208 -0
- multioptpy/Utils/symmetry_analyzer.py +482 -0
- multioptpy/Visualization/__init__.py +0 -0
- multioptpy/Visualization/visualization.py +156 -0
- multioptpy/WFAnalyzer/MO_analysis.py +104 -0
- multioptpy/WFAnalyzer/__init__.py +0 -0
- multioptpy/Wrapper/__init__.py +0 -0
- multioptpy/Wrapper/autots.py +1239 -0
- multioptpy/Wrapper/ieip_wrapper.py +93 -0
- multioptpy/Wrapper/md_wrapper.py +92 -0
- multioptpy/Wrapper/neb_wrapper.py +94 -0
- multioptpy/Wrapper/optimize_wrapper.py +76 -0
- multioptpy/__init__.py +5 -0
- multioptpy/entrypoints.py +916 -0
- multioptpy/fileio.py +660 -0
- multioptpy/ieip.py +340 -0
- multioptpy/interface.py +1086 -0
- multioptpy/irc.py +529 -0
- multioptpy/moleculardynamics.py +432 -0
- multioptpy/neb.py +1267 -0
- multioptpy/optimization.py +1553 -0
- multioptpy/optimizer.py +709 -0
- multioptpy-1.20.2.dist-info/METADATA +438 -0
- multioptpy-1.20.2.dist-info/RECORD +246 -0
- multioptpy-1.20.2.dist-info/WHEEL +5 -0
- multioptpy-1.20.2.dist-info/entry_points.txt +9 -0
- multioptpy-1.20.2.dist-info/licenses/LICENSE +674 -0
- multioptpy-1.20.2.dist-info/top_level.txt +1 -0
multioptpy/Optimizer/rl_step.py
@@ -0,0 +1,627 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Normal
import copy
import os
from collections import deque

"""
ref.:
Kabir Ahuja, William H. Green, and Yi-Pei Li
Journal of Chemical Theory and Computation 2021 17 (2), 818-825
DOI: 10.1021/acs.jctc.0c00971
"""


class SelfAttention(nn.Module):
    """Self-attention module for capturing relationships between atomic coordinates."""

    def __init__(self, embed_dim, num_heads=4, dropout=0.1):
        super(SelfAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Forward pass through self-attention layer."""
        # Reshape for attention: [seq_len, batch, embed_dim]
        x_reshaped = x.transpose(0, 1)

        # Apply self-attention
        attn_output, _ = self.attention(x_reshaped, x_reshaped, x_reshaped)

        # Add & norm (residual connection)
        x_out = self.norm(x_reshaped + attn_output)

        # Return to original shape
        return x_out.transpose(0, 1)

class StepSizePolicy(nn.Module):
    """Policy network with self-attention for determining optimal step size scaling."""

    def __init__(self, state_dim, hidden_dim=128, num_heads=4, dropout=0.1):
        super(StepSizePolicy, self).__init__()
        self.state_dim = state_dim

        # Explicitly define embedding dimension
        self.embed_dim = hidden_dim
        self.seq_len = 4  # Number of attention sequence elements

        # Feature extraction layers
        self.embedding = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU()
        )

        # Attention mechanism
        self.attn_proj = nn.Linear(hidden_dim, self.seq_len * self.embed_dim)
        self.attention = SelfAttention(self.embed_dim, num_heads, dropout)

        # Step size prediction head (mu and sigma for normal distribution)
        self.step_size_mu = nn.Sequential(
            nn.Linear(self.seq_len * hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Sigmoid()  # Bound to [0,1] then will be scaled
        )

        self.step_size_sigma = nn.Sequential(
            nn.Linear(self.seq_len * hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Softplus()  # Ensure positive standard deviation
        )

        # Value head for critic
        self.value_head = nn.Sequential(
            nn.Linear(self.seq_len * hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

    def forward(self, state):
        """Forward pass to predict step size parameters and value."""
        batch_size = state.shape[0]

        # Extract features
        features = self.embedding(state)

        # Apply attention
        attn_ready = self.attn_proj(features).view(batch_size, self.seq_len, self.embed_dim)
        attn_output = self.attention(attn_ready)
        attn_output = attn_output.reshape(batch_size, -1)  # Flatten back

        # Predict step size parameters
        mu = self.step_size_mu(attn_output)
        sigma = self.step_size_sigma(attn_output) + 0.01  # Add minimum std for exploration

        # Predict state value
        value = self.value_head(attn_output)

        return mu, sigma, value

class PPOMemory:
    """Memory buffer for PPO algorithm."""

    def __init__(self, state_dim, buffer_size=1000, gamma=0.99, gae_lambda=0.95):
        self.states = np.zeros((buffer_size, state_dim), dtype=np.float32)
        self.actions = np.zeros((buffer_size, 1), dtype=np.float32)
        self.probs = np.zeros((buffer_size,), dtype=np.float32)
        self.vals = np.zeros((buffer_size,), dtype=np.float32)
        self.rewards = np.zeros((buffer_size,), dtype=np.float32)
        self.dones = np.zeros((buffer_size,), dtype=np.float32)

        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.ptr, self.path_start_idx, self.max_size = 0, 0, buffer_size

    def store(self, state, action, prob, val, reward, done):
        """Store transition in buffer."""
        idx = self.ptr % self.max_size

        self.states[idx] = state
        self.actions[idx] = action
        self.probs[idx] = prob
        self.vals[idx] = val
        self.rewards[idx] = reward
        self.dones[idx] = done

        self.ptr += 1

    def compute_advantages(self, last_val=0):
        """Compute Generalized Advantage Estimation."""
        path_slice = slice(self.path_start_idx, self.ptr)
        rewards = np.append(self.rewards[path_slice], last_val)
        values = np.append(self.vals[path_slice], last_val)
        dones = np.append(self.dones[path_slice], 0)

        advantages = np.zeros_like(rewards[:-1])
        lastgaelam = 0

        for t in reversed(range(len(rewards) - 1)):
            delta = rewards[t] + self.gamma * values[t + 1] * (1 - dones[t]) - values[t]
            advantages[t] = lastgaelam = delta + self.gamma * self.gae_lambda * (1 - dones[t]) * lastgaelam

        returns = advantages + self.vals[path_slice]

        self.path_start_idx = self.ptr
        return advantages, returns

    def get_batch(self):
        """Get all stored data as batch."""
        assert self.ptr > self.path_start_idx, "No transitions to process"
        path_slice = slice(self.path_start_idx, self.ptr)
        advantages, returns = self.compute_advantages()

        return (
            self.states[path_slice],
            self.actions[path_slice],
            self.probs[path_slice],
            returns,
            advantages
        )

    def clear(self):
        """Clear memory after policy update."""
        self.ptr, self.path_start_idx = 0, 0

class RLStepSizeOptimizer:
    """
    Reinforcement Learning optimizer for adaptive step size optimization.
    Uses PPO with self-attention to learn optimal step size scaling
    factors based on gradient and displacement history.
    """

    def __init__(self):
        # RL parameters
        self.history_length = 5  # Number of past steps to store in history
        self.min_step_size = 0.05  # Minimum step size scaling factor
        self.max_step_size = 2.0  # Maximum step size scaling factor
        self.default_step_size = 0.5  # Default step size when not using RL
        self.safe_step_max = 1.5  # Maximum allowed step size in safe mode

        # Training parameters
        self.learning_rate = 3e-4  # Learning rate for policy updates
        self.clip_ratio = 0.2  # PPO clipping parameter
        self.n_epochs = 10  # Number of policy update epochs
        self.batch_size = 64  # Batch size for policy updates
        self.gamma = 0.99  # Discount factor
        self.gae_lambda = 0.95  # GAE lambda parameter
        self.training_mode = True  # Whether to update policy during optimization

        # Adaptive step size control
        self.rl_weight = 0.1  # Weight of RL prediction vs default step size
        self.rl_weight_min = 0.01  # Minimum RL weight during optimization
        self.rl_weight_max = 0.5  # Maximum RL weight during optimization
        self.rl_weight_decay = 0.95  # Decay factor for RL weight on failure
        self.rl_weight_growth = 1.05  # Growth factor for RL weight on success

        # Performance monitoring
        self.step_success_threshold = 0.7  # Energy decrease ratio to consider step successful
        self.consecutive_failures = 0  # Count of consecutive unsuccessful steps
        self.max_failures = 3  # Max failures before reducing RL weight
        self.recovery_steps = 2  # Steps in recovery mode
        self.current_recovery = 0  # Current step in recovery

        # History storage
        self.geom_history = deque(maxlen=self.history_length)
        self.grad_history = deque(maxlen=self.history_length)
        self.displacement_history = deque(maxlen=self.history_length)
        self.step_history = deque(maxlen=self.history_length)
        self.energy_history = deque(maxlen=self.history_length)

        # Model components
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.state_dim = None  # Will be set dynamically
        self.policy = None  # RL policy network
        self.policy_old = None  # Target network for stable updates
        self.optimizer = None  # Policy optimizer
        self.memory = None  # Experience replay buffer

        # Configure paths
        self.model_dir = os.path.join(os.getcwd(), 'rl_models')
        os.makedirs(self.model_dir, exist_ok=True)

        # Initialization flags
        self.initialization = True
        self.iter = 0

        print(f"RL Step Size Optimizer initialized. Device: {self.device}")

    def _init_rl_components(self, state_dim):
        """Initialize RL components based on input dimensions."""
        self.state_dim = state_dim

        # Create policy network
        self.policy = StepSizePolicy(
            state_dim=state_dim,
            hidden_dim=128
        ).to(self.device)

        # Create target network
        self.policy_old = copy.deepcopy(self.policy)

        # Create optimizer
        self.optimizer = optim.Adam(self.policy.parameters(), lr=self.learning_rate)

        # Create replay buffer
        self.memory = PPOMemory(
            state_dim=state_dim,
            buffer_size=1000,
            gamma=self.gamma,
            gae_lambda=self.gae_lambda
        )

        # Try to load pre-trained model if available
        model_path = os.path.join(self.model_dir, "step_size_policy.pt")
        if os.path.exists(model_path):
            try:
                self.policy.load_state_dict(torch.load(model_path, map_location=self.device))
                self.policy_old.load_state_dict(self.policy.state_dict())
                print("Loaded pre-trained RL step size policy")
            except Exception as e:
                print(f"Could not load pre-trained model: {str(e)}")

        print(f"RL components initialized with state dimension: {state_dim}")

    def _get_state_representation(self, geom_num_list, B_g):
        """
        Construct state representation for RL policy.
        Includes current geometry, gradient, and history information.
        """
        # Current gradient and geometry are the primary components
        current_grad = B_g.flatten()

        # Calculate gradient norm and add as a feature
        grad_norm = np.linalg.norm(current_grad)
        norm_feature = np.array([grad_norm])

        # Initialize lists to store state components
        state_components = [current_grad, norm_feature]

        # Add gradient history
        for past_grad in list(self.grad_history):
            # Only use important information to keep state size reasonable
            if len(past_grad) > 30:  # If gradient is very large
                # Subsample or use statistics
                past_grad_norm = np.linalg.norm(past_grad)
                past_grad_stats = np.array([past_grad_norm, past_grad.mean(), past_grad.std()])
                state_components.append(past_grad_stats)
            else:
                state_components.append(past_grad)

        # Add displacement history
        for disp in list(self.displacement_history):
            if len(disp) > 30:  # If displacement is very large
                disp_norm = np.linalg.norm(disp)
                disp_stats = np.array([disp_norm, disp.mean(), disp.std()])
                state_components.append(disp_stats)
            else:
                state_components.append(disp)

        # Add step size history
        if len(self.step_history) > 0:
            step_sizes = np.array([step for step in self.step_history])
            state_components.append(step_sizes)
        else:
            state_components.append(np.array([0.5]))  # Default if no history

        # Add energy change history if available
        if len(self.energy_history) > 1:
            # Convert to relative energy changes
            energy_array = np.array(list(self.energy_history))
            energy_changes = np.diff(energy_array) / (np.abs(energy_array[:-1]) + 1e-10)
            state_components.append(energy_changes)
        else:
            state_components.append(np.array([0.0]))  # No energy change history

        # Ensure state vector has consistent size by padding or truncating
        max_state_dim = 200  # Maximum allowed state dimension
        state_array = np.concatenate([comp.flatten() for comp in state_components])

        if len(state_array) > max_state_dim:
            # Truncate if too large
            print(f"Warning: State dimension {len(state_array)} exceeds max {max_state_dim}, truncating")
            state_array = state_array[:max_state_dim]
        elif len(state_array) < max_state_dim:
            # Pad if too small
            padding = np.zeros(max_state_dim - len(state_array))
            state_array = np.concatenate([state_array, padding])

        return state_array

    def _predict_step_size(self, state):
        """Predict step size using the RL policy."""
        try:
            with torch.no_grad():
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
                mu, sigma, value = self.policy_old(state_tensor)

                # Create normal distribution
                dist = Normal(mu, sigma)

                # Sample action
                action = dist.sample()
                log_prob = dist.log_prob(action).sum(dim=-1)

                # Convert to numpy
                action_np = action.cpu().numpy()[0, 0]
                log_prob_np = log_prob.cpu().numpy()[0]
                value_np = value.cpu().numpy()[0, 0]

                # Scale action from [0,1] to [min_step_size, max_step_size]
                scaled_action = self.min_step_size + action_np * (self.max_step_size - self.min_step_size)

                return scaled_action, log_prob_np, value_np
        except Exception as e:
            print(f"Error in step size prediction: {str(e)}")
            return self.default_step_size, 0.0, 0.0

    def _calculate_reward(self, energy, prev_energy, grad_norm, prev_grad_norm, step_size):
        """Calculate reward based on energy and gradient improvements."""
        # Base reward on energy improvement
        if prev_energy is not None:
            energy_change = prev_energy - energy
            energy_reward = 10.0 * energy_change / (abs(prev_energy) + 1e-10)
        else:
            energy_reward = 0.0

        # Add reward for gradient reduction
        if prev_grad_norm is not None:
            grad_reduction = prev_grad_norm - grad_norm
            grad_reward = 0.5 * grad_reduction / (prev_grad_norm + 1e-10)
        else:
            grad_reward = 0.0

        # Penalize extreme step sizes
        step_size_penalty = 0.0
        if step_size < 0.1 or step_size > 1.9:
            step_size_penalty = -0.2 * abs(step_size - 1.0)

        # Combine rewards
        total_reward = energy_reward + grad_reward + step_size_penalty

        # Strong penalty for energy increases (check prev_energy first so
        # energy_change is only referenced when it was actually computed)
        if prev_energy is not None and energy_change < 0:
            energy_increase_penalty = -5.0 * abs(energy_change) / (abs(prev_energy) + 1e-10)
            total_reward += energy_increase_penalty

        return total_reward

    def _update_policy(self):
        """Update policy using PPO algorithm."""
        if not self.training_mode or self.memory is None or self.memory.ptr <= self.memory.path_start_idx:
            return

        # Get batch data
        states, actions, old_log_probs, returns, advantages = self.memory.get_batch()

        # Convert to tensors
        states = torch.FloatTensor(states).to(self.device)
        actions = torch.FloatTensor(actions).to(self.device)
        old_log_probs = torch.FloatTensor(old_log_probs).to(self.device)
        returns = torch.FloatTensor(returns).to(self.device)
        advantages = torch.FloatTensor(advantages).to(self.device)

        # Normalize advantages
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # PPO update for multiple epochs
        for _ in range(self.n_epochs):
            # Process in batches
            for start_idx in range(0, len(states), self.batch_size):
                end_idx = min(start_idx + self.batch_size, len(states))
                batch_indices = slice(start_idx, end_idx)

                # Get batch data
                batch_states = states[batch_indices]
                batch_actions = actions[batch_indices]
                batch_old_log_probs = old_log_probs[batch_indices]
                batch_returns = returns[batch_indices]
                batch_advantages = advantages[batch_indices]

                # Get current policy outputs
                mu, sigma, values = self.policy(batch_states)
                dist = Normal(mu, sigma)

                # Calculate new log probabilities
                new_log_probs = dist.log_prob(batch_actions).sum(1, keepdim=True)

                # Ensure consistent tensor shapes
                batch_old_log_probs = batch_old_log_probs.view(-1, 1)
                batch_returns = batch_returns.view(-1, 1)
                batch_advantages = batch_advantages.view(-1, 1)

                # Calculate ratio for PPO
                ratio = torch.exp(new_log_probs - batch_old_log_probs)

                # PPO losses
                surrogate1 = ratio * batch_advantages
                surrogate2 = torch.clamp(ratio, 1 - self.clip_ratio, 1 + self.clip_ratio) * batch_advantages
                actor_loss = -torch.min(surrogate1, surrogate2).mean()

                # Value loss with consistent shapes
                value_loss = F.mse_loss(values, batch_returns)

                # Entropy bonus for exploration
                entropy = dist.entropy().mean()
                entropy_loss = -0.01 * entropy  # Small entropy bonus

                # Total loss
                total_loss = actor_loss + 0.5 * value_loss + entropy_loss

                # Perform optimization step
                self.optimizer.zero_grad()
                total_loss.backward()
                self.optimizer.step()

        # Sync the target network, save the policy, and clear the buffer
        self.policy_old.load_state_dict(self.policy.state_dict())
        torch.save(self.policy.state_dict(), os.path.join(self.model_dir, "step_size_policy.pt"))
        self.memory.clear()

    def run(self, geom_num_list, B_g, pre_B_g, B_e, pre_B_e, original_move_vector):
        """
        Run RL-based step size optimization.

        Parameters:
        -----------
        geom_num_list : numpy.ndarray
            Current geometry (flattened)
        B_g : numpy.ndarray
            Current gradient
        pre_B_g : numpy.ndarray
            Previous gradient
        B_e : float
            Current energy
        pre_B_e : float
            Previous energy
        original_move_vector : numpy.ndarray
            Original optimization step

        Returns:
        --------
        numpy.ndarray
            Optimized move vector
        """
        print("RL Step Size Optimization")

        # Handle first step initialization
        if self.initialization:
            self.initialization = False

            # Store initial values
            if B_e is not None:
                self.energy_history.append(B_e)
            self.grad_history.append(B_g.flatten())

            print(f"First step, using default step size: {self.default_step_size}")
            return self.default_step_size * original_move_vector

        # Extract dimensions and norms
        n_coords = len(geom_num_list)
        grad_norm = np.linalg.norm(B_g)
        prev_grad_norm = np.linalg.norm(pre_B_g) if pre_B_g is not None else None

        # Calculate displacement
        if pre_B_g is not None:
            displacement = (geom_num_list - pre_B_g).flatten()
            self.displacement_history.append(displacement)

        # Calculate energy delta if energies provided
        energy_decreased = False
        if B_e is not None and pre_B_e is not None:
            energy_delta = pre_B_e - B_e
            energy_decreased = energy_delta > 0
            energy_ratio = abs(energy_delta / (abs(pre_B_e) + 1e-10))
            successful_step = energy_ratio > self.step_success_threshold
        else:
            successful_step = True  # Assume success if no energies provided

        # Store current values in history
        if B_e is not None:
            self.energy_history.append(B_e)
        self.grad_history.append(B_g.flatten())
        self.geom_history.append(geom_num_list.flatten())

        # If we're in recovery mode, use a conservative step size
        if self.current_recovery > 0:
            self.current_recovery -= 1
            step_size = min(0.5, self.default_step_size)

            print(f"In recovery mode ({self.current_recovery} steps remaining)")
            print(f"Using conservative step size: {step_size}")

            self.step_history.append(step_size)
            return step_size * original_move_vector

        # Get state representation
        state = self._get_state_representation(geom_num_list, B_g)

        # Initialize RL components if not already done
        if self.policy is None:
            self._init_rl_components(len(state))

        # Predict step size using RL policy
        rl_step_size, log_prob, value = self._predict_step_size(state)

        # Adjust for safety based on convergence
        if grad_norm < 0.05:  # Near convergence
            # Use more conservative step size near convergence
            safe_step_size = min(rl_step_size, self.safe_step_max)
            print(f"Near convergence (gradient norm: {grad_norm:.6f}), using safer step size: {safe_step_size:.4f}")
            rl_step_size = safe_step_size

        # Apply adaptive weighting between RL and default
        blended_step_size = self.rl_weight * rl_step_size + (1.0 - self.rl_weight) * self.default_step_size

        # Store step size in history
        self.step_history.append(blended_step_size)

        # Calculate reward if sufficient history exists
        if len(self.energy_history) >= 2 and B_e is not None and pre_B_e is not None:
            reward = self._calculate_reward(B_e, pre_B_e, grad_norm, prev_grad_norm, blended_step_size)

            # Update consecutive failures/successes tracking
            if successful_step:
                self.consecutive_failures = 0
                # Slowly increase RL weight on success
                self.rl_weight = min(self.rl_weight_max, self.rl_weight * self.rl_weight_growth)
            else:
                self.consecutive_failures += 1
                if self.consecutive_failures >= self.max_failures:
                    # Switch to recovery mode after multiple failures
                    self.current_recovery = self.recovery_steps
                    # Reduce RL weight
                    self.rl_weight = max(self.rl_weight_min, self.rl_weight * self.rl_weight_decay)
                    print(f"Multiple failures detected, reducing RL weight to {self.rl_weight:.4f}")
                    self.consecutive_failures = 0

            # Store experience for learning if in training mode
            if self.training_mode and self.memory is not None:
                # Convert step size to [0,1] range for storage
                normalized_step_size = (blended_step_size - self.min_step_size) / (self.max_step_size - self.min_step_size)
                normalized_step_size = np.clip(normalized_step_size, 0, 1)

                # Store the experience
                done = (grad_norm < 0.01)  # Consider done when converged
                self.memory.store(
                    state=state,
                    action=normalized_step_size,
                    prob=log_prob,
                    val=value,
                    reward=reward,
                    done=done
                )

        # Update policy periodically
        if self.iter > 0 and self.iter % 10 == 0:
            self._update_policy()

        # Generate optimized move vector
        optimized_move_vector = blended_step_size * original_move_vector

        # Print step information
        print("Original step size: 1.0")
        print(f"RL step size: {rl_step_size:.4f}")
        print(f"Blended step size: {blended_step_size:.4f} (RL weight: {self.rl_weight:.2f})")
        if B_e is not None and pre_B_e is not None:
            print(f"Energy change: {B_e - pre_B_e:.6f}")
        print(f"Gradient norm: {grad_norm:.6f}")

        # Safety check for numerical issues and extreme values
        if np.any(np.isnan(optimized_move_vector)) or np.any(np.isinf(optimized_move_vector)):
            print("Warning: Numerical issues in optimized step, using scaled original step")
            optimized_move_vector = 0.5 * original_move_vector
        elif np.linalg.norm(optimized_move_vector) > 5.0 * np.linalg.norm(original_move_vector):
            print("Warning: Step size too large, scaling down")
            scale_factor = 5.0 * np.linalg.norm(original_move_vector) / np.linalg.norm(optimized_move_vector)
            optimized_move_vector = scale_factor * optimized_move_vector

        self.iter += 1
        return optimized_move_vector
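
How RLStepSizeOptimizer is wired into the package's other optimizers is not shown in this diff. The sketch below is only a minimal illustration of the call pattern implied by run(geom_num_list, B_g, pre_B_g, B_e, pre_B_e, original_move_vector), driven on a toy quadratic surface; the toy calculator, the raw step, and the sign convention are assumptions rather than package API, and PyTorch must be installed.

import numpy as np
from multioptpy.Optimizer.rl_step import RLStepSizeOptimizer

# Toy quadratic energy surface standing in for a real PES (assumption, not package API).
def toy_energy_and_gradient(x):
    return 0.5 * float(x @ x), x.copy()

rl = RLStepSizeOptimizer()
geometry = np.array([0.8, -0.6, 0.3])
prev_grad, prev_energy = None, None

for _ in range(20):
    energy, grad = toy_energy_and_gradient(geometry)
    raw_step = 0.5 * grad  # stand-in for the parent optimizer's proposed step (assumption)
    move = rl.run(geometry, grad, prev_grad, energy, prev_energy, raw_step)
    geometry = geometry - move  # sign convention assumed
    prev_grad, prev_energy = grad, energy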
multioptpy/Optimizer/rmspropgrave.py
@@ -0,0 +1,65 @@
import numpy as np
import copy


class RMSpropGrave:
    def __init__(self, **config):
        # arXiv: https://arxiv.org/abs/1308.0850v5
        self.RMSpropGrave_count = 1
        self.DELTA = 0.75
        self.beta_m = 0.95
        self.beta_v = 0.95
        self.Epsilon = 1e-10
        self.eta = 0.0001
        self.nue = 0.9
        self.Initialization = True
        self.config = config

        self.hessian = None
        self.bias_hessian = None

        return

    def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0, pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
        print("RMSpropGrave")
        if self.Initialization:
            self.RMSpropGrave_m = geom_num_list * 0.0
            self.RMSpropGrave_v = geom_num_list * 0.0
            self.prev_move_vector = geom_num_list * 0.0
            self.Initialization = False

        RMSpropGrave_count = self.RMSpropGrave_count
        RMSpropGrave_m = self.RMSpropGrave_m
        RMSpropGrave_v = self.RMSpropGrave_v
        new_RMSpropGrave_m = RMSpropGrave_m * 0.0
        new_RMSpropGrave_v = RMSpropGrave_v * 0.0

        for i in range(len(geom_num_list)):
            new_RMSpropGrave_m[i] = copy.copy(self.beta_m * RMSpropGrave_m[i] + (1.0 - self.beta_m) * (B_g[i]))
            new_RMSpropGrave_v[i] = copy.copy(self.beta_v * RMSpropGrave_v[i] + (1.0 - self.beta_v) * (B_g[i]) ** 2)

        move_vector = []

        for i in range(len(geom_num_list)):
            tmp = self.nue * self.prev_move_vector[i] + B_g[i] * self.eta / np.sqrt(np.abs(new_RMSpropGrave_v[i] - new_RMSpropGrave_m[i] ** 2) + self.Epsilon)  # sqrt(E[g^2] - E[g]^2 + eps), per the cited Graves (2013) formulation
            move_vector.append(self.DELTA * tmp)

        self.RMSpropGrave_m = new_RMSpropGrave_m
        self.RMSpropGrave_v = new_RMSpropGrave_v
        self.prev_move_vector = move_vector
        self.RMSpropGrave_count += 1

        return move_vector  # Bohr
    def set_hessian(self, hessian):
        self.hessian = hessian
        return

    def set_bias_hessian(self, bias_hessian):
        self.bias_hessian = bias_hessian
        return

    def get_hessian(self):
        return self.hessian

    def get_bias_hessian(self):
        return self.bias_hessian
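
For reference, the class above follows the momentum-smoothed RMSProp of the cited preprint (Graves, arXiv:1308.0850). Below is a compact NumPy sketch of a single update with illustrative names; beta, eta, nu, and eps mirror the class attributes, while the extra DELTA damping and the caller's sign convention for applying the move are omitted.

import numpy as np

# One Graves-style RMSProp step: running first/second moments of the gradient,
# then a momentum-smoothed step scaled by the estimated gradient variance.
def rmsprop_graves_step(g, m, v, prev_step, beta=0.95, eta=1e-4, nu=0.9, eps=1e-10):
    m = beta * m + (1.0 - beta) * g
    v = beta * v + (1.0 - beta) * g ** 2
    step = nu * prev_step + eta * g / np.sqrt(np.abs(v - m ** 2) + eps)
    return step, m, v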