cpp-hf 1.0__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -125,20 +125,42 @@ if (HF_USE_OPENMP)
125
125
  endif()
126
126
 
127
127
  # Build module (Python name: cpp_hf)
128
- pybind11_add_module(cpp_hf cpp_hf_module.cpp)
128
+ pybind11_add_module(cpp_hf cpp_hf.cpp)
129
129
  target_compile_features(cpp_hf PRIVATE cxx_std_17)
130
130
# Optimisation flags for the cpp_hf extension module.
#
# MSVC gets a fixed release configuration. Other compilers get -O3 with hidden
# symbol visibility, plus two opt-out groups of aggressive flags:
#   HF_USE_FAST_MATH   -ffast-math / -ffp-contract=fast (relaxes IEEE semantics,
#                      e.g. NaN/Inf handling and operation reordering)
#   HF_USE_NATIVE_ARCH -march=native (or -mcpu=native); the resulting binary may
#                      use instructions that CPUs other than the build host lack
# Both default to ON, so an unconfigured build behaves exactly as before; pass
# -DHF_USE_NATIVE_ARCH=OFF (and/or -DHF_USE_FAST_MATH=OFF) when building
# redistributable or bit-reproducible binaries.
if (MSVC)
  target_compile_options(cpp_hf PRIVATE /O2 /DNDEBUG)
  target_compile_definitions(cpp_hf PRIVATE _SILENCE_CXX17_RESULT_OF_DEPRECATION_WARNING)
else()
  target_compile_options(cpp_hf PRIVATE -O3 -DNDEBUG -fvisibility=hidden)
  include(CheckCXXCompilerFlag)

  option(HF_USE_FAST_MATH "Compile cpp_hf with -ffast-math and -ffp-contract=fast when supported" ON)
  if (HF_USE_FAST_MATH)
    check_cxx_compiler_flag("-ffast-math" HAVE_FFAST_MATH)
    if (HAVE_FFAST_MATH)
      target_compile_options(cpp_hf PRIVATE -ffast-math)
    endif()
    check_cxx_compiler_flag("-ffp-contract=fast" HAVE_FFP_CONTRACT_FAST)
    if (HAVE_FFP_CONTRACT_FAST)
      target_compile_options(cpp_hf PRIVATE -ffp-contract=fast)
    endif()
  endif()

  option(HF_USE_NATIVE_ARCH "Tune cpp_hf for the build machine's CPU (-march=native / -mcpu=native)" ON)
  if (HF_USE_NATIVE_ARCH)
    check_cxx_compiler_flag("-march=native" HAVE_MARCH_NATIVE)
    if (HAVE_MARCH_NATIVE)
      target_compile_options(cpp_hf PRIVATE -march=native)
    else()
      # Some toolchains/targets only accept the -mcpu spelling.
      check_cxx_compiler_flag("-mcpu=native" HAVE_MCPU_NATIVE)
      if (HAVE_MCPU_NATIVE)
        target_compile_options(cpp_hf PRIVATE -mcpu=native)
      endif()
    endif()
  endif()
endif()
136
154
  if (APPLE)
137
155
  set_target_properties(cpp_hf PROPERTIES MACOSX_RPATH ON)
138
156
  endif()
139
157
 
140
158
  # Headers
141
- target_include_directories(cpp_hf PRIVATE ${Python_NumPy_INCLUDE_DIRS})
159
+ target_include_directories(cpp_hf PRIVATE
160
+ ${Python_NumPy_INCLUDE_DIRS}
161
+ ${CMAKE_CURRENT_SOURCE_DIR}
162
+ ${CMAKE_CURRENT_SOURCE_DIR}/include
163
+ )
142
164
  if (DEFINED BOOST_INCLUDE_DIR)
143
165
  target_include_directories(cpp_hf PRIVATE ${BOOST_INCLUDE_DIR})
144
166
  endif()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: cpp_hf
3
- Version: 1.0
3
+ Version: 1.0.1
4
4
  Summary: Hartree–Fock (k-grid) with FFTW + Eigen via pybind11
5
5
  Keywords: hartree-fock,fftw,eigen,pybind11,condensed-matter,physics
6
6
  Author: ContiMod contributors
@@ -0,0 +1,225 @@
1
+ // cpp_hf.cpp (multicore-optimized; original mixing kept; 7 fixes applied)
2
+ // C++17 + pybind11 + Eigen + FFTW (guru, batched 2D) + optional Boost (toms748)
3
+ // Layout: (nk1, nk2, d, d) row-major (C-order)
4
+
5
+ #include <pybind11/pybind11.h>
6
+ #include <pybind11/numpy.h>
7
+ #include <pybind11/stl.h>
8
+
9
+ #include <Eigen/Core>
10
+ #include <Eigen/Eigenvalues>
11
+ #include "cpp_hf/mixers.hpp"
12
+ #include "cpp_hf/utils.hpp"
13
+ #include "cpp_hf/fftw_batched2d.hpp"
14
+ #include "cpp_hf/hf_kernel.hpp"
15
+
16
+ // fftw3.h is included by cpp_hf/fftw_batched2d.hpp
17
+
18
+ #include <vector>
19
+ #include <complex>
20
+ #include <stdexcept>
21
+ #include <algorithm>
22
+ #include <numeric>
23
+ #include <limits>
24
+ #include <memory>
25
+ #include <mutex>
26
+ #include <cmath>
27
+
28
+ #ifdef _OPENMP
29
+ #include <omp.h>
30
+ #endif
31
+
32
+ // ---- Boost root solver (included unconditionally: Boost.Math headers are required to build this file) ----
33
+ #include <boost/math/tools/toms748_solve.hpp>
34
+ #include <boost/math/tools/roots.hpp>
35
+
36
+ namespace py = pybind11;
37
+ using cxd = std::complex<double>;
38
+ using MatC = Eigen::Matrix<std::complex<double>, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
39
+ using Vecd = Eigen::VectorXd;
40
+
41
+ // ---------------- Python-exposed function ----------------
42
+ py::tuple hartreefock_iteration_cpp(
43
+ py::array_t<double, py::array::c_style | py::array::forcecast> weights, // (nk1,nk2)
44
+ py::array_t<cxd, py::array::c_style | py::array::forcecast> hamiltonian, // (nk1,nk2,d,d)
45
+ py::array_t<cxd, py::array::c_style | py::array::forcecast> v_coulomb, // (nk1,nk2,dv1,dv2)
46
+ py::array_t<cxd, py::array::c_style | py::array::forcecast> p0, // (nk1,nk2,d,d)
47
+ double electron_density0,
48
+ double T,
49
+ size_t max_iter,
50
+ double comm_tol,
51
+ size_t diis_size,
52
+ double mixing_alpha
53
+ ) {
54
+ if (hamiltonian.ndim()!=4) throw std::invalid_argument("H must be (nk1,nk2,d,d)");
55
+ const size_t nk1 = hamiltonian.shape(0), nk2 = hamiltonian.shape(1), d = hamiltonian.shape(2);
56
+ if ((size_t)hamiltonian.shape(3)!=d) throw std::invalid_argument("H last two dims must be equal (d,d)");
57
+ if (weights.ndim()!=2 || (size_t)weights.shape(0)!=nk1 || (size_t)weights.shape(1)!=nk2) throw std::invalid_argument("weights must be (nk1,nk2)");
58
+ if (p0.ndim()!=4 || (size_t)p0.shape(0)!=nk1 || (size_t)p0.shape(1)!=nk2 || (size_t)p0.shape(2)!=d || (size_t)p0.shape(3)!=d)
59
+ throw std::invalid_argument("p0 must be (nk1,nk2,d,d)");
60
+ if (v_coulomb.ndim()!=4 || (size_t)v_coulomb.shape(0)!=nk1 || (size_t)v_coulomb.shape(1)!=nk2)
61
+ throw std::invalid_argument("V must be (nk1,nk2,dv1,dv2)");
62
+
63
+ hf::HFKernel kernel(nk1,nk2,d, weights, hamiltonian, v_coulomb, T, electron_density0);
64
+
65
+ std::vector<cxd> P(p0.data(), p0.data()+ (nk1*nk2*d*d));
66
+ hf::DiisState cdiis(diis_size); // wired in (fix #5)
67
+ hf::EdiisState ediis(diis_size);
68
+
69
+ double e_fin = 0.0; size_t k_fin = 0; double mu_fin = 0.0;
70
+
71
+ py::gil_scoped_release nogil;
72
+
73
+ enum class Phase { EDIIS, CDIIS, BROYDEN };
74
+ Phase last_phase = Phase::EDIIS;
75
+ const size_t n_flat = nk1*nk2*d*d;
76
+ hf::BroydenState bro_state(diis_size, n_flat);
77
+
78
+ // Relative thresholds based on target comm_tol; EDIIS -> CDIIS -> Broyden
79
+ // Switch earlier to faster mixers to reduce iteration count
80
+ const double to_cdiis = 9.0 * comm_tol;
81
+ const double to_broyden = 1.5 * comm_tol;
82
+ const double cdiis_blend_keep = 0.5, cdiis_blend_new = 0.5; // slightly more aggressive blend in CDIIS
83
+
84
+ for (size_t k=0; k<max_iter; ++k) {
85
+ // 1) Diagonalize F[P] to build P_new and compute μ
86
+ auto call_result = kernel.call(P);
87
+ std::vector<cxd> P_new = std::move(call_result.first);
88
+ const double mu = call_result.second;
89
+
90
+ // 2) Build F[P_new] and energy once; also cache EVD(F[P_new]) for preconditioner
91
+ std::vector<cxd> F_new;
92
+ double e_new = 0.0;
93
+ kernel.fock_energy_and_cache_evd(P_new, F_new, e_new);
94
+
95
+ // 3) Commutator residual per k, and weighted RMS (fix #7)
96
+ std::vector<cxd> comm(P_new.size());
97
+ double sum_w_c2 = 0.0;
98
+
99
+ #ifdef _OPENMP
100
+ #pragma omp parallel for collapse(2) reduction(+:sum_w_c2) schedule(static)
101
+ #endif
102
+ for (long long k1i=0;k1i<(long long)nk1;++k1i)
103
+ for (long long k2i=0;k2i<(long long)nk2;++k2i) {
104
+ const size_t base = offset(nk2,d,(size_t)k1i,(size_t)k2i,0,0);
105
+ Eigen::Map<MatC> Fk(&F_new[base], d, d);
106
+ Eigen::Map<MatC> Pk(&P_new[base], d, d);
107
+ MatC C(d,d);
108
+ C.noalias() = Fk * Pk - Pk * Fk;
109
+ const double wk = kernel.weights[(size_t)k1i*nk2 + (size_t)k2i];
110
+ sum_w_c2 += wk * C.cwiseAbs2().sum();
111
+ Eigen::Map<MatC>(&comm[base], d, d) = C;
112
+ }
113
+
114
+ const double comm_rms = std::sqrt(sum_w_c2 / std::max(1e-30, kernel.weight_sum));
115
+
116
+ if (comm_rms < comm_tol) {
117
+ P.swap(P_new); e_fin = e_new; k_fin = k; mu_fin = mu;
118
+ break;
119
+ }
120
+
121
+ // 4) Mixer schedule with CDIIS in the middle (fix #5)
122
+ Phase phase_now = Phase::BROYDEN;
123
+ if (comm_rms > to_cdiis) phase_now = Phase::EDIIS;
124
+ else if (comm_rms > to_broyden) phase_now = Phase::CDIIS;
125
+
126
+ const bool switched = (phase_now != last_phase);
127
+
128
+ std::vector<cxd> P_mix;
129
+
130
+ if (phase_now == Phase::EDIIS) {
131
+ auto ediis_result = ediis.update(P_new, F_new, e_new,
132
+ kernel.weights, nk1, nk2, d,
133
+ /*max_iter_qp=*/20, /*pg_tol=*/1e-7);
134
+ P_mix = std::move(ediis_result.first);
135
+ }
136
+ else if (phase_now == Phase::CDIIS) {
137
+ // CDIIS on commutator with a gentle blend
138
+ P_mix = cdiis.update_cdiis(P_new, comm, P,
139
+ /*coeff_cap=*/5.0, /*eps_reg=*/1e-12,
140
+ /*blend_keep=*/cdiis_blend_keep, /*blend_new=*/cdiis_blend_new);
141
+ }
142
+ else { // Phase::BROYDEN
143
+ // Precondition C with cached eigen-decomposition of F_new
144
+ std::vector<cxd> comm_pc;
145
+ kernel.precondition_commutator_cached(F_new, comm, comm_pc, 5.0e-3);
146
+
147
+ if (switched) bro_state.reset();
148
+ const size_t bro_count_before = bro_state.count;
149
+
150
+ // Store / update LBFGS and get quasi-Newton proposal
151
+ std::vector<cxd> Pflat(P_new.begin(), P_new.end());
152
+ std::vector<cxd> Rflat(comm_pc.begin(), comm_pc.end());
153
+ auto upd = bro_state.update(Pflat, Rflat, mixing_alpha);
154
+ bro_state = std::move(upd.first);
155
+ std::vector<cxd>& Praw = upd.second; // flat
156
+
157
+ if (bro_count_before == 0) {
158
+ // Fix #4: seed with a short preconditioned descent step on first Broyden iteration
159
+ const double beta = 0.35;
160
+ P_mix.resize(P.size());
161
+ #ifdef _OPENMP
162
+ #pragma omp parallel for schedule(static)
163
+ #endif
164
+ for (long long t=0;t<(long long)P_mix.size();++t)
165
+ P_mix[(size_t)t] = P[(size_t)t] - beta * comm_pc[(size_t)t];
166
+
167
+ // Smooth transition from previous iterate
168
+ const double w_keep = 0.7, w_new = 0.3;
169
+ #ifdef _OPENMP
170
+ #pragma omp parallel for schedule(static)
171
+ #endif
172
+ for (long long t=0;t<(long long)P_mix.size();++t)
173
+ P_mix[(size_t)t] = P[(size_t)t]*w_keep + P_mix[(size_t)t]*w_new;
174
+ } else {
175
+ // Use the LBFGS result
176
+ P_mix.assign(Praw.begin(), Praw.end());
177
+ }
178
+ }
179
+
180
+ last_phase = phase_now;
181
+ P = std::move(P_mix);
182
+ e_fin = e_new; k_fin = k; mu_fin = mu;
183
+ }
184
+
185
+ // Final Fock for output (single Σ)
186
+ std::vector<cxd> F_fin;
187
+ kernel.fock_of(P, F_fin);
188
+
189
+ // Final μ from P (consistent)
190
+ {
191
+ std::vector<std::vector<double>> bands_final(nk1 * nk2);
192
+ #ifdef _OPENMP
193
+ #pragma omp parallel for collapse(2) schedule(static)
194
+ #endif
195
+ for (long long k1 = 0; k1 < (long long)nk1; ++k1)
196
+ for (long long k2 = 0; k2 < (long long)nk2; ++k2) {
197
+ const size_t base = offset(nk2, d, (size_t)k1, (size_t)k2, 0, 0);
198
+ Eigen::Map<MatC> Fk(&F_fin[base], d, d);
199
+ Eigen::SelfAdjointEigenSolver<MatC> es;
200
+ es.compute(Fk, Eigen::ComputeEigenvectors);
201
+ if (es.info() != Eigen::Success) throw std::runtime_error("EVD failed (final mu)");
202
+ const auto& ev = es.eigenvalues();
203
+ bands_final[(size_t)k1 * nk2 + (size_t)k2] = std::vector<double>(ev.data(), ev.data() + d);
204
+ }
205
+ mu_fin = find_chemicalpotential(bands_final, kernel.weights, kernel.nk1, kernel.nk2, kernel.d, T, electron_density0);
206
+ }
207
+
208
+ py::gil_scoped_acquire gil;
209
+
210
+ py::array_t<cxd> P_out({(py::ssize_t)nk1,(py::ssize_t)nk2,(py::ssize_t)d,(py::ssize_t)d});
211
+ py::array_t<cxd> F_out({(py::ssize_t)nk1,(py::ssize_t)nk2,(py::ssize_t)d,(py::ssize_t)d});
212
+ std::memcpy(P_out.mutable_data(), P.data(), P.size()*sizeof(cxd));
213
+ std::memcpy(F_out.mutable_data(), F_fin.data(),F_fin.size()*sizeof(cxd));
214
+
215
+ return py::make_tuple(P_out, F_out, e_fin, mu_fin, k_fin);
216
+ }
217
+
218
+ PYBIND11_MODULE(cpp_hf, m) {
219
+ m.doc() = "Hartree–Fock (k-grid) with FFTW + Eigen + OpenMP + EDIIS/CDIIS + preconditioned-LBFGS";
220
+ m.def("hartreefock_iteration_cpp", &hartreefock_iteration_cpp,
221
+ py::arg("weights"), py::arg("hamiltonian"), py::arg("v_coulomb"), py::arg("p0"),
222
+ py::arg("electron_density0"), py::arg("T"),
223
+ py::arg("max_iter"), py::arg("comm_tol"),
224
+ py::arg("diis_size"), py::arg("mixing_alpha"));
225
+ }
@@ -0,0 +1,76 @@
1
+ // fftw_batched2d.hpp - Thin wrapper around FFTW guru64 batched 2D plans
2
+ #pragma once
3
+
4
+ #include <complex>
5
+ #include <cstddef>
6
+ #include <stdexcept>
7
+ #include <mutex>
8
+
9
+ extern "C" {
10
+ #include <fftw3.h>
11
+ }
12
+
13
+ #ifdef _OPENMP
14
+ #include <omp.h>
15
+ #endif
16
+
17
+ struct FftwBatched2D {
18
+ fftw_plan fwd = nullptr;
19
+ fftw_plan bwd = nullptr;
20
+ std::size_t nk1{}, nk2{}, d{};
21
+ std::complex<double>* plan_buf = nullptr; // planning buffer (full-sized)
22
+ std::size_t n_tot{};
23
+ int nthreads{1};
24
+
25
+ static void init_threads_once() {
26
+ #if defined(FFTW3_THREADS)
27
+ static std::once_flag once;
28
+ std::call_once(once, []{ fftw_init_threads(); });
29
+ #endif
30
+ }
31
+
32
+ static int choose_threads() {
33
+ #if defined(_OPENMP)
34
+ return std::max(1, omp_get_max_threads());
35
+ #else
36
+ return 1;
37
+ #endif
38
+ }
39
+
40
+ FftwBatched2D(std::size_t nk1_, std::size_t nk2_, std::size_t d_)
41
+ : nk1(nk1_), nk2(nk2_), d(d_), n_tot(nk1_*nk2_*d_*d_), nthreads(choose_threads()) {
42
+ plan_buf = reinterpret_cast<std::complex<double>*>(fftw_malloc(sizeof(std::complex<double>) * n_tot));
43
+ if (!plan_buf) throw std::bad_alloc{};
44
+
45
+ init_threads_once();
46
+ #if defined(FFTW3_THREADS)
47
+ fftw_plan_with_nthreads(nthreads);
48
+ #endif
49
+
50
+ // Strides for (nk1, nk2, d, d), C-order
51
+ fftw_iodim64 dims[2];
52
+ dims[0].n = static_cast<long long>(nk2); dims[0].is = static_cast<long long>(d*d); dims[0].os = dims[0].is;
53
+ dims[1].n = static_cast<long long>(nk1); dims[1].is = static_cast<long long>(nk2*d*d); dims[1].os = dims[1].is;
54
+ fftw_iodim64 how[2];
55
+ how[0].n = static_cast<long long>(d); how[0].is = static_cast<long long>(d); how[0].os = how[0].is; // i
56
+ how[1].n = static_cast<long long>(d); how[1].is = 1; how[1].os = 1; // j
57
+
58
+ fwd = fftw_plan_guru64_dft(2, dims, 2, how,
59
+ reinterpret_cast<fftw_complex*>(plan_buf),
60
+ reinterpret_cast<fftw_complex*>(plan_buf),
61
+ FFTW_FORWARD, FFTW_MEASURE);
62
+ if (!fwd) throw std::runtime_error("FFTW plan_guru64_dft forward failed");
63
+
64
+ bwd = fftw_plan_guru64_dft(2, dims, 2, how,
65
+ reinterpret_cast<fftw_complex*>(plan_buf),
66
+ reinterpret_cast<fftw_complex*>(plan_buf),
67
+ FFTW_BACKWARD, FFTW_MEASURE);
68
+ if (!bwd) { fftw_destroy_plan(fwd); throw std::runtime_error("FFTW plan_guru64_dft backward failed"); }
69
+ }
70
+
71
+ void forward(std::complex<double>* buf) const { fftw_execute_dft(fwd, reinterpret_cast<fftw_complex*>(buf), reinterpret_cast<fftw_complex*>(buf)); }
72
+ void backward(std::complex<double>* buf) const { fftw_execute_dft(bwd, reinterpret_cast<fftw_complex*>(buf), reinterpret_cast<fftw_complex*>(buf)); }
73
+
74
+ ~FftwBatched2D() { if (fwd) fftw_destroy_plan(fwd); if (bwd) fftw_destroy_plan(bwd); if (plan_buf) fftw_free(plan_buf); }
75
+ };
76
+