cpp-hf 1.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cpp_hf-1.0 → cpp_hf-1.0.1}/CMakeLists.txt +24 -2
- {cpp_hf-1.0 → cpp_hf-1.0.1}/PKG-INFO +1 -1
- cpp_hf-1.0.1/cpp_hf.cpp +225 -0
- cpp_hf-1.0.1/include/cpp_hf/fftw_batched2d.hpp +76 -0
- cpp_hf-1.0.1/include/cpp_hf/hf_kernel.hpp +374 -0
- cpp_hf-1.0.1/include/cpp_hf/mixers.hpp +390 -0
- cpp_hf-1.0.1/include/cpp_hf/utils.hpp +152 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/pyproject.toml +1 -0
- cpp_hf-1.0/cpp_hf_fft.hpp +0 -26
- cpp_hf-1.0/cpp_hf_mixing.hpp +0 -389
- cpp_hf-1.0/cpp_hf_module.cpp +0 -889
- cpp_hf-1.0/cpp_hf_utils.hpp +0 -31
- {cpp_hf-1.0 → cpp_hf-1.0.1}/.github/workflows/build-and-test.yml +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/.github/workflows/release.yml +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/.gitignore +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/LICENSE +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/Readme.md +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/THIRD_PARTY_NOTICES.md +0 -0
- {cpp_hf-1.0 → cpp_hf-1.0.1}/build_install.sh +0 -0
|
@@ -125,20 +125,42 @@ if (HF_USE_OPENMP)
|
|
|
125
125
|
endif()
|
|
126
126
|
|
|
127
127
|
# Build module (Python name: cpp_hf)
|
|
128
|
-
pybind11_add_module(cpp_hf
|
|
128
|
+
pybind11_add_module(cpp_hf cpp_hf.cpp)
|
|
129
129
|
target_compile_features(cpp_hf PRIVATE cxx_std_17)
|
|
130
130
|
if (MSVC)
|
|
131
131
|
target_compile_options(cpp_hf PRIVATE /O2 /DNDEBUG)
|
|
132
132
|
target_compile_definitions(cpp_hf PRIVATE _SILENCE_CXX17_RESULT_OF_DEPRECATION_WARNING)
|
|
133
133
|
else()
|
|
134
134
|
target_compile_options(cpp_hf PRIVATE -O3 -DNDEBUG -fvisibility=hidden)
|
|
135
|
+
# Optional aggressive optimisation flags for the cpp_hf target.
#
# Both switches default to ON so existing builds keep their current flags.
# Turn them OFF when the resulting binary is redistributed or when strict
# IEEE semantics are required:
#   -DHF_ENABLE_NATIVE_ARCH=OFF  -> binary is not tied to the build machine's CPU
#   -DHF_ENABLE_FAST_MATH=OFF    -> no -ffast-math / -ffp-contract=fast
include(CheckCXXCompilerFlag)
option(HF_ENABLE_FAST_MATH "Compile cpp_hf with -ffast-math and -ffp-contract=fast" ON)
option(HF_ENABLE_NATIVE_ARCH "Tune cpp_hf for the build machine (-march=native / -mcpu=native)" ON)

if (HF_ENABLE_FAST_MATH)
  check_cxx_compiler_flag("-ffast-math" HAVE_FFAST_MATH)
  if (HAVE_FFAST_MATH)
    target_compile_options(cpp_hf PRIVATE -ffast-math)
  endif()
  check_cxx_compiler_flag("-ffp-contract=fast" HAVE_FFP_CONTRACT_FAST)
  if (HAVE_FFP_CONTRACT_FAST)
    target_compile_options(cpp_hf PRIVATE -ffp-contract=fast)
  endif()
endif()

if (HF_ENABLE_NATIVE_ARCH)
  # Prefer -march=native; fall back to -mcpu=native for compilers/targets
  # that reject the former.
  check_cxx_compiler_flag("-march=native" HAVE_MARCH_NATIVE)
  if (HAVE_MARCH_NATIVE)
    target_compile_options(cpp_hf PRIVATE -march=native)
  else()
    check_cxx_compiler_flag("-mcpu=native" HAVE_MCPU_NATIVE)
    if (HAVE_MCPU_NATIVE)
      target_compile_options(cpp_hf PRIVATE -mcpu=native)
    endif()
  endif()
endif()
|
|
135
153
|
endif()
|
|
136
154
|
if (APPLE)
|
|
137
155
|
set_target_properties(cpp_hf PROPERTIES MACOSX_RPATH ON)
|
|
138
156
|
endif()
|
|
139
157
|
|
|
140
158
|
# Headers
|
|
141
|
-
target_include_directories(cpp_hf PRIVATE
|
|
159
|
+
target_include_directories(cpp_hf PRIVATE
|
|
160
|
+
${Python_NumPy_INCLUDE_DIRS}
|
|
161
|
+
${CMAKE_CURRENT_SOURCE_DIR}
|
|
162
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include
|
|
163
|
+
)
|
|
142
164
|
if (DEFINED BOOST_INCLUDE_DIR)
|
|
143
165
|
target_include_directories(cpp_hf PRIVATE ${BOOST_INCLUDE_DIR})
|
|
144
166
|
endif()
|
cpp_hf-1.0.1/cpp_hf.cpp
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
// cpp_hf.cpp (multicore-optimized; original mixing kept; 7 fixes applied)
|
|
2
|
+
// C++17 + pybind11 + Eigen + FFTW (guru, batched 2D) + optional Boost (toms748)
|
|
3
|
+
// Layout: (nk1, nk2, d, d) row-major (C-order)
|
|
4
|
+
|
|
5
|
+
#include <pybind11/pybind11.h>
|
|
6
|
+
#include <pybind11/numpy.h>
|
|
7
|
+
#include <pybind11/stl.h>
|
|
8
|
+
|
|
9
|
+
#include <Eigen/Core>
|
|
10
|
+
#include <Eigen/Eigenvalues>
|
|
11
|
+
#include "cpp_hf/mixers.hpp"
|
|
12
|
+
#include "cpp_hf/utils.hpp"
|
|
13
|
+
#include "cpp_hf/fftw_batched2d.hpp"
|
|
14
|
+
#include "cpp_hf/hf_kernel.hpp"
|
|
15
|
+
|
|
16
|
+
// fftw3.h is included by cpp_hf/fftw_batched2d.hpp
|
|
17
|
+
|
|
18
|
+
#include <vector>
|
|
19
|
+
#include <complex>
|
|
20
|
+
#include <stdexcept>
|
|
21
|
+
#include <algorithm>
|
|
22
|
+
#include <numeric>
|
|
23
|
+
#include <limits>
|
|
24
|
+
#include <memory>
|
|
25
|
+
#include <mutex>
|
|
26
|
+
#include <cmath>
|
|
27
|
+
|
|
28
|
+
#ifdef _OPENMP
|
|
29
|
+
#include <omp.h>
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
// ---- Boost root solver (optional, but recommended) ----
|
|
33
|
+
#include <boost/math/tools/toms748_solve.hpp>
|
|
34
|
+
#include <boost/math/tools/roots.hpp>
|
|
35
|
+
|
|
36
|
+
namespace py = pybind11;
|
|
37
|
+
using cxd = std::complex<double>;
|
|
38
|
+
using MatC = Eigen::Matrix<std::complex<double>, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
|
|
39
|
+
using Vecd = Eigen::VectorXd;
|
|
40
|
+
|
|
41
|
+
// ---------------- Python-exposed function ----------------
|
|
42
|
+
/// Self-consistent Hartree–Fock loop on an (nk1, nk2) k-grid (Python entry point).
///
/// Per iteration the loop
///   1. calls `kernel.call(P)` to obtain a new density `P_new` and chemical potential,
///   2. builds the Fock matrix and energy for `P_new`,
///   3. forms the commutator [F, P_new] per k-point and its weighted RMS,
///   4. stops if the RMS is below `comm_tol`, otherwise mixes with
///      EDIIS (RMS > 9*comm_tol), CDIIS (RMS > 1.5*comm_tol) or Broyden.
///
/// @param weights            k-point weights, shape (nk1, nk2)
/// @param hamiltonian        shape (nk1, nk2, d, d)
/// @param v_coulomb          shape (nk1, nk2, dv1, dv2); only the first two dims are checked here
/// @param p0                 initial density, shape (nk1, nk2, d, d)
/// @param electron_density0  target density forwarded to the kernel / chemical-potential search
/// @param T                  temperature forwarded to the kernel / chemical-potential search
/// @param max_iter           maximum number of iterations
/// @param comm_tol           convergence threshold on the weighted commutator RMS
/// @param diis_size          history length handed to the DIIS/EDIIS/Broyden states
/// @param mixing_alpha       step parameter forwarded to the Broyden update
/// @return tuple (P, F, energy, mu, k): final density, Fock matrix of that density,
///         energy of the last evaluated iteration, chemical potential recomputed from
///         the final Fock matrix, and the 0-based index of the last iteration executed.
/// @throws std::invalid_argument on shape mismatch
/// @throws std::runtime_error if the final eigen-decomposition fails or buffer sizes are inconsistent
py::tuple hartreefock_iteration_cpp(
    py::array_t<double, py::array::c_style | py::array::forcecast> weights,      // (nk1,nk2)
    py::array_t<cxd,    py::array::c_style | py::array::forcecast> hamiltonian,  // (nk1,nk2,d,d)
    py::array_t<cxd,    py::array::c_style | py::array::forcecast> v_coulomb,    // (nk1,nk2,dv1,dv2)
    py::array_t<cxd,    py::array::c_style | py::array::forcecast> p0,           // (nk1,nk2,d,d)
    double electron_density0,
    double T,
    size_t max_iter,
    double comm_tol,
    size_t diis_size,
    double mixing_alpha
) {
    // ---- Shape validation (runs with the GIL held) ----
    const auto dim = [](const auto& arr, int axis) { return static_cast<size_t>(arr.shape(axis)); };

    if (hamiltonian.ndim() != 4) throw std::invalid_argument("H must be (nk1,nk2,d,d)");
    const size_t nk1 = dim(hamiltonian, 0);
    const size_t nk2 = dim(hamiltonian, 1);
    const size_t d   = dim(hamiltonian, 2);
    if (dim(hamiltonian, 3) != d) throw std::invalid_argument("H last two dims must be equal (d,d)");
    if (weights.ndim() != 2 || dim(weights, 0) != nk1 || dim(weights, 1) != nk2)
        throw std::invalid_argument("weights must be (nk1,nk2)");
    if (p0.ndim() != 4 || dim(p0, 0) != nk1 || dim(p0, 1) != nk2 || dim(p0, 2) != d || dim(p0, 3) != d)
        throw std::invalid_argument("p0 must be (nk1,nk2,d,d)");
    if (v_coulomb.ndim() != 4 || dim(v_coulomb, 0) != nk1 || dim(v_coulomb, 1) != nk2)
        throw std::invalid_argument("V must be (nk1,nk2,dv1,dv2)");

    hf::HFKernel kernel(nk1, nk2, d, weights, hamiltonian, v_coulomb, T, electron_density0);

    const size_t n_flat = nk1 * nk2 * d * d;
    std::vector<cxd> P(p0.data(), p0.data() + n_flat);
    hf::DiisState cdiis(diis_size);   // wired in (fix #5)
    hf::EdiisState ediis(diis_size);

    double e_fin = 0.0;
    size_t k_fin = 0;
    double mu_fin = 0.0;
    std::vector<cxd> F_fin;

    {
        // Everything in this scope is pure C++; the GIL is re-acquired when the
        // scope ends (also during stack unwinding if an exception is thrown).
        py::gil_scoped_release nogil;

        enum class Phase { EDIIS, CDIIS, BROYDEN };
        Phase last_phase = Phase::EDIIS;
        hf::BroydenState bro_state(diis_size, n_flat);

        // Relative thresholds based on target comm_tol; EDIIS -> CDIIS -> Broyden.
        // Switch earlier to faster mixers to reduce iteration count.
        const double to_cdiis   = 9.0 * comm_tol;
        const double to_broyden = 1.5 * comm_tol;
        const double cdiis_blend_keep = 0.5, cdiis_blend_new = 0.5;

        for (size_t k = 0; k < max_iter; ++k) {
            // 1) Diagonalize F[P] to build P_new and compute mu
            auto call_result = kernel.call(P);
            std::vector<cxd> P_new = std::move(call_result.first);
            const double mu = call_result.second;

            // 2) Build F[P_new] and energy once; also cache EVD(F[P_new]) for the preconditioner
            std::vector<cxd> F_new;
            double e_new = 0.0;
            kernel.fock_energy_and_cache_evd(P_new, F_new, e_new);

            // 3) Commutator residual per k-point and its weighted RMS (fix #7)
            std::vector<cxd> comm(P_new.size());
            double sum_w_c2 = 0.0;

#ifdef _OPENMP
#pragma omp parallel for collapse(2) reduction(+:sum_w_c2) schedule(static)
#endif
            for (long long k1i = 0; k1i < static_cast<long long>(nk1); ++k1i)
                for (long long k2i = 0; k2i < static_cast<long long>(nk2); ++k2i) {
                    const size_t i1 = static_cast<size_t>(k1i);
                    const size_t i2 = static_cast<size_t>(k2i);
                    const size_t base = offset(nk2, d, i1, i2, 0, 0);
                    Eigen::Map<MatC> Fk(&F_new[base], d, d);
                    Eigen::Map<MatC> Pk(&P_new[base], d, d);
                    MatC C(d, d);
                    C.noalias() = Fk * Pk - Pk * Fk;
                    const double wk = kernel.weights[i1 * nk2 + i2];
                    sum_w_c2 += wk * C.cwiseAbs2().sum();
                    Eigen::Map<MatC>(&comm[base], d, d) = C;
                }

            // Guard the division against a vanishing weight sum.
            const double comm_rms = std::sqrt(sum_w_c2 / std::max(1e-30, kernel.weight_sum));

            if (comm_rms < comm_tol) {
                // Converged: keep the unmixed density.
                P.swap(P_new);
                e_fin = e_new; k_fin = k; mu_fin = mu;
                break;
            }

            // 4) Mixer schedule with CDIIS in the middle (fix #5)
            Phase phase_now = Phase::BROYDEN;
            if (comm_rms > to_cdiis)        phase_now = Phase::EDIIS;
            else if (comm_rms > to_broyden) phase_now = Phase::CDIIS;

            const bool switched = (phase_now != last_phase);

            std::vector<cxd> P_mix;

            if (phase_now == Phase::EDIIS) {
                auto ediis_result = ediis.update(P_new, F_new, e_new,
                                                 kernel.weights, nk1, nk2, d,
                                                 /*max_iter_qp=*/20, /*pg_tol=*/1e-7);
                P_mix = std::move(ediis_result.first);
            }
            else if (phase_now == Phase::CDIIS) {
                // CDIIS on the commutator, blended with the previous iterate
                P_mix = cdiis.update_cdiis(P_new, comm, P,
                                           /*coeff_cap=*/5.0, /*eps_reg=*/1e-12,
                                           /*blend_keep=*/cdiis_blend_keep, /*blend_new=*/cdiis_blend_new);
            }
            else { // Phase::BROYDEN
                // Precondition the commutator with the cached eigen-decomposition of F_new
                std::vector<cxd> comm_pc;
                kernel.precondition_commutator_cached(F_new, comm, comm_pc, 5.0e-3);

                // Entering Broyden from another phase discards the stale history.
                if (switched) bro_state.reset();
                const size_t bro_count_before = bro_state.count;

                // Store / update the quasi-Newton history and get its proposal
                std::vector<cxd> Pflat(P_new.begin(), P_new.end());
                std::vector<cxd> Rflat(comm_pc.begin(), comm_pc.end());
                auto upd = bro_state.update(Pflat, Rflat, mixing_alpha);
                bro_state = std::move(upd.first);
                std::vector<cxd>& Praw = upd.second; // flat

                if (bro_count_before == 0) {
                    // Fix #4: on the first Broyden iteration take a short preconditioned
                    // descent step from P and blend it with P for a smooth transition.
                    // Single pass; per-element arithmetic matches the two-pass form.
                    const double beta = 0.35;
                    const double w_keep = 0.7, w_new = 0.3;
                    P_mix.resize(P.size());
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
                    for (long long t = 0; t < static_cast<long long>(P_mix.size()); ++t) {
                        const size_t i = static_cast<size_t>(t);
                        const cxd descent = P[i] - beta * comm_pc[i];
                        P_mix[i] = P[i] * w_keep + descent * w_new;
                    }
                } else {
                    // Use the quasi-Newton result
                    P_mix.assign(Praw.begin(), Praw.end());
                }
            }

            last_phase = phase_now;
            P = std::move(P_mix);
            e_fin = e_new; k_fin = k; mu_fin = mu;
        }

        // Final Fock for output (single Σ)
        kernel.fock_of(P, F_fin);

        // The buffers are indexed below and copied into (nk1,nk2,d,d) arrays;
        // refuse to continue if a mixer or the kernel returned another size.
        if (P.size() != n_flat || F_fin.size() != n_flat)
            throw std::runtime_error("internal error: P/F size does not match (nk1,nk2,d,d)");

        // Final mu from the Fock matrix of the returned P (consistent)
        {
            std::vector<std::vector<double>> bands_final(nk1 * nk2);
            // Exceptions must not leave an OpenMP region, so failures are
            // counted inside the loop and reported after it.
            int evd_failures = 0;
#ifdef _OPENMP
#pragma omp parallel for collapse(2) schedule(static) reduction(+:evd_failures)
#endif
            for (long long k1 = 0; k1 < static_cast<long long>(nk1); ++k1)
                for (long long k2 = 0; k2 < static_cast<long long>(nk2); ++k2) {
                    const size_t i1 = static_cast<size_t>(k1);
                    const size_t i2 = static_cast<size_t>(k2);
                    const size_t base = offset(nk2, d, i1, i2, 0, 0);
                    Eigen::Map<MatC> Fk(&F_fin[base], d, d);
                    Eigen::SelfAdjointEigenSolver<MatC> es;
                    // Only the eigenvalues (bands) are consumed here.
                    es.compute(Fk, Eigen::EigenvaluesOnly);
                    if (es.info() != Eigen::Success) { ++evd_failures; continue; }
                    const auto& ev = es.eigenvalues();
                    bands_final[i1 * nk2 + i2] = std::vector<double>(ev.data(), ev.data() + d);
                }
            if (evd_failures != 0) throw std::runtime_error("EVD failed (final mu)");

            mu_fin = find_chemicalpotential(bands_final, kernel.weights,
                                            kernel.nk1, kernel.nk2, kernel.d,
                                            T, electron_density0);
        }
    } // GIL re-acquired here

    py::array_t<cxd> P_out({static_cast<py::ssize_t>(nk1), static_cast<py::ssize_t>(nk2),
                            static_cast<py::ssize_t>(d),   static_cast<py::ssize_t>(d)});
    py::array_t<cxd> F_out({static_cast<py::ssize_t>(nk1), static_cast<py::ssize_t>(nk2),
                            static_cast<py::ssize_t>(d),   static_cast<py::ssize_t>(d)});
    std::copy(P.begin(), P.end(), P_out.mutable_data());
    std::copy(F_fin.begin(), F_fin.end(), F_out.mutable_data());

    return py::make_tuple(P_out, F_out, e_fin, mu_fin, k_fin);
}
|
|
217
|
+
|
|
218
|
+
// Python module definition: exposes hartreefock_iteration_cpp with keyword
// arguments named after the C++ parameters, plus a docstring for help().
PYBIND11_MODULE(cpp_hf, m) {
    m.doc() = "Hartree–Fock (k-grid) with FFTW + Eigen + OpenMP + EDIIS/CDIIS + preconditioned-LBFGS";
    m.def("hartreefock_iteration_cpp", &hartreefock_iteration_cpp,
          py::arg("weights"), py::arg("hamiltonian"), py::arg("v_coulomb"), py::arg("p0"),
          py::arg("electron_density0"), py::arg("T"),
          py::arg("max_iter"), py::arg("comm_tol"),
          py::arg("diis_size"), py::arg("mixing_alpha"),
          "Run the self-consistent Hartree-Fock loop on an (nk1, nk2) k-grid.\n"
          "\n"
          "Arguments:\n"
          "  weights            float array, shape (nk1, nk2)\n"
          "  hamiltonian        complex array, shape (nk1, nk2, d, d)\n"
          "  v_coulomb          complex array, shape (nk1, nk2, dv1, dv2)\n"
          "  p0                 complex array, shape (nk1, nk2, d, d), initial density\n"
          "  electron_density0  target electron density\n"
          "  T                  temperature\n"
          "  max_iter           maximum number of iterations\n"
          "  comm_tol           tolerance on the weighted RMS of the commutator [F, P]\n"
          "  diis_size          history length of the mixers\n"
          "  mixing_alpha       step parameter of the Broyden mixer\n"
          "\n"
          "Returns:\n"
          "  (P, F, energy, mu, k) where P and F have shape (nk1, nk2, d, d) and k is\n"
          "  the 0-based index of the last iteration executed.\n"
          "\n"
          "Raises ValueError if the array shapes are inconsistent.");
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
// fftw_batched2d.hpp - Thin wrapper around FFTW guru64 batched 2D plans
|
|
2
|
+
#pragma once
|
|
3
|
+
|
|
4
|
+
#include <complex>
|
|
5
|
+
#include <cstddef>
|
|
6
|
+
#include <stdexcept>
|
|
7
|
+
#include <mutex>
|
|
8
|
+
|
|
9
|
+
extern "C" {
|
|
10
|
+
#include <fftw3.h>
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
#ifdef _OPENMP
|
|
14
|
+
#include <omp.h>
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
struct FftwBatched2D {
|
|
18
|
+
fftw_plan fwd = nullptr;
|
|
19
|
+
fftw_plan bwd = nullptr;
|
|
20
|
+
std::size_t nk1{}, nk2{}, d{};
|
|
21
|
+
std::complex<double>* plan_buf = nullptr; // planning buffer (full-sized)
|
|
22
|
+
std::size_t n_tot{};
|
|
23
|
+
int nthreads{1};
|
|
24
|
+
|
|
25
|
+
static void init_threads_once() {
|
|
26
|
+
#if defined(FFTW3_THREADS)
|
|
27
|
+
static std::once_flag once;
|
|
28
|
+
std::call_once(once, []{ fftw_init_threads(); });
|
|
29
|
+
#endif
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
static int choose_threads() {
|
|
33
|
+
#if defined(_OPENMP)
|
|
34
|
+
return std::max(1, omp_get_max_threads());
|
|
35
|
+
#else
|
|
36
|
+
return 1;
|
|
37
|
+
#endif
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
FftwBatched2D(std::size_t nk1_, std::size_t nk2_, std::size_t d_)
|
|
41
|
+
: nk1(nk1_), nk2(nk2_), d(d_), n_tot(nk1_*nk2_*d_*d_), nthreads(choose_threads()) {
|
|
42
|
+
plan_buf = reinterpret_cast<std::complex<double>*>(fftw_malloc(sizeof(std::complex<double>) * n_tot));
|
|
43
|
+
if (!plan_buf) throw std::bad_alloc{};
|
|
44
|
+
|
|
45
|
+
init_threads_once();
|
|
46
|
+
#if defined(FFTW3_THREADS)
|
|
47
|
+
fftw_plan_with_nthreads(nthreads);
|
|
48
|
+
#endif
|
|
49
|
+
|
|
50
|
+
// Strides for (nk1, nk2, d, d), C-order
|
|
51
|
+
fftw_iodim64 dims[2];
|
|
52
|
+
dims[0].n = static_cast<long long>(nk2); dims[0].is = static_cast<long long>(d*d); dims[0].os = dims[0].is;
|
|
53
|
+
dims[1].n = static_cast<long long>(nk1); dims[1].is = static_cast<long long>(nk2*d*d); dims[1].os = dims[1].is;
|
|
54
|
+
fftw_iodim64 how[2];
|
|
55
|
+
how[0].n = static_cast<long long>(d); how[0].is = static_cast<long long>(d); how[0].os = how[0].is; // i
|
|
56
|
+
how[1].n = static_cast<long long>(d); how[1].is = 1; how[1].os = 1; // j
|
|
57
|
+
|
|
58
|
+
fwd = fftw_plan_guru64_dft(2, dims, 2, how,
|
|
59
|
+
reinterpret_cast<fftw_complex*>(plan_buf),
|
|
60
|
+
reinterpret_cast<fftw_complex*>(plan_buf),
|
|
61
|
+
FFTW_FORWARD, FFTW_MEASURE);
|
|
62
|
+
if (!fwd) throw std::runtime_error("FFTW plan_guru64_dft forward failed");
|
|
63
|
+
|
|
64
|
+
bwd = fftw_plan_guru64_dft(2, dims, 2, how,
|
|
65
|
+
reinterpret_cast<fftw_complex*>(plan_buf),
|
|
66
|
+
reinterpret_cast<fftw_complex*>(plan_buf),
|
|
67
|
+
FFTW_BACKWARD, FFTW_MEASURE);
|
|
68
|
+
if (!bwd) { fftw_destroy_plan(fwd); throw std::runtime_error("FFTW plan_guru64_dft backward failed"); }
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
void forward(std::complex<double>* buf) const { fftw_execute_dft(fwd, reinterpret_cast<fftw_complex*>(buf), reinterpret_cast<fftw_complex*>(buf)); }
|
|
72
|
+
void backward(std::complex<double>* buf) const { fftw_execute_dft(bwd, reinterpret_cast<fftw_complex*>(buf), reinterpret_cast<fftw_complex*>(buf)); }
|
|
73
|
+
|
|
74
|
+
~FftwBatched2D() { if (fwd) fftw_destroy_plan(fwd); if (bwd) fftw_destroy_plan(bwd); if (plan_buf) fftw_free(plan_buf); }
|
|
75
|
+
};
|
|
76
|
+
|