llg3d 2.0.1__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llg3d/__init__.py +2 -4
- llg3d/benchmarks/__init__.py +1 -0
- llg3d/benchmarks/compare_commits.py +321 -0
- llg3d/benchmarks/efficiency.py +451 -0
- llg3d/benchmarks/utils.py +25 -0
- llg3d/element.py +98 -17
- llg3d/grid.py +48 -58
- llg3d/io.py +395 -0
- llg3d/main.py +32 -35
- llg3d/parameters.py +159 -49
- llg3d/post/__init__.py +1 -1
- llg3d/post/extract.py +112 -0
- llg3d/post/info.py +192 -0
- llg3d/post/m1_vs_T.py +107 -0
- llg3d/post/m1_vs_time.py +81 -0
- llg3d/post/process.py +87 -85
- llg3d/post/utils.py +38 -0
- llg3d/post/x_profiles.py +161 -0
- llg3d/py.typed +1 -0
- llg3d/solvers/__init__.py +153 -0
- llg3d/solvers/base.py +345 -0
- llg3d/solvers/experimental/__init__.py +9 -0
- llg3d/{solver → solvers/experimental}/jax.py +117 -143
- llg3d/solvers/math_utils.py +41 -0
- llg3d/solvers/mpi.py +370 -0
- llg3d/solvers/numpy.py +126 -0
- llg3d/solvers/opencl.py +439 -0
- llg3d/solvers/profiling.py +38 -0
- {llg3d-2.0.1.dist-info → llg3d-3.1.0.dist-info}/METADATA +5 -2
- llg3d-3.1.0.dist-info/RECORD +36 -0
- {llg3d-2.0.1.dist-info → llg3d-3.1.0.dist-info}/WHEEL +1 -1
- llg3d-3.1.0.dist-info/entry_points.txt +9 -0
- llg3d/output.py +0 -107
- llg3d/post/plot_results.py +0 -61
- llg3d/post/temperature.py +0 -76
- llg3d/simulation.py +0 -95
- llg3d/solver/__init__.py +0 -45
- llg3d/solver/mpi.py +0 -450
- llg3d/solver/numpy.py +0 -207
- llg3d/solver/opencl.py +0 -330
- llg3d/solver/solver.py +0 -89
- llg3d-2.0.1.dist-info/RECORD +0 -25
- llg3d-2.0.1.dist-info/entry_points.txt +0 -4
- {llg3d-2.0.1.dist-info → llg3d-3.1.0.dist-info}/licenses/AUTHORS +0 -0
- {llg3d-2.0.1.dist-info → llg3d-3.1.0.dist-info}/licenses/LICENSE +0 -0
- {llg3d-2.0.1.dist-info → llg3d-3.1.0.dist-info}/top_level.txt +0 -0
llg3d/solvers/opencl.py
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
"""LLG3D solver using OpenCL."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, ClassVar
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pyopencl as cl
|
|
12
|
+
from pyopencl import array as clarray
|
|
13
|
+
from pyopencl import clrandom
|
|
14
|
+
from pyopencl import mem_flags as mf
|
|
15
|
+
|
|
16
|
+
from .base import BaseSolver
|
|
17
|
+
from .profiling import timeit
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TimedKernel:
    """
    Callable wrapper that runs an OpenCL kernel synchronously and times it.

    Each call launches the wrapped kernel and blocks until the returned
    event has completed. When the owning solver has profiling switched on,
    the wall-clock duration of the call (launch + wait) and a call counter
    are accumulated under ``solver.profiling_stats["kernel_<name>"]``.

    Args:
        kernel: The underlying OpenCL kernel
        name: Name of the kernel, used to build the profiling key
        solver: Solver providing the ``profiling`` flag and the stats storage
    """

    def __init__(self, kernel: cl.Kernel, name: str, solver: "OpenCLSolver"):
        self.kernel = kernel
        self.name = name
        self.solver = solver

    def __call__(self, *args: Any, **kwargs: Any) -> None:
        """
        Launch the kernel, wait for it to finish and optionally record timing.

        Args:
            *args: Positional arguments forwarded to the kernel
            **kwargs: Keyword arguments forwarded to the kernel
        """
        # The profiling flag is sampled once, before the launch, so the
        # decision to record cannot change halfway through the call.
        profiled = bool(self.solver.profiling)
        if profiled:
            # Wall-clock timing is used rather than OpenCL event profiling
            launched_at = time.perf_counter()

        completion: cl.Event = self.kernel(*args, **kwargs)
        completion.wait()  # the wrapper is always synchronous

        if not profiled:
            return

        elapsed = time.perf_counter() - launched_at
        bucket = self.solver.profiling_stats[f"kernel_{self.name}"]
        bucket["time"] += elapsed
        bucket["calls"] += 1
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _parse_device_selection(device_selection: str) -> tuple[str, int]:
    """Validate a device selection string and return ``(kind, gpu_index)``."""
    if device_selection in ("auto", "cpu"):
        return device_selection, 0
    if device_selection == "gpu":
        return "gpu", 0
    if device_selection.startswith("gpu:"):
        try:
            gpu_index = int(device_selection.split(":")[1])
        except ValueError:
            raise ValueError(
                f"Invalid device selection: {device_selection}"
            ) from None
        # A negative index would silently wrap around to the last GPU
        if gpu_index < 0:
            raise ValueError(f"Invalid device selection: {device_selection}")
        return "gpu", gpu_index
    raise ValueError(f"Invalid device selection: {device_selection}")


def get_context_and_device(
    device_selection: str = "auto",
) -> tuple[cl.Context, cl.Device]:
    """
    Get the OpenCL context and device.

    The selection string is validated before any OpenCL call is made, so a
    malformed selection is reported as a ``ValueError`` even on a machine
    without OpenCL devices.

    Args:
        device_selection:

            - ``"auto"``: Let OpenCL choose automatically
            - ``"cpu"``: Select first available CPU device
            - ``"gpu"``: Select first available GPU
            - ``"gpu:N"``: Select specific GPU by index, N >= 0
              (e.g., ``"gpu:0"``, ``"gpu:1"``)

    Returns:
        - The OpenCL context
        - The OpenCL device

    Raises:
        RuntimeError: If no suitable device is found, or if the requested GPU
            index exceeds the number of GPUs found
        ValueError: If the device selection string is invalid (unknown
            keyword, non-integer or negative GPU index)
    """
    kind, gpu_index = _parse_device_selection(device_selection)

    if kind == "auto":
        context = cl.create_some_context(interactive=False)
        return context, context.devices[0]

    # Gather the devices of every platform
    all_devices = [
        device
        for platform in cl.get_platforms()
        for device in platform.get_devices()
    ]
    if not all_devices:
        raise RuntimeError("No OpenCL devices found")

    if kind == "cpu":
        cpu_devices = [d for d in all_devices if d.type & cl.device_type.CPU]
        if not cpu_devices:
            raise RuntimeError("No CPU devices found")
        selected_device = cpu_devices[0]
    else:
        # Plain "gpu" is handled as "gpu:0"
        gpu_devices = [d for d in all_devices if d.type & cl.device_type.GPU]
        if not gpu_devices:
            raise RuntimeError("No GPU devices found")
        if gpu_index >= len(gpu_devices):
            raise RuntimeError(
                f"GPU index {gpu_index} not available. Found {len(gpu_devices)} GPU(s)"
            )
        selected_device = gpu_devices[gpu_index]

    # Create context with selected device
    context = cl.Context([selected_device])
    print(f"Selected OpenCL device: {selected_device.name} ({selected_device.type})")

    return context, selected_device
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def get_precision(device: cl.Device, precision: str) -> np.dtype:
    """
    Map the requested simulation precision to a numpy dtype.

    Args:
        device: OpenCL device the simulation will run on
        precision: ``"double"`` selects float64; any other value selects
            float32

    Returns:
        ``np.dtype(np.float64)`` or ``np.dtype(np.float32)``

    Raises:
        RuntimeError: If double precision is requested while the device does
            not support it
    """
    wants_double = precision == "double"

    # The device is only queried when double precision is actually asked for;
    # a falsy ``double_fp_config`` is treated as "no double support".
    if wants_double and not device.double_fp_config:
        raise RuntimeError("The selected device does not support double precision.")

    scalar_type = np.float64 if wants_double else np.float32
    return np.dtype(scalar_type)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class Program:
    """
    Build the OpenCL program of the LLG3D simulation and hand out its kernels.

    The program is compiled once, at construction time, from the ``llg3d.cl``
    file located next to this module, with preprocessor options derived from
    the solver (precision, grid size, error checking, anisotropy).

    Args:
        solver: The OpenCLSolver instance the kernels are built for
    """

    def __init__(self, solver: OpenCLSolver):
        self.solver = solver  #: The OpenCLSolver instance
        self.cl_program: cl.Program = self._get_built_program()  #: The OpenCL program

    def _get_built_program(self) -> cl.Program:
        """
        Return the OpenCL program built from the source code.

        Returns:
            The OpenCL program object
        """
        grid = self.solver.grid
        opencl_code: str = (Path(__file__).parent / "llg3d.cl").read_text()
        build_options = (
            "-D USE_DOUBLE_PRECISION" if self.solver.np_float == np.float64 else ""
        )
        # Grid dimensions are baked into the kernels as compile-time constants
        build_options += f" -D NX={grid.Jx} -D NY={grid.Jy} -D NZ={grid.Jz}"
        build_options += " -cl-fp32-correctly-rounded-divide-sqrt"

        # Optional error detection in kernels
        if self.solver.error_check_enabled:
            build_options += " -D ENABLE_ERROR_CHECK"

        # Add anisotropy type directive
        if self.solver.elem.anisotropy == "uniaxial":
            build_options += " -D ANISOTROPY_UNIAXIAL"
        # else: cubic is the default

        return cl.Program(self.solver.context, opencl_code).build(options=build_options)

    def get_kernel(
        self, kernel_name: str, arg_types: list | None = None
    ) -> TimedKernel:
        """
        Return the specified kernel by name, wrapped with timing.

        Args:
            kernel_name: Name of the kernel to retrieve
            arg_types: List of argument types for the kernel; when omitted,
                ``[None]`` is used

        Returns:
            The TimedKernel wrapper around the OpenCL kernel
        """
        # A fresh list per call: a mutable default would be shared between
        # every call that relies on it.
        if arg_types is None:
            arg_types = [None]
        kernel: cl.Kernel = getattr(self.cl_program, kernel_name)
        kernel.set_arg_types(arg_types)
        return TimedKernel(kernel, kernel_name, self.solver)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class OpenCLSolver(BaseSolver):
    """LLG3D solver that runs its time-stepping kernels through OpenCL."""

    solver_type: ClassVar[str] = "opencl"  #: Solver type name

    def __post_init__(self) -> None:
        """
        Set up the OpenCL context, the kernels and the work buffers.

        Raises:
            ValueError: If the grid is not uniform
        """
        print("Initializing OpenCL solver...")
        print("Initializing context...")
        self.context, opencl_device = get_context_and_device(self.device)
        self.queue = cl.CommandQueue(self.context)  #: OpenCL command queue
        #: Numpy float type (np.dtype(np.float32) or np.dtype(np.float64))
        self.np_float: np.dtype = get_precision(opencl_device, self.precision)

        # Kernel-side error detection is opt-in through an environment variable
        env_flag = os.getenv("LLG3D_ENABLE_ERROR_CHECK", "0").lower()
        #: Whether error checking is enabled in kernels
        self.error_check_enabled: bool = env_flag in {"1", "true", "on", "yes"}

        super().__post_init__()

        # Only uniform grids are handled by this solver
        if not self.grid.uniform:
            raise ValueError("OpenCLSolver only supports uniform grids.")

        # Kernels: 4 buffer arguments followed by 7 scalars of the working
        # float type (see the calls in _simulate)
        program = Program(self)
        step_arg_types = [None] * 4 + [self.np_float] * 7
        #: Prediction kernel with slope storage
        self.predict_kernel = program.get_kernel("predict", step_arg_types)
        #: Correction kernel (slope + update_2 + normalize) with error codes
        self.correct_and_normalize_kernel = program.get_kernel(
            "correct_and_normalize", step_arg_types + [None]
        )
        #: Weighted reduction kernel for m1 averaging
        self.sum_m1_weighted_kernel = program.get_kernel(
            "sum_m1_weighted", [None, None, None]
        )

        # Error-code buffer: one 4-byte int per grid point when checking is
        # enabled, otherwise a single-int placeholder so that the correction
        # kernel can always be given a buffer.
        n_error_codes = self.grid.ntot if self.error_check_enabled else 1
        self.d_error_codes = cl.Buffer(
            self.context, mf.READ_WRITE, size=n_error_codes * 4
        )
        self.h_error_codes = np.zeros(n_error_codes, dtype=np.int32)

        # Reduction scratch space, allocated once and reused by _xyz_average
        self.wgroup_size = 256
        # Number of work-groups needed to cover ntot items (ceiling division)
        self.num_groups = (self.grid.ntot + self.wgroup_size - 1) // self.wgroup_size
        partial_sums_bytes = self.num_groups * self.np_float.itemsize
        self.d_partial_sums = cl.Buffer(
            self.context, mf.READ_WRITE, size=partial_sums_bytes
        )
        self.h_partial_sums = np.empty(self.num_groups, dtype=self.np_float)

    def _init_rng(self) -> Any:
        """
        Create the random number generator used for temperature fluctuations.

        Returns:
            A Philox generator bound to the solver context and seeded with
            ``self.seed``
        """
        generator = clrandom.PhiloxGenerator(self.context, seed=self.seed)
        return generator

    @timeit
    def _compute_R_random(self, d_R_random: clarray.Array):
        """
        Fill the device array with normally distributed random numbers.

        Args:
            d_R_random: Device array to fill with random numbers
        """
        self.rng.fill_normal(d_R_random)  # type: ignore[attr-defined]
        # Block until the fill has completed
        self.queue.finish()

    @timeit
    def _xyz_average(self, m: Any) -> float:
        """
        Compute the space average of m1 with a device reduction kernel.

        The kernel fills one entry of ``d_partial_sums`` per work-group
        (presumably a per-group partial sum - confirm against llg3d.cl); the
        entries are then summed on the host.

        Args:
            m: Current magnetization device array

        Returns:
            The sum of the partial sums divided by ``grid.ncell``
        """
        global_size = (self.num_groups * self.wgroup_size,)
        local_size = (self.wgroup_size,)
        # One scratch value of the working float type per work-item
        scratch = cl.LocalMemory(self.wgroup_size * self.np_float.itemsize)
        self.sum_m1_weighted_kernel(
            self.queue,
            global_size,
            local_size,
            m.data,
            self.d_partial_sums,
            scratch,
        )

        # Bring the partial sums back and finish the reduction on the host
        copy_done = cl.enqueue_copy(
            self.queue, self.h_partial_sums, self.d_partial_sums
        )
        copy_done.wait()
        total = float(np.sum(self.h_partial_sums))

        return total / self.grid.ncell

    @timeit
    def _check_errors(self, iteration: int) -> None:
        """
        Raise if the kernel reported an error code for any work-item.

        Does nothing when error checking is disabled.

        Args:
            iteration: Current iteration number (for error message)

        Raises:
            RuntimeError: If a non-zero error code is found
        """
        if not self.error_check_enabled:
            return

        cl.enqueue_copy(self.queue, self.h_error_codes, self.d_error_codes).wait()
        flagged = np.flatnonzero(self.h_error_codes)
        if flagged.size == 0:
            return

        # Report the first flagged work-item; the flat index is decoded with
        # k varying fastest, then j, then i.
        first_gid = flagged[0]
        remainder, k = divmod(first_gid, self.grid.Jz)
        i, j = divmod(remainder, self.grid.Jy)

        raise RuntimeError(
            f"Non-finite value detected at iteration n={iteration}\n"
            f"Location: gid={first_gid}, i={i}, j={j}, k={k}"
        )

    @timeit
    def _update_x_profiles(self, m_n: Any, t: float):
        """
        Append the x profiles of m_n at time t to the records.

        The device array is mapped to the host, then ``_yz_average`` is
        applied to each of the three magnetization components and the results
        are stored in ``self.records["x_profiles"]``.

        Args:
            m_n: Current magnetization device array
            t: Time associated with the profiles
        """
        # Create the storage on first use
        if "x_profiles" not in self.records:
            self.records["x_profiles"] = {
                "t": [],
                "m1": [],
                "m2": [],
                "m3": [],
            }

        # Blocking map of the device array into host memory
        h_m_n = m_n.map_to_host(is_blocking=True)

        profiles = self.records["x_profiles"]
        profiles["t"].append(t)
        for component, key in enumerate(("m1", "m2", "m3")):
            profiles[key].append(self._yz_average(h_m_n[component]))

    def _simulate(self) -> float:
        """
        Run the time loop of the simulation.

        Returns:
            The wall-clock time spent in the time loop (buffer setup, the
            initial record and the finalization are not included)
        """
        elem, grid, queue = self.elem, self.grid, self.queue

        # Initial magnetization on the host, then the device-side storage
        h_m_n = self._init_m_n()
        d_m_n = clarray.to_device(queue, h_m_n)
        d_R_random = clarray.Array(queue, (3, *grid.dims), self.np_float)
        d_m_np1 = clarray.empty_like(d_m_n)
        d_s_pre = cl.Buffer(self.context, mf.READ_WRITE, size=h_m_n.nbytes)

        t = 0.0
        self._record(d_m_n, t, 0)  # Record the initial solution
        start_time = time.perf_counter()

        for n in self._progress_bar():
            t += self.dt

            self._compute_R_random(d_R_random)

            # Arguments shared by both kernels. They are rebuilt at every
            # iteration because d_m_n and d_m_np1 are swapped below.
            step_args = (
                d_m_n.data,
                d_m_np1.data,
                d_R_random.data,
                d_s_pre,
                grid.inv_dx2,
                elem.coeff_1,
                elem.coeff_2,
                elem.coeff_3,
                elem.coeff_4,
                elem.lambda_G,
                self.dt,
            )

            # Prediction phase: compute slope, store it, and apply first Euler update
            self.predict_kernel(queue, (grid.ntot,), None, *step_args)

            # Correction phase: compute slope, apply midpoint update and normalize
            self.correct_and_normalize_kernel(
                queue, (grid.ntot,), None, *step_args, self.d_error_codes
            )

            # No-op unless error checking is enabled
            self._check_errors(n)

            # The freshly computed state becomes the current one
            d_m_n, d_m_np1 = d_m_np1, d_m_n

            self._record(d_m_n, t, n)

        total_time = time.perf_counter() - start_time

        self._finalize()

        return total_time
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Profiling utilities for solvers."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from functools import wraps
|
|
5
|
+
from typing import TYPE_CHECKING, Callable
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from .base import BaseSolver
|
|
9
|
+
|
|
10
|
+
ProfilingStats = dict[str, dict[str, float]]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def timeit(func: Callable) -> Callable:
    """
    Decorate a solver method so that it is timed when profiling is enabled.

    The wrapped function must take the solver as its first argument. When
    ``solver.profiling`` is true, the elapsed wall-clock time and a call
    counter are accumulated in ``solver.profiling_stats[func.__name__]``,
    including when the function raises. When it is false, the function is
    called directly and nothing is recorded.

    Args:
        func: The function to time.

    Returns:
        A wrapped function that records timing stats to solver.profiling_stats.
    """

    @wraps(func)
    def timeit_wrapper(solver: "BaseSolver", *args, **kwargs):
        if solver.profiling:
            began = time.perf_counter()
            try:
                return func(solver, *args, **kwargs)
            finally:
                # Runs on both normal return and exception
                entry = solver.profiling_stats[func.__name__]
                entry["time"] += time.perf_counter() - began
                entry["calls"] += 1

        # Profiling disabled: plain call, no timing overhead
        return func(solver, *args, **kwargs)

    return timeit_wrapper
|
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llg3d
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: A solver for the stochastic Landau-Lifshitz-Gilbert equation in 3D
|
|
5
5
|
Author-email: Clémentine Courtès <clementine.courtes@math.unistra.fr>, Matthieu Boileau <matthieu.boileau@math.unistra.fr>
|
|
6
6
|
Project-URL: Homepage, https://gitlab.math.unistra.fr/llg3d/llg3d
|
|
7
7
|
Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
9
|
Classifier: Operating System :: OS Independent
|
|
10
|
-
Requires-Python: >=3.
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE
|
|
13
13
|
License-File: AUTHORS
|
|
14
14
|
Requires-Dist: numpy
|
|
15
15
|
Requires-Dist: matplotlib
|
|
16
16
|
Requires-Dist: scipy
|
|
17
|
+
Requires-Dist: tqdm
|
|
17
18
|
Provides-Extra: mpi
|
|
18
19
|
Requires-Dist: mpi4py; extra == "mpi"
|
|
19
20
|
Provides-Extra: opencl
|
|
@@ -22,6 +23,8 @@ Requires-Dist: mako; extra == "opencl"
|
|
|
22
23
|
Provides-Extra: jax
|
|
23
24
|
Requires-Dist: jax[cuda]; sys_platform != "darwin" and extra == "jax"
|
|
24
25
|
Requires-Dist: jax[cpu]; sys_platform == "darwin" and extra == "jax"
|
|
26
|
+
Provides-Extra: git
|
|
27
|
+
Requires-Dist: GitPython; extra == "git"
|
|
25
28
|
Dynamic: license-file
|
|
26
29
|
|
|
27
30
|
# LLG3D: A solver for the stochastic Landau-Lifshitz-Gilbert equation in 3D
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
llg3d/__init__.py,sha256=WFiJ87Ng-E_9zla-6KjvxFzq8xI8SluspEH02TTQgvM,75
|
|
2
|
+
llg3d/__main__.py,sha256=3D1q7AG5vU6gr-V0iuo5oNYl-Og2SvJ4YaBTdqOVVaw,115
|
|
3
|
+
llg3d/element.py,sha256=GnafEd8vrDr59ixi05zyMesUCUAFhFccdZ-I1y-NAlA,7259
|
|
4
|
+
llg3d/grid.py,sha256=iaT8QUZPAjt1fD39H64-c4EF4SY-qt4406pbHlO8l0s,4158
|
|
5
|
+
llg3d/io.py,sha256=mJdPv65r8u5ODrxJB39u_dVnZi-PGBwC_mos4SXkrEc,12366
|
|
6
|
+
llg3d/main.py,sha256=uUgT4PGBr255OZqYXS2x-gvaFKVf_RKo0JliDQlQcT4,1729
|
|
7
|
+
llg3d/parameters.py,sha256=Ox8-HL2AKw-Ogw-xkCFXiEm6ponYpf6I8C6BHWpgWf8,6064
|
|
8
|
+
llg3d/py.typed,sha256=iMysHiY_j0PVKAdpVSJynuRB8D7puNCzeIDwHpWZ4xE,58
|
|
9
|
+
llg3d/benchmarks/__init__.py,sha256=zw3npEgBEO_1rr7MRq9YPO3nOGRKwIhe6kFnpxI7ZJQ,26
|
|
10
|
+
llg3d/benchmarks/compare_commits.py,sha256=9fY6RMXlEfDKKvz4D5NO3jtuf4JB26zIF5L9mMKrsa4,9904
|
|
11
|
+
llg3d/benchmarks/efficiency.py,sha256=PrDE3KxVAtWVRotcod0CiegQ_2PZa6qI9ByRRadHq7s,15461
|
|
12
|
+
llg3d/benchmarks/utils.py,sha256=XSECFgoquWu7LcgCsPYx5YG3CtRlPHXUewMPGthP5wY,708
|
|
13
|
+
llg3d/post/__init__.py,sha256=S4UiquBtCm_iHQ0vaU0E4T69TZfhiYq7i26AOz7Vs40,29
|
|
14
|
+
llg3d/post/extract.py,sha256=45J2mfVU6wR2j-Vr_fNoouQsr65aRWG4knZ-B4L_pFQ,3321
|
|
15
|
+
llg3d/post/info.py,sha256=sCaGJR7nxqrpWjZMINWFJR8unZ83ISCtrfP-URKCUIE,5548
|
|
16
|
+
llg3d/post/m1_vs_T.py,sha256=WZK1wL66yGOFZgKcgESA564V7qwzNMSAlTTu16APNw8,3251
|
|
17
|
+
llg3d/post/m1_vs_time.py,sha256=VtwyAAjuUPQO2Kl1H-5DeOwS5xj_DdUo6wi5rs9VDxo,2298
|
|
18
|
+
llg3d/post/process.py,sha256=usvRLnvHDqaV3EKiP6il3se7ha_G_PHVS0LP_8c87KM,3845
|
|
19
|
+
llg3d/post/utils.py,sha256=oZBYbPhJJYzHsRrnENyxmZotylLlYVOOFePr3zc-Fo0,1114
|
|
20
|
+
llg3d/post/x_profiles.py,sha256=wRTz7IDrM_3A-259SV-SduU8-n0mmS_EFUYsalwz-OE,4594
|
|
21
|
+
llg3d/solvers/__init__.py,sha256=aNn3LjP1nDZj7PqJ_KGxY3hj-BDa-RrOcE00DnjLgDk,3436
|
|
22
|
+
llg3d/solvers/base.py,sha256=cucNTqBSbl9MgArGbmlYORKPWrsJFuZRQCZYgACuj8U,11879
|
|
23
|
+
llg3d/solvers/math_utils.py,sha256=F1uwrfNCg0tCLlkXKoMFHcOaSFsIrrvzO768IpfvZx4,1014
|
|
24
|
+
llg3d/solvers/mpi.py,sha256=8zBWU1k1Ns4nM2-Dz-gmoGeYnCWuIh8jN-b6USWBV2Q,11530
|
|
25
|
+
llg3d/solvers/numpy.py,sha256=pjNQJzTyqOwe5XjGKwK_Vjp55ZfmAtC1WOPaSQ6OpdM,3616
|
|
26
|
+
llg3d/solvers/opencl.py,sha256=i7y50uQ8bV5EcV3PDcEvJoPo3Qt6pvAq_awy7mgHDGk,15190
|
|
27
|
+
llg3d/solvers/profiling.py,sha256=B5PgxtE3xCcv7Qi51VVYAToBzR5HwjS4j9LPKg1ajyc,991
|
|
28
|
+
llg3d/solvers/experimental/__init__.py,sha256=F4smHsF9_hR5KXIsh6Xd46sSzJE6N7yFuH0vmj0ygGA,147
|
|
29
|
+
llg3d/solvers/experimental/jax.py,sha256=FrcdQYMVroPbYBbqvZMPIH9gv8n-PkT2vdUO5u5aYHg,11249
|
|
30
|
+
llg3d-3.1.0.dist-info/licenses/AUTHORS,sha256=vhJ88HikYvOrGiB_l1xH2X6hyn9ZJafx6mpoMYNhh1I,297
|
|
31
|
+
llg3d-3.1.0.dist-info/licenses/LICENSE,sha256=aFxTGAyyve8nM9T2jWTarJzQhdSSC3MbbN1heNAev9c,1062
|
|
32
|
+
llg3d-3.1.0.dist-info/METADATA,sha256=s7tmZU9PdngZQW7gDRP-AgokwiI-oAhjc33U9UC1oiY,2030
|
|
33
|
+
llg3d-3.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
34
|
+
llg3d-3.1.0.dist-info/entry_points.txt,sha256=28japzG2aS2e_J42LXw7SAK4ufbG22LGHcI2D6zVz9Y,374
|
|
35
|
+
llg3d-3.1.0.dist-info/top_level.txt,sha256=cBZ0roaXt3CAXqYojuO84lGPCtWuLlXxLGLYRKmHZy0,6
|
|
36
|
+
llg3d-3.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
llg3d = llg3d.main:main
|
|
3
|
+
llg3d.bench.compare_commits = llg3d.benchmarks.compare_commits:main
|
|
4
|
+
llg3d.bench.efficiency = llg3d.benchmarks.efficiency:main
|
|
5
|
+
llg3d.extract = llg3d.post.extract:main
|
|
6
|
+
llg3d.info = llg3d.post.info:main
|
|
7
|
+
llg3d.m1_vs_T = llg3d.post.m1_vs_T:main
|
|
8
|
+
llg3d.m1_vs_time = llg3d.post.m1_vs_time:main
|
|
9
|
+
llg3d.x_profiles = llg3d.post.x_profiles:main
|
llg3d/output.py
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
"""Utility functions for LLG3D."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import sys
|
|
5
|
-
from typing import Iterable, TextIO
|
|
6
|
-
|
|
7
|
-
from .solver import rank
|
|
8
|
-
from .grid import Grid
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def progress_bar(
    it: Iterable, prefix: str = "", size: int = 60, out: TextIO = sys.stdout
):
    """
    Yield the items of ``it`` while displaying a progress bar.

    (Source: https://stackoverflow.com/a/34482761/16593179)

    The bar is only printed when ``rank == 0``. An empty ``it`` yields
    nothing and displays a complete ``0/0`` bar.

    Args:
        it: Object to iterate over; it must also support ``len()``
        prefix: Prefix string for the progress bar
        size: Size of the progress bar (number of characters)
        out: Output stream (default is sys.stdout)

    Yields:
        The items of ``it``, in order
    """
    count = len(it)

    def show(j):
        # With nothing to iterate over, draw a full bar instead of dividing
        # by zero.
        x = int(size * j / count) if count else size
        if rank == 0:
            print(
                f"{prefix}[{'█' * x}{('.' * (size - x))}] {j}/{count}",
                end="\r",
                file=out,
                flush=True,
            )

    show(0)
    done = 0  # stays 0 when the loop body never runs
    for done, item in enumerate(it, start=1):
        yield item
        # To avoid slowing down the computation, we do not display at every iteration
        if (done - 1) % 5 == 0:
            show(done)
    show(done)
    if rank == 0:
        print("\n", flush=True, file=out)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def write_json(json_file: str, run: dict):
    """
    Serialize the run dictionary to a JSON file.

    The file is created or overwritten, and the content is indented with
    4 spaces.

    Args:
        json_file: Name of the JSON file
        run: Dictionary containing the run information
    """
    with open(json_file, mode="w") as handle:
        json.dump(run, handle, indent=4)
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def get_output_files(g: Grid, T: float, n_mean: int, n_profile: int) -> tuple:
    """
    Open files and list them.

    The file names are computed whatever the value of ``rank``, but the
    files are only opened when ``rank == 0``. The returned handles are left
    open: closing them is the caller's responsibility.

    Args:
        g: Grid object
        T: temperature
        n_mean: Number of iterations for integral output (0 disables it)
        n_profile: Number of iterations for profile output (0 disables it)

    Returns:
        - a file handler for storing m space integral over time (or None)
        - a list of file handlers for storing x-profiles of m_i (or None)
        - a list of output filenames (mean file first, then the profiles)
    """
    # The two groups of names are kept apart so that the profile files can be
    # opened correctly even when no mean file is requested.
    mean_filenames = []
    profile_filenames = []
    if n_mean != 0:
        mean_filenames.append(g.get_filename(T, extension="txt"))
    if n_profile != 0:
        profile_filenames.extend(
            g.get_filename(T, name=f"m{i + 1}", extension="npy") for i in range(3)
        )
    output_filenames = mean_filenames + profile_filenames

    f_mean = None
    f_profiles = None
    if rank == 0:
        opened = []
        try:
            if mean_filenames:
                f_mean = open(mean_filenames[0], "w")  # integral of m1
                opened.append(f_mean)
            if profile_filenames:
                f_profiles = []  # x profiles of m_i
                for profile_filename in profile_filenames:
                    f_profile = open(profile_filename, "wb")
                    opened.append(f_profile)
                    f_profiles.append(f_profile)
        except OSError:
            # Do not leak the handles opened before the failure
            for handle in opened:
                handle.close()
            raise

    return f_mean, f_profiles, output_filenames
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def close_output_files(f_mean: TextIO, f_profiles: list[TextIO] = None):
    """
    Close the output files that were actually opened.

    Args:
        f_mean: file handler for storing m space integral over time, or
            None when there is no such file
        f_profiles: file handlers for storing x-profiles of m_i, or None
            when there are no such files
    """
    # Collect the handles that exist, mean file first, then close them
    to_close = []
    if f_mean is not None:
        to_close.append(f_mean)
    if f_profiles is not None:
        to_close.extend(f_profiles)

    for handle in to_close:
        handle.close()
|