sdf-xarray 0.2.0-cp312-cp312-win_amd64.whl → 0.5.0-cp312-cp312-win_amd64.whl
- lib/SDFC_14.4.7/sdfc.lib +0 -0
- sdf_xarray/__init__.py +496 -57
- sdf_xarray/_version.py +22 -4
- sdf_xarray/dataset_accessor.py +124 -0
- sdf_xarray/download.py +88 -0
- sdf_xarray/plotting.py +484 -101
- sdf_xarray/sdf_interface.cp312-win_amd64.pyd +0 -0
- sdf_xarray/sdf_interface.pyx +5 -3
- sdf_xarray-0.5.0.dist-info/METADATA +150 -0
- {sdf_xarray-0.2.0.dist-info → sdf_xarray-0.5.0.dist-info}/RECORD +13 -11
- {sdf_xarray-0.2.0.dist-info → sdf_xarray-0.5.0.dist-info}/WHEEL +1 -1
- {sdf_xarray-0.2.0.dist-info → sdf_xarray-0.5.0.dist-info}/entry_points.txt +3 -0
- {sdf_xarray-0.2.0.dist-info → sdf_xarray-0.5.0.dist-info}/licenses/LICENCE +1 -1
- sdf_xarray-0.2.0.dist-info/METADATA +0 -190
sdf_xarray/__init__.py
CHANGED
@@ -1,22 +1,43 @@
+import contextlib
 import os
-import pathlib
 import re
 from collections import Counter, defaultdict
+from collections.abc import Callable, Iterable
+from importlib.metadata import version
 from itertools import product
-from
+from os import PathLike as os_PathLike
+from pathlib import Path
+from typing import ClassVar

 import numpy as np
 import xarray as xr
+from packaging.version import Version
 from xarray.backends import AbstractDataStore, BackendArray, BackendEntrypoint
 from xarray.backends.file_manager import CachingFileManager
 from xarray.backends.locks import ensure_lock
 from xarray.core import indexing
+from xarray.core.types import T_Chunks
 from xarray.core.utils import close_on_error, try_read_magic_number_from_path
 from xarray.core.variable import Variable

+# NOTE: Do not delete these lines, otherwise the "epoch" dataset and dataarray
+# accessors will not be imported when the user imports sdf_xarray
+import sdf_xarray.dataset_accessor
+import sdf_xarray.download
 import sdf_xarray.plotting  # noqa: F401

-
+# NOTE: This attempts to initialise with the "pint" accessor if the user
+# has installed the package
+with contextlib.suppress(ImportError):
+    import pint_xarray  # noqa: F401
+
+from .sdf_interface import Constant, SDFFile  # type: ignore # noqa: PGH003
+
+# TODO Remove this once the new kwarg options are fully implemented
+if Version(version("xarray")) >= Version("2025.8.0"):
+    xr.set_options(use_new_combine_kwarg_defaults=True)
+
+PathLike = str | os_PathLike


 def _rename_with_underscore(name: str) -> str:
@@ -48,24 +69,140 @@ def _process_latex_name(variable_name: str) -> str:
     return variable_name


-def
-"""
+def _resolve_glob(path_glob: PathLike | Iterable[PathLike]):
+    """
+    Normalise input path_glob into a sorted list of absolute, resolved Path objects.
+    """
+
+    try:
+        p = Path(path_glob)
+        paths = list(p.parent.glob(p.name)) if p.name == "*.sdf" else list(p)
+    except TypeError:
+        paths = list({Path(p) for p in path_glob})
+
+    paths = sorted(p.resolve() for p in paths)
+    if not paths:
+        raise FileNotFoundError(f"No files matched pattern or input: {path_glob!r}")
+    return paths
+
+
+def _build_datatree_from_dataset(
+    ds: xr.Dataset,
+) -> xr.DataTree:
+    """
+    An `xarray.DataTree` is constructed utilising the original names in the SDF
+    file. This is due to the fact that these names include slashes which `xarray`
+    can use to automatically build up a datatree. We do additionally replace
+    spaces with underscores to be more pythonic. You can find the
+    `xarray.Dataset` name under the ``attrs["flat_structure_name"]`` for referencing.
+
+    In some cases the user may output the ``always + species`` dumpmask which
+    means that SDF variable will have species data plus a general one. When
+    defining a `xarray.DataTree` you cannot have a node of that tree contain both
+    variable information and have leaves with variables so we move the node
+    information to a leaf named ``node/All`` (see example of
+    ``Dervied/Number_Density/All`` in below table)
+
+    Below are some examples of how variable names are translated from the
+    regular `xarray.open_dataset` result into their more traditional names.
+
+    =================================== ===================================
+    Dataset variable name               DataTree variable name
+    =================================== ===================================
+    ``Derived_Number_Density``          ``Derived/Number_Density/All``
+    ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+    ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+    ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+    ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+    =================================== ===================================
+
+    Parameters
+    ----------
+    ds
+        Incoming `xarray.Dataset` to convert to a `xarray.DataTree`
+    """
+    renames = {}
+    for name, var in ds.data_vars.items():
+        # Append the current variable name to the attributes
+        var.attrs["flat_structure_name"] = name
+        renames.update({name: var.attrs["full_name"].replace(" ", "_")})
+
+    new_names = renames.values()
+
+    final_renames = {
+        key: (
+            f"{path}/All"
+            if any(other.startswith(f"{path}/") for other in new_names)
+            else path
+        )
+        for key, path in renames.items()
+    }
+
+    ds = ds.rename_vars(final_renames)
+    dt = xr.DataTree.from_dict(ds)
+    dt.attrs = ds.attrs
+    return dt
+
+
+def purge_unselected_data_vars(ds: xr.Dataset, data_vars: list[str]) -> xr.Dataset:
+    """
+    If the user has exclusively requested only certain variables be
+    loaded in then we purge all other variables and dimensions
+    """
+    existing_data_vars = set(ds.data_vars.keys())
+    vars_to_keep = set(data_vars) & existing_data_vars
+    vars_to_drop = existing_data_vars - vars_to_keep
+    ds = ds.drop_vars(vars_to_drop)
+
+    existing_dims = set(ds.sizes)
+    dims_to_keep = set()
+    for var in vars_to_keep:
+        dims_to_keep.update(ds[var].coords._names)
+        dims_to_keep.update(ds[var].dims)
+
+    coords_to_drop = existing_dims - dims_to_keep
+    return ds.drop_dims(coords_to_drop)
+
+
+def combine_datasets(
+    path_glob: Iterable | str, data_vars: list[str], **kwargs
+) -> xr.Dataset:
+    """
+    Combine all datasets using a single time dimension, optionally extract
+    data from only the listed data_vars
+    """
+
+    if data_vars is not None:
+        return xr.open_mfdataset(
+            path_glob,
+            join="outer",
+            coords="different",
+            compat="no_conflicts",
+            combine="nested",
+            concat_dim="time",
+            preprocess=SDFPreprocess(data_vars=data_vars),
+            **kwargs,
+        )

     return xr.open_mfdataset(
         path_glob,
-        data_vars="
-        coords="
-        compat="
+        data_vars="all",
+        coords="different",
+        compat="no_conflicts",
+        join="outer",
         preprocess=SDFPreprocess(),
         **kwargs,
     )


 def open_mfdataset(
-    path_glob: Iterable | str |
+    path_glob: Iterable | str | Path | Callable[..., Iterable[Path]],
     *,
     separate_times: bool = False,
     keep_particles: bool = False,
+    probe_names: list[str] | None = None,
+    data_vars: list[str] | None = None,
+    chunks: T_Chunks = "auto",
 ) -> xr.Dataset:
     """Open a set of EPOCH SDF files as one `xarray.Dataset`

@@ -95,20 +232,47 @@ def open_mfdataset(
         different output frequencies
     keep_particles :
         If ``True``, also load particle data (this may use a lot of memory!)
+    probe_names :
+        List of EPOCH probe names
+    data_vars :
+        List of data vars to load in (If not specified loads in all variables)
+    chunks :
+        Dictionary with keys given by dimension names and values given by chunk sizes.
+        In general, these should divide the dimensions of each dataset. By default
+        chunks are automatically set so that they are the same size as the dimensions
+        stored in each of the SDF files. See `Xarray chunking-and-performance
+        <https://docs.xarray.dev/en/stable/user-guide/dask.html#chunking-and-performance>`_
+        for details on why this is useful for large datasets. The default behaviour is
+        to do this automatically and can be disabled by ``chunks=None``.
     """

-
-    if isinstance(path_glob, str):
-        path_glob = pathlib.Path().glob(path_glob)
-
-    # Coerce to list because we might need to use the sequence multiple times
-    path_glob = sorted(list(path_glob))
+    path_glob = _resolve_glob(path_glob)

     if not separate_times:
-        return combine_datasets(
+        return combine_datasets(
+            path_glob,
+            data_vars=data_vars,
+            keep_particles=keep_particles,
+            probe_names=probe_names,
+            chunks=chunks,
+        )
+
+    _, var_times_map = make_time_dims(path_glob)
+
+    all_dfs = []
+    for f in path_glob:
+        ds = xr.open_dataset(
+            f, keep_particles=keep_particles, probe_names=probe_names, chunks=chunks
+        )
+
+        # If the data_vars are specified then only load them in and disregard the rest.
+        # If there are no remaining data variables then skip adding the dataset to list
+        if data_vars is not None:
+            ds = purge_unselected_data_vars(ds, data_vars)
+            if not ds.data_vars:
+                continue

-
-    all_dfs = [xr.open_dataset(f, keep_particles=keep_particles) for f in path_glob]
+        all_dfs.append(ds)

     for df in all_dfs:
         for da in df:
@@ -125,10 +289,155 @@ def open_mfdataset(
     )

     return xr.combine_by_coords(
-        all_dfs,
+        all_dfs,
+        coords="different",
+        combine_attrs="drop_conflicts",
+        join="outer",
+        compat="no_conflicts",
+    )
+
+
+def open_datatree(
+    path: PathLike,
+    *,
+    keep_particles: bool = False,
+    probe_names: list[str] | None = None,
+) -> xr.DataTree:
+    """
+    An `xarray.DataTree` is constructed utilising the original names in the SDF
+    file. This is due to the fact that these names include slashes which `xarray`
+    can use to automatically build up a datatree. We do additionally replace
+    spaces with underscores to be more pythonic. You can find the
+    `xarray.Dataset` name under the ``attrs["flat_structure_name"]`` for referencing.
+
+    In some cases the user may output the ``always + species`` dumpmask which
+    means that SDF variable will have species data plus a general one. When
+    defining a `xarray.DataTree` you cannot have a node of that tree contain both
+    variable information and have leaves with variables so we move the node
+    information to a leaf named ``node/All`` (see example of
+    ``Dervied/Number_Density/All`` in below table)
+
+    Below are some examples of how variable names are translated from the
+    regular `xarray.open_dataset` result into their more traditional names.
+
+    =================================== ===================================
+    Dataset variable name               DataTree variable name
+    =================================== ===================================
+    ``Derived_Number_Density``          ``Derived/Number_Density/All``
+    ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+    ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+    ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+    ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+    =================================== ===================================
+
+    Parameters
+    ----------
+    path
+        The path to the SDF file
+    keep_particles
+        If ``True``, also load particle data (this may use a lot of memory!)
+    probe_names
+        List of EPOCH probe names
+
+    Examples
+    --------
+    >>> dt = open_datatree("0000.sdf")
+    >>> dt["Electric_Field"]["Ex"].values  # Access all Electric_Field_Ex data
+    """
+
+    return xr.open_datatree(
+        path, keep_particles=keep_particles, probe_names=probe_names
     )


+def open_mfdatatree(
+    path_glob: Iterable | str | Path | Callable[..., Iterable[Path]],
+    *,
+    separate_times: bool = False,
+    keep_particles: bool = False,
+    probe_names: list[str] | None = None,
+    data_vars: list[str] | None = None,
+) -> xr.DataTree:
+    """Open a set of EPOCH SDF files as one `xarray.DataTree`
+
+    EPOCH can output variables at different periods, so each individal
+    SDF file from one EPOCH run may have different variables in it. In
+    order to combine all files into one `xarray.Dataset`, we need to
+    concatenate variables across their time dimension.
+
+    We have two choices:
+
+    1. One time dimension where some variables may not be defined at all time
+       points, and so will be filled with NaNs at missing points; or
+    2. Multiple time dimensions, one for each output frequency
+
+    The second option is better for memory consumption, as the missing data with
+    the first option still takes up space. However, proper lazy-loading may
+    mitigate this.
+
+    The ``separate_times`` argument can be used to switch between these choices.
+
+    An `xarray.DataTree` is constructed utilising the original names in the SDF
+    file. This is due to the fact that these names include slashes which `xarray`
+    can use to automatically build up a datatree. We do additionally replace
+    spaces with underscores to be more pythonic. You can find the
+    `xarray.Dataset` name under the ``attrs["flat_structure_name"]`` for referencing.
+
+    This function combines multiple SDF files into a single `xarray.DataTree` with a
+    unified time dimension and hierarchical organization of variables.
+
+    In some cases the user may output the ``always + species`` dumpmask which
+    means that SDF variable will have species data plus a general one. When
+    defining a `xarray.DataTree` you cannot have a node of that tree contain both
+    variable information and have leaves with variables so we move the node
+    information to a leaf named ``node/All`` (see example of
+    ``Dervied/Number_Density/All`` in below table)
+
+    Below are some examples of how variable names are translated from the
+    regular `xarray.open_dataset` result into their more traditional names.
+
+    =================================== ===================================
+    Dataset variable name               DataTree variable name
+    =================================== ===================================
+    ``Derived_Number_Density``          ``Derived/Number_Density/All``
+    ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+    ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+    ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+    ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+    =================================== ===================================
+
+    Parameters
+    ----------
+    path_glob
+        List of filenames or string glob pattern
+    separate_times
+        If ``True``, create separate time dimensions for variables defined at
+        different output frequencies
+    keep_particles
+        If ``True``, also load particle data (this may use a lot of memory!)
+    probe_names
+        List of EPOCH probe names
+    data_vars
+        List of data vars to load in (If not specified loads in all variables)
+
+    Examples
+    --------
+    >>> dt = open_mfdatatree("*.sdf")
+    >>> dt["Electric_Field"]["Ex"].values  # Access all Electric_Field_Ex data
+    >>> dt.coords["time"].values  # Access combined time dimension
+    """
+    # First, combine the datasets as usual
+    combined_ds = open_mfdataset(
+        path_glob,
+        separate_times=separate_times,
+        keep_particles=keep_particles,
+        probe_names=probe_names,
+        data_vars=data_vars,
+    )
+
+    return _build_datatree_from_dataset(combined_ds)
+
+
 def make_time_dims(path_glob):
     """Extract the distinct set of time arrays from a collection of
     SDF files, along with a mapping from variable names to their time
@@ -146,14 +455,12 @@ def make_time_dims(path_glob):
     )

     # Count the unique set of lists of times
-    times_count = Counter(
+    times_count = Counter(tuple(v) for v in vars_count.values())

     # Give each set of times a unique name
     time_dims = {}
-    count = 0
-    for t in times_count:
+    for count, t in enumerate(times_count):
         time_dims[f"time{count}"] = t
-        count += 1

     # Map each variable to the name of its time dimension
     var_times_map = {}
@@ -174,13 +481,11 @@ class SDFBackendArray(BackendArray):

     __slots__ = ("datastore", "dtype", "shape", "variable_name")

-    def __init__(self, variable_name, datastore):
+    def __init__(self, variable_name, datastore, shape, dtype):
         self.datastore = datastore
         self.variable_name = variable_name
-
-
-        self.shape = array.shape
-        self.dtype = array.dtype
+        self.shape = shape
+        self.dtype = dtype

     def get_array(self, needs_lock=True):
         with self.datastore.acquire_context(needs_lock) as ds:
@@ -205,19 +510,28 @@ class SDFDataStore(AbstractDataStore):
     """Store for reading and writing data via the SDF library."""

     __slots__ = (
-        "lock",
-        "drop_variables",
-        "keep_particles",
         "_filename",
         "_manager",
+        "drop_variables",
+        "keep_particles",
+        "lock",
+        "probe_names",
     )

-    def __init__(
+    def __init__(
+        self,
+        manager,
+        drop_variables=None,
+        keep_particles=False,
+        lock=None,
+        probe_names=None,
+    ):
         self._manager = manager
         self._filename = self.ds.filename
         self.drop_variables = drop_variables
         self.keep_particles = keep_particles
         self.lock = ensure_lock(lock)
+        self.probe_names = probe_names

     @classmethod
     def open(
@@ -226,6 +540,7 @@ class SDFDataStore(AbstractDataStore):
         lock=None,
         drop_variables=None,
         keep_particles=False,
+        probe_names=None,
     ):
         if isinstance(filename, os.PathLike):
             filename = os.fspath(filename)
@@ -236,6 +551,7 @@ class SDFDataStore(AbstractDataStore):
             lock=lock,
             drop_variables=drop_variables,
             keep_particles=keep_particles,
+            probe_names=probe_names,
         )

     def _acquire(self, needs_lock=True):
@@ -249,12 +565,21 @@ class SDFDataStore(AbstractDataStore):
     def acquire_context(self, needs_lock=True):
         return self._manager.acquire_context(needs_lock)

-    def load(self):
+    def load(self):  # noqa: PLR0912, PLR0915
         # Drop any requested variables
         if self.drop_variables:
+            # Build a mapping from underscored names to real variable names
+            name_map = {_rename_with_underscore(var): var for var in self.ds.variables}
+
             for variable in self.drop_variables:
-
-
+                key = _rename_with_underscore(variable)
+                original_name = name_map.get(key)
+
+                if original_name is None:
+                    raise KeyError(
+                        f"Variable '{variable}' not found (interpreted as '{key}')."
+                    )
+                self.ds.variables.pop(original_name)

         # These two dicts are global metadata about the run or file
         attrs = {**self.ds.header, **self.ds.run_info}
@@ -274,8 +599,7 @@ class SDFDataStore(AbstractDataStore):
         def _process_grid_name(grid_name: str, transform_func) -> str:
             """Apply the given transformation function and then rename with underscores."""
             transformed_name = transform_func(grid_name)
-            renamed_name = _rename_with_underscore(transformed_name)
-            return renamed_name
+            return _rename_with_underscore(transformed_name)

         for key, value in self.ds.grids.items():
             if "cpu" in key.lower():
@@ -310,6 +634,8 @@ class SDFDataStore(AbstractDataStore):
             # Had some problems with these variables, so just ignore them for now
             if "cpu" in key.lower():
                 continue
+            if "boundary" in key.lower():
+                continue
             if "output file" in key.lower():
                 continue

@@ -331,12 +657,38 @@ class SDFDataStore(AbstractDataStore):
                 if value.units is not None:
                     data_attrs["units"] = value.units

-
+                var = Variable(dims, value.data, attrs=data_attrs)
+
+                # Provide preferred_chunks for constants so dask aligns to natural shapes
+                var.encoding["preferred_chunks"] = dict(zip(dims, shape))
+
+                data_vars[base_name] = var
                 continue

             if value.is_point_data:
                 # Point (particle) variables are 1D
-
+
+                # Particle data does not maintain a fixed dimension size
+                # throughout the simulation. An example of a particle name comes
+                # in the form of `Particles/Px/Ion_H` which is then modified
+                # using `_process_grid_name()` into `Ion_H`. This is fine as the
+                # other components of the momentum (`Py`, `Pz`) will have the same
+                # size as they represent the same bunch of particles.
+
+                # Probes however have names in the form of `Electron_Front_Probe/Px`
+                # which are changed to just `Px`; this is fine when there is only one
+                # probe in the system but when there are multiple they will have
+                # conflicting sizes so we can't keep the names as simply `Px` so we
+                # instead set their dimension as the full name `Electron_Front_Probe_Px`.
+                is_probe_name_match = self.probe_names is not None and any(
+                    name in key for name in self.probe_names
+                )
+                name_processor = (
+                    _rename_with_underscore
+                    if is_probe_name_match
+                    else _grid_species_name
+                )
+                var_coords = (f"ID_{_process_grid_name(key, name_processor)}",)
             else:
                 # These are DataArrays

@@ -359,9 +711,9 @@ class SDFDataStore(AbstractDataStore):
                 grid_mid = self.ds.grids[value.grid_mid]
                 grid_mid_base_name = _process_grid_name(grid_mid.name, _norm_grid_name)
                 for dim_size, dim_name in zip(grid_mid.shape, grid_mid.labels):
-                    dim_size_lookup[dim_name][
-
-
+                    dim_size_lookup[dim_name][dim_size] = (
+                        f"{dim_name}_{grid_mid_base_name}"
+                    )

             var_coords = [
                 dim_size_lookup[dim_name][dim_size]
@@ -377,8 +729,24 @@ class SDFDataStore(AbstractDataStore):
                 "full_name": key,
                 "long_name": long_name,
             }
-            lazy_data = indexing.LazilyIndexedArray(
-
+            lazy_data = indexing.LazilyIndexedArray(
+                SDFBackendArray(key, self, shape=value.shape, dtype=value.data.dtype)
+            )
+            var = Variable(var_coords, lazy_data, data_attrs)
+            # Set preferred chunks to match on-disk layout
+            # For point data (1D): full dimension
+            # For grid data (N-D): individual grid chunk sizes
+            if value.is_point_data:
+                var.encoding["preferred_chunks"] = {var_coords[0]: len(value.data)}
+            else:
+                # Align with on-disk grid structure
+                chunk_dict = {}
+                for dim_name, size in zip(var_coords, value.shape):
+                    # Use natural on-disk boundaries
+                    chunk_dict[dim_name] = size
+                var.encoding["preferred_chunks"] = chunk_dict
+
+            data_vars[base_name] = var

         # TODO: might need to decode if mult is set?

@@ -397,14 +765,23 @@ class SDFDataStore(AbstractDataStore):


 class SDFEntrypoint(BackendEntrypoint):
+    supports_groups = True
+    open_dataset_parameters: ClassVar[list[str]] = [
+        "filename_or_obj",
+        "drop_variables",
+        "keep_particles",
+        "probe_names",
+    ]
+
     def open_dataset(
         self,
         filename_or_obj,
         *,
         drop_variables=None,
         keep_particles=False,
+        probe_names=None,
     ):
-        if isinstance(filename_or_obj,
+        if isinstance(filename_or_obj, Path):
             # sdf library takes a filename only
             # TODO: work out if we need to deal with file handles
             filename_or_obj = str(filename_or_obj)
@@ -413,33 +790,89 @@ class SDFEntrypoint(BackendEntrypoint):
             filename_or_obj,
             drop_variables=drop_variables,
             keep_particles=keep_particles,
+            probe_names=probe_names,
         )
         with close_on_error(store):
             return store.load()

-
+    open_datatree_parameters: ClassVar[list[str]] = [
+        "filename_or_obj",
+        "drop_variables",
+        "keep_particles",
+        "probe_names",
+    ]
+
+    def open_datatree(
+        self,
+        filename_or_obj,
+        *,
+        drop_variables=None,
+        keep_particles=False,
+        probe_names=None,
+    ):
+        ds = self.open_dataset(
+            filename_or_obj,
+            drop_variables=drop_variables,
+            keep_particles=keep_particles,
+            probe_names=probe_names,
+        )
+        return _build_datatree_from_dataset(ds)

     def guess_can_open(self, filename_or_obj):
         magic_number = try_read_magic_number_from_path(filename_or_obj)
         if magic_number is not None:
             return magic_number.startswith(b"SDF1")

-        try:
-            _, ext = os.path.splitext(filename_or_obj)
-        except TypeError:
-            return False
-        return ext in {".sdf", ".SDF"}
+        return Path(filename_or_obj).suffix in {".sdf", ".SDF"}

     description = "Use .sdf files in Xarray"

-    url = "https://epochpic.github.io/documentation/visualising_output/
+    url = "https://epochpic.github.io/documentation/visualising_output/python_beam.html"
+
+
+class XrTUIEntrpoint:
+    def open_mfdatatree(self, paths: list[Path]) -> xr.DataTree:
+        return open_mfdatatree(paths)


 class SDFPreprocess:
-    """Preprocess SDF files for xarray ensuring matching job ids and sets
+    """Preprocess SDF files for xarray ensuring matching job ids and sets
+    time dimension.
+
+    This class is used as a 'preprocess' function within ``xr.open_mfdataset``. It
+    performs three main duties on each individual file's Dataset:
+
+    1. Checks for a **matching job ID** across all files to ensure dataset consistency.
+    2. **Filters** the Dataset to keep only the variables specified in `data_vars`
+       and their required coordinates.
+    3. **Expands dimensions** to include a single 'time' coordinate, preparing the
+       Dataset for concatenation.
+
+    EPOCH can output variables at different intervals, so some SDF files
+    may not contain the requested variable. We combine this data into one
+    dataset by concatenating across the time dimension.
+
+    The combination is performed using ``join="outer"`` (in the calling ``open_mfdataset`` function),
+    meaning that the final combined dataset will contain the variable across the
+    entire time span, with NaNs filling the time steps where the variable was absent in
+    the individual file.

-    def __init__(self):
+    With large SDF files, this filtering method will save on memory consumption when
+    compared to loading all variables from all files before concatenation.
+
+    Parameters
+    ----------
+    data_vars :
+        A list of data variables to load in (If not specified loads
+        in all variables)
+    """
+
+    def __init__(
+        self,
+        data_vars: list[str] | None = None,
+    ):
         self.job_id: int | None = None
+        self.data_vars = data_vars

     def __call__(self, ds: xr.Dataset) -> xr.Dataset:
         if self.job_id is None:
@@ -450,17 +883,23 @@ class SDFPreprocess:
                 f"Mismatching job ids (got {ds.attrs['jobid1']}, expected {self.job_id})"
             )

-
+        # If the user has exclusively requested only certain variables be
+        # loaded in then we purge all other variables and coordinates
+        if self.data_vars:
+            ds = purge_unselected_data_vars(ds, self.data_vars)
+
+        time_val = ds.attrs.get("time", np.nan)
+        ds = ds.expand_dims(time=[time_val])
         ds = ds.assign_coords(
             time=(
                 "time",
-                [
+                [time_val],
                 {"units": "s", "long_name": "Time", "full_name": "time"},
             )
         )
         # Particles' spartial coordinates also evolve in time
         for coord, value in ds.coords.items():
             if value.attrs.get("point_data", False):
-                ds.coords[coord] = value.expand_dims(time=[
+                ds.coords[coord] = value.expand_dims(time=[time_val])

         return ds