sdf-xarray 0.2.6-cp311-cp311-win_amd64.whl → 0.3.2-cp311-cp311-win_amd64.whl

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
lib/SDFC_14.4.7/sdfc.lib CHANGED
Binary file
sdf_xarray/__init__.py CHANGED
@@ -1,13 +1,17 @@
+ import contextlib
  import os
  import re
  from collections import Counter, defaultdict
  from collections.abc import Callable, Iterable
+ from importlib.metadata import version
  from itertools import product
+ from os import PathLike as os_PathLike
  from pathlib import Path
  from typing import ClassVar

  import numpy as np
  import xarray as xr
+ from packaging.version import Version
  from xarray.backends import AbstractDataStore, BackendArray, BackendEntrypoint
  from xarray.backends.file_manager import CachingFileManager
  from xarray.backends.locks import ensure_lock
@@ -15,12 +19,24 @@ from xarray.core import indexing
  from xarray.core.utils import close_on_error, try_read_magic_number_from_path
  from xarray.core.variable import Variable

- # NOTE: Do not delete this line, otherwise the "epoch" accessor will not be
- # imported when the user imports sdf_xarray
+ # NOTE: Do not delete these lines, otherwise the "epoch" dataset and dataarray
+ # accessors will not be imported when the user imports sdf_xarray
+ import sdf_xarray.dataset_accessor
  import sdf_xarray.plotting  # noqa: F401

+ # NOTE: This attempts to initialise with the "pint" accessor if the user
+ # has installed the package
+ with contextlib.suppress(ImportError):
+     import pint_xarray  # noqa: F401
+
  from .sdf_interface import Constant, SDFFile  # type: ignore # noqa: PGH003

+ # TODO Remove this once the new kwarg options are fully implemented
+ if Version(version("xarray")) >= Version("2025.8.0"):
+     xr.set_options(use_new_combine_kwarg_defaults=True)
+
+ PathLike = str | os_PathLike
+

  def _rename_with_underscore(name: str) -> str:
      """A lot of the variable names have spaces, forward slashes and dashes in them, which
@@ -51,14 +67,69 @@ def _process_latex_name(variable_name: str) -> str:
      return variable_name


- def combine_datasets(path_glob: Iterable | str, **kwargs) -> xr.Dataset:
-     """Combine all datasets using a single time dimension"""
+ def _resolve_glob(path_glob: PathLike | Iterable[PathLike]):
+     """
+     Normalise input path_glob into a sorted list of absolute, resolved Path objects.
+     """
+
+     try:
+         p = Path(path_glob)
+         paths = list(p.parent.glob(p.name)) if p.name == "*.sdf" else list(p)
+     except TypeError:
+         paths = list({Path(p) for p in path_glob})
+
+     paths = sorted(p.resolve() for p in paths)
+     if not paths:
+         raise FileNotFoundError(f"No files matched pattern or input: {path_glob!r}")
+     return paths
+
+
+ def purge_unselected_data_vars(ds: xr.Dataset, data_vars: list[str]) -> xr.Dataset:
+     """
+     If the user has exclusively requested only certain variables be
+     loaded in then we purge all other variables and dimensions
+     """
+     existing_data_vars = set(ds.data_vars.keys())
+     vars_to_keep = set(data_vars) & existing_data_vars
+     vars_to_drop = existing_data_vars - vars_to_keep
+     ds = ds.drop_vars(vars_to_drop)
+
+     existing_dims = set(ds.sizes)
+     dims_to_keep = set()
+     for var in vars_to_keep:
+         dims_to_keep.update(ds[var].coords._names)
+         dims_to_keep.update(ds[var].dims)
+
+     coords_to_drop = existing_dims - dims_to_keep
+     return ds.drop_dims(coords_to_drop)
+
+
+ def combine_datasets(
+     path_glob: Iterable | str, data_vars: list[str], **kwargs
+ ) -> xr.Dataset:
+     """
+     Combine all datasets using a single time dimension, optionally extract
+     data from only the listed data_vars
+     """
+
+     if data_vars is not None:
+         return xr.open_mfdataset(
+             path_glob,
+             join="outer",
+             coords="different",
+             compat="no_conflicts",
+             combine="nested",
+             concat_dim="time",
+             preprocess=SDFPreprocess(data_vars=data_vars),
+             **kwargs,
+         )

      return xr.open_mfdataset(
          path_glob,
-         data_vars="minimal",
-         coords="minimal",
-         compat="override",
+         data_vars="all",
+         coords="different",
+         compat="no_conflicts",
+         join="outer",
          preprocess=SDFPreprocess(),
          **kwargs,
      )
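To make the new filtering helper concrete: below is a minimal sketch of purge_unselected_data_vars against a synthetic dataset (the variable and dimension names are hypothetical, not EPOCH output). Any variable not requested is dropped, along with any dimension that no kept variable still uses.

import numpy as np
import xarray as xr

from sdf_xarray import purge_unselected_data_vars

# Synthetic stand-in for one SDF file: two variables on two different dims.
ds = xr.Dataset(
    {"ex": ("x", np.zeros(4)), "density": ("px", np.zeros(8))},
    coords={"x": np.arange(4), "px": np.arange(8)},
)

filtered = purge_unselected_data_vars(ds, ["ex"])
assert set(filtered.data_vars) == {"ex"}  # "density" is dropped
assert "px" not in filtered.dims  # and so is its now-unused dimension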
@@ -70,6 +141,7 @@ def open_mfdataset(
      separate_times: bool = False,
      keep_particles: bool = False,
      probe_names: list[str] | None = None,
+     data_vars: list[str] | None = None,
  ) -> xr.Dataset:
      """Open a set of EPOCH SDF files as one `xarray.Dataset`

@@ -101,25 +173,34 @@ def open_mfdataset(
          If ``True``, also load particle data (this may use a lot of memory!)
      probe_names :
          List of EPOCH probe names
+     data_vars :
+         List of data vars to load in (If not specified loads in all variables)
      """

-     # TODO: This is not very robust, look at how xarray.open_mfdataset does it
-     if isinstance(path_glob, str):
-         path_glob = Path().glob(path_glob)
-
-     # Coerce to list because we might need to use the sequence multiple times
-     path_glob = sorted(list(path_glob))  # noqa: C414
+     path_glob = _resolve_glob(path_glob)

      if not separate_times:
          return combine_datasets(
-             path_glob, keep_particles=keep_particles, probe_names=probe_names
+             path_glob,
+             data_vars=data_vars,
+             keep_particles=keep_particles,
+             probe_names=probe_names,
          )

-     time_dims, var_times_map = make_time_dims(path_glob)
-     all_dfs = [
-         xr.open_dataset(f, keep_particles=keep_particles, probe_names=probe_names)
-         for f in path_glob
-     ]
+     _, var_times_map = make_time_dims(path_glob)
+
+     all_dfs = []
+     for f in path_glob:
+         ds = xr.open_dataset(f, keep_particles=keep_particles, probe_names=probe_names)
+
+         # If the data_vars are specified then only load them in and disregard the rest.
+         # If there are no remaining data variables then skip adding the dataset to list
+         if data_vars is not None:
+             ds = purge_unselected_data_vars(ds, data_vars)
+             if not ds.data_vars:
+                 continue
+
+         all_dfs.append(ds)

      for df in all_dfs:
          for da in df:
@@ -136,7 +217,11 @@ def open_mfdataset(
          )

      return xr.combine_by_coords(
-         all_dfs, data_vars="minimal", combine_attrs="drop_conflicts"
+         all_dfs,
+         coords="different",
+         combine_attrs="drop_conflicts",
+         join="outer",
+         compat="no_conflicts",
      )


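The move away from compat="override"/coords="minimal" to an outer join changes how variables that are absent from some files get combined. A minimal sketch of that behaviour using plain xarray and synthetic data (variable names hypothetical):

import xarray as xr

# Two toy "files": each carries a different variable at a different time.
a = xr.Dataset({"ex": ("time", [1.0])}, coords={"time": [0.0]})
b = xr.Dataset({"ey": ("time", [2.0])}, coords={"time": [1.0]})

combined = xr.combine_by_coords(
    [a, b],
    coords="different",
    join="outer",
    compat="no_conflicts",
    combine_attrs="drop_conflicts",
)
print(combined["ex"].values)  # [ 1. nan] -- NaN where "ex" was absent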
@@ -490,10 +575,43 @@ class SDFEntrypoint(BackendEntrypoint):


  class SDFPreprocess:
-     """Preprocess SDF files for xarray ensuring matching job ids and sets time dimension"""
+     """Preprocess SDF files for xarray ensuring matching job ids and sets
+     time dimension.
+
+     This class is used as a 'preprocess' function within ``xr.open_mfdataset``. It
+     performs three main duties on each individual file's Dataset:
+
+     1. Checks for a **matching job ID** across all files to ensure dataset consistency.
+     2. **Filters** the Dataset to keep only the variables specified in `data_vars`
+        and their required coordinates.
+     3. **Expands dimensions** to include a single 'time' coordinate, preparing the
+        Dataset for concatenation.
+
+     EPOCH can output variables at different intervals, so some SDF files
+     may not contain the requested variable. We combine this data into one
+     dataset by concatenating across the time dimension.

-     def __init__(self):
+     The combination is performed using ``join="outer"`` (in the calling ``open_mfdataset`` function),
+     meaning that the final combined dataset will contain the variable across the
+     entire time span, with NaNs filling the time steps where the variable was absent in
+     the individual file.
+
+     With large SDF files, this filtering method will save on memory consumption when
+     compared to loading all variables from all files before concatenation.
+
+     Parameters
+     ----------
+     data_vars :
+         A list of data variables to load in (If not specified loads
+         in all variables)
+     """
+
+     def __init__(
+         self,
+         data_vars: list[str] | None = None,
+     ):
          self.job_id: int | None = None
+         self.data_vars = data_vars

      def __call__(self, ds: xr.Dataset) -> xr.Dataset:
          if self.job_id is None:
@@ -504,17 +622,23 @@ class SDFPreprocess:
                  f"Mismatching job ids (got {ds.attrs['jobid1']}, expected {self.job_id})"
              )

-         ds = ds.expand_dims(time=[ds.attrs["time"]])
+         # If the user has exclusively requested only certain variables be
+         # loaded in then we purge all other variables and coordinates
+         if self.data_vars:
+             ds = purge_unselected_data_vars(ds, self.data_vars)
+
+         time_val = ds.attrs.get("time", np.nan)
+         ds = ds.expand_dims(time=[time_val])
          ds = ds.assign_coords(
              time=(
                  "time",
-                 [ds.attrs["time"]],
+                 [time_val],
                  {"units": "s", "long_name": "Time", "full_name": "time"},
              )
          )
          # Particles' spartial coordinates also evolve in time
          for coord, value in ds.coords.items():
              if value.attrs.get("point_data", False):
-                 ds.coords[coord] = value.expand_dims(time=[ds.attrs["time"]])
+                 ds.coords[coord] = value.expand_dims(time=[time_val])

          return ds
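Taken together, the data_vars plumbing above lets a caller restrict loading to a subset of variables. A sketch of the intended call, assuming SDF dumps in the current directory (the variable name is hypothetical):

import sdf_xarray

# Only the named variable is kept per file; all other variables (and their
# now-unused coordinates) are purged before concatenation along "time".
ds = sdf_xarray.open_mfdataset("*.sdf", data_vars=["Electric_Field_Ex"])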
sdf_xarray/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '0.2.6'
- __version_tuple__ = version_tuple = (0, 2, 6)
+ __version__ = version = '0.3.2'
+ __version_tuple__ = version_tuple = (0, 3, 2)

- __commit_id__ = commit_id = 'g67411803b'
+ __commit_id__ = commit_id = 'g331520e50'
sdf_xarray/dataset_accessor.py ADDED
@@ -0,0 +1,73 @@
+ from typing import Union
+
+ import xarray as xr
+
+
+ @xr.register_dataset_accessor("epoch")
+ class EpochAccessor:
+     def __init__(self, xarray_obj: xr.Dataset):
+         # The xarray object is the Dataset, which we store as self._ds
+         self._ds = xarray_obj
+
+     def rescale_coords(
+         self,
+         multiplier: float,
+         unit_label: str,
+         coord_names: Union[str, list[str]],
+     ) -> xr.Dataset:
+         """
+         Rescales specified X and Y coordinates in the Dataset by a given multiplier
+         and updates the unit label attribute.
+
+         Parameters
+         ----------
+         multiplier : float
+             The factor by which to multiply the coordinate values (e.g., 1e6 for meters to microns).
+         unit_label : str
+             The new unit label for the coordinates (e.g., "µm").
+         coord_names : str or list of str
+             The name(s) of the coordinate variable(s) to rescale.
+             If a string, only that coordinate is rescaled.
+             If a list, all listed coordinates are rescaled.
+
+         Returns
+         -------
+         xr.Dataset
+             A new Dataset with the updated and rescaled coordinates.
+
+         Examples
+         --------
+         # Convert X, Y, and Z from meters to microns
+         >>> ds_in_microns = ds.epoch.rescale_coords(1e6, "µm", coord_names=["X_Grid", "Y_Grid", "Z_Grid"])
+
+         # Convert only X to millimeters
+         >>> ds_in_mm = ds.epoch.rescale_coords(1000, "mm", coord_names="X_Grid")
+         """
+
+         ds = self._ds
+         new_coords = {}
+
+         if isinstance(coord_names, str):
+             # Convert single string to a list
+             coords_to_process = [coord_names]
+         elif isinstance(coord_names, list):
+             # Use the provided list
+             coords_to_process = coord_names
+         else:
+             coords_to_process = list(coord_names)
+
+         for coord_name in coords_to_process:
+             if coord_name not in ds.coords:
+                 raise ValueError(
+                     f"Coordinate '{coord_name}' not found in the Dataset. Cannot rescale."
+                 )
+
+             coord_original = ds[coord_name]
+
+             coord_rescaled = coord_original * multiplier
+             coord_rescaled.attrs = coord_original.attrs.copy()
+             coord_rescaled.attrs["units"] = unit_label
+
+             new_coords[coord_name] = coord_rescaled
+
+         return ds.assign_coords(new_coords)
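Since the accessor is registered at import time, rescale_coords becomes available on any Dataset; note that it returns a new Dataset via assign_coords rather than mutating in place. A small sketch on a synthetic grid (the coordinate name is hypothetical):

import numpy as np
import xarray as xr

import sdf_xarray.dataset_accessor  # noqa: F401 -- registers the "epoch" accessor

# Eleven grid points spanning 10 µm, stored in metres.
ds = xr.Dataset(
    coords={"X_Grid": ("X_Grid", np.linspace(0.0, 1e-5, 11), {"units": "m"})}
)
ds_um = ds.epoch.rescale_coords(1e6, "µm", coord_names="X_Grid")
print(ds_um["X_Grid"].attrs["units"])  # µm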
sdf_xarray-0.2.6.dist-info/METADATA → sdf_xarray-0.3.2.dist-info/METADATA RENAMED
@@ -1,8 +1,8 @@
  Metadata-Version: 2.4
  Name: sdf-xarray
- Version: 0.2.6
+ Version: 0.3.2
  Summary: Provides a backend for xarray to read SDF files as created by the EPOCH plasma PIC code.
- Author-Email: Peter Hill <peter.hill@york.ac.uk>, Joel Adams <joel.adams@york.ac.uk>, Shaun Doherty <shaun.doherty@york.ac.uk>
+ Author-Email: Peter Hill <peter.hill@york.ac.uk>, Joel Adams <joel.adams@york.ac.uk>, Shaun Doherty <shaun.doherty@york.ac.uk>, Chris Herdman <chris.herdman@york.ac.uk>
  License-Expression: BSD-3-Clause
  Classifier: Development Status :: 5 - Production/Stable
  Classifier: Intended Audience :: Science/Research
@@ -61,6 +61,9 @@ sdf-xarray provides a backend for [xarray](https://xarray.dev) to read SDF files
  [EPOCH](https://epochpic.github.io) using the [SDF-C](https://github.com/epochpic/SDF_C) library.
  Part of [BEAM](#broad-epoch-analysis-modules-beam) (Broad EPOCH Analysis Modules).

+ > [!IMPORTANT]
+ > To install this package make sure you are using one of the Python versions listed above.
+
  ## Installation

  Install from PyPI with:
sdf_xarray-0.2.6.dist-info/RECORD → sdf_xarray-0.3.2.dist-info/RECORD RENAMED
@@ -6,19 +6,20 @@ include/SDFC_14.4.7/sdf_list_type.h,sha256=Quu8v0-SEsQuJpGtEZnm09tAyXqWNitx0sXl5
  include/SDFC_14.4.7/sdf_vector_type.h,sha256=dbKjhzRRsvhzrnTwVjtVlvnuisEnRMKY-vvdm94ok_Q,1595
  include/SDFC_14.4.7/stack_allocator.h,sha256=L7U9vmGiVSw3VQLIv9EzTaVq7JbFxs9aNonKStTkUSg,1335
  include/SDFC_14.4.7/uthash.h,sha256=rIyy_-ylY6S_7WaZCCC3VtvXaC9q37rFyA0f1U9xc4w,63030
- lib/SDFC_14.4.7/sdfc.lib,sha256=Xtb83Kznp6ErvWjqaOQC9e5zRgj821Z2A5TcxYDqPOM,350158
+ lib/SDFC_14.4.7/sdfc.lib,sha256=2X2DUE3jjnJGQOI2wQFQLidt_6vFdzEizuQHMEFoMqc,350158
  lib/SDFC_14.4.7/SDFCConfig.cmake,sha256=IOA1eusC-KvUK4LNTEiOAmEdaPH1ZvNvbYPgiG1oZio,802
  lib/SDFC_14.4.7/SDFCConfigVersion.cmake,sha256=pN7Qqyf04s3izw7PYQ0XK6imvmhaVegSdR_nEl3Ok_o,2830
  lib/SDFC_14.4.7/SDFCTargets-release.cmake,sha256=G4zdx5PyjePigeD_a6rmZAxbk7L8Nf0klUnV78Lm2fI,828
  lib/SDFC_14.4.7/SDFCTargets.cmake,sha256=OVt1Gm8n7Ew4fiTmA9yHoef3vIIGwsXUZfqeG9p9Bys,4152
- sdf_xarray/__init__.py,sha256=-RXoKffu5dD54-t5qmapiAJ9ODN-w0OmXDal88e-KKE,19816
- sdf_xarray/_version.py,sha256=tKK1XlQrBwni5QwVI5KGGYkfGy_9gVAaKgRzYVdjGSI,746
+ sdf_xarray/__init__.py,sha256=obgAD4Aecvvpd8GkxLIAiIagSaY0bFVP2Q397N48_5g,24201
+ sdf_xarray/_version.py,sha256=bmLiJYnZTISDv_NDGANk6QDMSY0XTk0CwXXKhbOvW3Y,746
  sdf_xarray/csdf.pxd,sha256=ADPjAuHsodAvdOz96Z_XlFF7VL3KmVaXcTifWDP3rK0,4205
+ sdf_xarray/dataset_accessor.py,sha256=TvnVMBefnT1d94Bkllhd-__O3ittzpaVjZKfze-3WQ4,2484
  sdf_xarray/plotting.py,sha256=PnbEspR4XkA5SHkpoFKA2G7BYj5J3mVgR1TEeGol6Vw,7041
- sdf_xarray/sdf_interface.cp311-win_amd64.pyd,sha256=Z-Ze_0XmYq04SD_ftp56rCdofmPF9nQlBd7sgZSYYW0,356864
+ sdf_xarray/sdf_interface.cp311-win_amd64.pyd,sha256=_nUbBNxen8g_lr9nLGmHxtq51QN3oGfDh_E2DRO7KZ4,356864
  sdf_xarray/sdf_interface.pyx,sha256=PFC6upg14OZBqiGInLgBoxztIIKBk-HOh3WC9Ro4YUw,11975
- sdf_xarray-0.2.6.dist-info/METADATA,sha256=tiVT3h4nUdTgmGJsSKbymdlWjDNbVw8pnr5ccxNy5cQ,7475
- sdf_xarray-0.2.6.dist-info/WHEEL,sha256=RKWfL8d6R7y9dzb5_AyhPLMoBaKZaDpOTwy7YMg9zGI,106
- sdf_xarray-0.2.6.dist-info/entry_points.txt,sha256=gP7BIQpXNg6vIf7S7p-Rw_EJZTC1X50BsVTkK7dA7g0,57
- sdf_xarray-0.2.6.dist-info/licenses/LICENCE,sha256=aHWuyELjtzIL1jTXFHTbI3tr9vyVyhnw3I9_QYPdEX8,1515
- sdf_xarray-0.2.6.dist-info/RECORD,,
+ sdf_xarray-0.3.2.dist-info/METADATA,sha256=xvADFsOdsd5EzaZbVYGOUgmEMe4RzrTDF9IbyijadqE,7624
+ sdf_xarray-0.3.2.dist-info/WHEEL,sha256=oXhHG6ewLm-FNdEna2zwgy-K0KEl4claZ1ztR4VTx0I,106
+ sdf_xarray-0.3.2.dist-info/entry_points.txt,sha256=gP7BIQpXNg6vIf7S7p-Rw_EJZTC1X50BsVTkK7dA7g0,57
+ sdf_xarray-0.3.2.dist-info/licenses/LICENCE,sha256=aHWuyELjtzIL1jTXFHTbI3tr9vyVyhnw3I9_QYPdEX8,1515
+ sdf_xarray-0.3.2.dist-info/RECORD,,
sdf_xarray-0.2.6.dist-info/WHEEL → sdf_xarray-0.3.2.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: scikit-build-core 0.11.5
+ Generator: scikit-build-core 0.11.6
  Root-Is-Purelib: false
  Tag: cp311-cp311-win_amd64