sdf-xarray 0.3.2__cp312-cp312-win_amd64.whl → 0.5.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lib/SDFC_14.4.7/sdfc.lib CHANGED
Binary file
sdf_xarray/__init__.py CHANGED
@@ -16,12 +16,14 @@ from xarray.backends import AbstractDataStore, BackendArray, BackendEntrypoint
  from xarray.backends.file_manager import CachingFileManager
  from xarray.backends.locks import ensure_lock
  from xarray.core import indexing
+ from xarray.core.types import T_Chunks
  from xarray.core.utils import close_on_error, try_read_magic_number_from_path
  from xarray.core.variable import Variable

  # NOTE: Do not delete these lines, otherwise the "epoch" dataset and dataarray
  # accessors will not be imported when the user imports sdf_xarray
  import sdf_xarray.dataset_accessor
+ import sdf_xarray.download
  import sdf_xarray.plotting  # noqa: F401

  # NOTE: This attempts to initialise with the "pint" accessor if the user
@@ -84,6 +86,64 @@ def _resolve_glob(path_glob: PathLike | Iterable[PathLike]):
      return paths


+ def _build_datatree_from_dataset(
+     ds: xr.Dataset,
+ ) -> xr.DataTree:
+     """
+     An `xarray.DataTree` is constructed using the original names in the SDF
+     file: because these names include slashes, `xarray` can use them to
+     automatically build up a datatree. We additionally replace spaces with
+     underscores to be more pythonic. The original flat `xarray.Dataset` name
+     is kept under ``attrs["flat_structure_name"]`` for reference.
+
+     In some cases the user may output the ``always + species`` dumpmask, which
+     means that an SDF variable will have species data plus a general one. When
+     defining an `xarray.DataTree`, a node of the tree cannot both hold variable
+     information and have leaves with variables, so we move the node information
+     to a leaf named ``node/All`` (see the example of
+     ``Derived/Number_Density/All`` in the table below).
+
+     Below are some examples of how variable names are translated from the
+     regular `xarray.open_dataset` result into their more traditional names.
+
+     =================================== ===================================
+     Dataset variable name               DataTree variable name
+     =================================== ===================================
+     ``Derived_Number_Density``          ``Derived/Number_Density/All``
+     ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+     ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+     ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+     ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+     =================================== ===================================
+
+     Parameters
+     ----------
+     ds
+         Incoming `xarray.Dataset` to convert to an `xarray.DataTree`
+     """
+     renames = {}
+     for name, var in ds.data_vars.items():
+         # Append the current variable name to the attributes
+         var.attrs["flat_structure_name"] = name
+         renames.update({name: var.attrs["full_name"].replace(" ", "_")})
+
+     new_names = renames.values()
+
+     final_renames = {
+         key: (
+             f"{path}/All"
+             if any(other.startswith(f"{path}/") for other in new_names)
+             else path
+         )
+         for key, path in renames.items()
+     }
+
+     ds = ds.rename_vars(final_renames)
+     dt = xr.DataTree.from_dict(ds)
+     dt.attrs = ds.attrs
+     return dt
+
+
  def purge_unselected_data_vars(ds: xr.Dataset, data_vars: list[str]) -> xr.Dataset:
      """
      If the user has exclusively requested only certain variables be
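A minimal, self-contained sketch of the rename rule introduced above: a variable whose tree path is a prefix of another variable's path is moved to a ``.../All`` leaf. The two names are taken from the docstring table; the logic mirrors the ``final_renames`` comprehension in the hunk.

    renames = {
        "Derived_Number_Density": "Derived/Number_Density",
        "Derived_Number_Density_Electron": "Derived/Number_Density/Electron",
    }
    new_names = renames.values()
    final_renames = {
        key: (
            f"{path}/All"
            if any(other.startswith(f"{path}/") for other in new_names)
            else path
        )
        for key, path in renames.items()
    }
    # final_renames["Derived_Number_Density"] == "Derived/Number_Density/All"
    # final_renames["Derived_Number_Density_Electron"] keeps its path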
@@ -142,6 +202,7 @@ def open_mfdataset(
      keep_particles: bool = False,
      probe_names: list[str] | None = None,
      data_vars: list[str] | None = None,
+     chunks: T_Chunks = "auto",
  ) -> xr.Dataset:
      """Open a set of EPOCH SDF files as one `xarray.Dataset`

@@ -175,6 +236,14 @@ def open_mfdataset(
          List of EPOCH probe names
      data_vars :
          List of data vars to load in (if not specified, loads all variables)
+     chunks :
+         Dictionary with keys given by dimension names and values given by chunk
+         sizes. In general, these should divide the dimensions of each dataset. By
+         default, chunks are set automatically to match the size of the dimensions
+         stored in each of the SDF files. See `Xarray chunking-and-performance
+         <https://docs.xarray.dev/en/stable/user-guide/dask.html#chunking-and-performance>`_
+         for details on why this is useful for large datasets. Automatic chunking
+         can be disabled with ``chunks=None``.
      """

      path_glob = _resolve_glob(path_glob)
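A usage sketch for the new ``chunks`` argument; the glob pattern and the ``time`` dimension name are illustrative placeholders, not taken from the diff:

    from sdf_xarray import open_mfdataset

    ds = open_mfdataset("run_*.sdf")                     # chunks="auto" (default)
    ds_eager = open_mfdataset("run_*.sdf", chunks=None)  # disable dask chunking
    ds_custom = open_mfdataset("run_*.sdf", chunks={"time": 1})  # explicit sizes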
@@ -185,13 +254,16 @@ def open_mfdataset(
              data_vars=data_vars,
              keep_particles=keep_particles,
              probe_names=probe_names,
+             chunks=chunks,
          )

      _, var_times_map = make_time_dims(path_glob)

      all_dfs = []
      for f in path_glob:
-         ds = xr.open_dataset(f, keep_particles=keep_particles, probe_names=probe_names)
+         ds = xr.open_dataset(
+             f, keep_particles=keep_particles, probe_names=probe_names, chunks=chunks
+         )

          # If the data_vars are specified then only load them in and disregard the rest.
          # If there are no remaining data variables then skip adding the dataset to list
@@ -225,6 +297,147 @@ def open_mfdataset(
      )


+ def open_datatree(
+     path: PathLike,
+     *,
+     keep_particles: bool = False,
+     probe_names: list[str] | None = None,
+ ) -> xr.DataTree:
+     """
+     An `xarray.DataTree` is constructed using the original names in the SDF
+     file: because these names include slashes, `xarray` can use them to
+     automatically build up a datatree. We additionally replace spaces with
+     underscores to be more pythonic. The original flat `xarray.Dataset` name
+     is kept under ``attrs["flat_structure_name"]`` for reference.
+
+     In some cases the user may output the ``always + species`` dumpmask, which
+     means that an SDF variable will have species data plus a general one. When
+     defining an `xarray.DataTree`, a node of the tree cannot both hold variable
+     information and have leaves with variables, so we move the node information
+     to a leaf named ``node/All`` (see the example of
+     ``Derived/Number_Density/All`` in the table below).
+
+     Below are some examples of how variable names are translated from the
+     regular `xarray.open_dataset` result into their more traditional names.
+
+     =================================== ===================================
+     Dataset variable name               DataTree variable name
+     =================================== ===================================
+     ``Derived_Number_Density``          ``Derived/Number_Density/All``
+     ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+     ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+     ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+     ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+     =================================== ===================================
+
+     Parameters
+     ----------
+     path
+         The path to the SDF file
+     keep_particles
+         If ``True``, also load particle data (this may use a lot of memory!)
+     probe_names
+         List of EPOCH probe names
+
+     Examples
+     --------
+     >>> dt = open_datatree("0000.sdf")
+     >>> dt["Electric_Field"]["Ex"].values  # Access all Electric_Field_Ex data
+     """
+
+     return xr.open_datatree(
+         path, keep_particles=keep_particles, probe_names=probe_names
+     )
+
+
+ def open_mfdatatree(
+     path_glob: Iterable | str | Path | Callable[..., Iterable[Path]],
+     *,
+     separate_times: bool = False,
+     keep_particles: bool = False,
+     probe_names: list[str] | None = None,
+     data_vars: list[str] | None = None,
+ ) -> xr.DataTree:
+     """Open a set of EPOCH SDF files as one `xarray.DataTree`
+
+     EPOCH can output variables at different periods, so each individual
+     SDF file from one EPOCH run may have different variables in it. In
+     order to combine all files into one `xarray.Dataset`, we need to
+     concatenate variables across their time dimension.
+
+     We have two choices:
+
+     1. One time dimension where some variables may not be defined at all time
+        points, and so will be filled with NaNs at missing points; or
+     2. Multiple time dimensions, one for each output frequency
+
+     The second option is better for memory consumption, as the missing data
+     with the first option still takes up space. However, proper lazy-loading
+     may mitigate this.
+
+     The ``separate_times`` argument can be used to switch between these choices.
+
+     An `xarray.DataTree` is constructed using the original names in the SDF
+     file: because these names include slashes, `xarray` can use them to
+     automatically build up a datatree. We additionally replace spaces with
+     underscores to be more pythonic. The original flat `xarray.Dataset` name
+     is kept under ``attrs["flat_structure_name"]`` for reference.
+
+     This function combines multiple SDF files into a single `xarray.DataTree`
+     with a unified time dimension and a hierarchical organisation of variables.
+
+     In some cases the user may output the ``always + species`` dumpmask, which
+     means that an SDF variable will have species data plus a general one. When
+     defining an `xarray.DataTree`, a node of the tree cannot both hold variable
+     information and have leaves with variables, so we move the node information
+     to a leaf named ``node/All`` (see the example of
+     ``Derived/Number_Density/All`` in the table below).
+
+     Below are some examples of how variable names are translated from the
+     regular `xarray.open_dataset` result into their more traditional names.
+
+     =================================== ===================================
+     Dataset variable name               DataTree variable name
+     =================================== ===================================
+     ``Derived_Number_Density``          ``Derived/Number_Density/All``
+     ``Derived_Number_Density_Electron`` ``Derived/Number_Density/Electron``
+     ``Derived_Number_Density_Ion``      ``Derived/Number_Density/Ion``
+     ``Derived_Number_Density_Photon``   ``Derived/Number_Density/Photon``
+     ``Derived_Average_Particle_Energy`` ``Derived/Average_Particle_Energy``
+     =================================== ===================================
+
+     Parameters
+     ----------
+     path_glob
+         List of filenames or string glob pattern
+     separate_times
+         If ``True``, create separate time dimensions for variables defined at
+         different output frequencies
+     keep_particles
+         If ``True``, also load particle data (this may use a lot of memory!)
+     probe_names
+         List of EPOCH probe names
+     data_vars
+         List of data vars to load in (if not specified, loads all variables)
+
+     Examples
+     --------
+     >>> dt = open_mfdatatree("*.sdf")
+     >>> dt["Electric_Field"]["Ex"].values  # Access all Electric_Field_Ex data
+     >>> dt.coords["time"].values  # Access combined time dimension
+     """
+     # First, combine the datasets as usual
+     combined_ds = open_mfdataset(
+         path_glob,
+         separate_times=separate_times,
+         keep_particles=keep_particles,
+         probe_names=probe_names,
+         data_vars=data_vars,
+     )
+
+     return _build_datatree_from_dataset(combined_ds)
+
+
  def make_time_dims(path_glob):
      """Extract the distinct set of time arrays from a collection of
      SDF files, along with a mapping from variable names to their time
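A usage sketch for the new tree-building path; the glob is a placeholder and ``Electric_Field/Ex`` follows the naming convention in the docstring examples:

    from sdf_xarray import open_mfdatatree

    dt = open_mfdatatree("run_*.sdf")
    ex = dt["Electric_Field/Ex"]            # hierarchical access by path
    flat = ex.attrs["flat_structure_name"]  # e.g. "Electric_Field_Ex", the flat Dataset name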
@@ -268,13 +481,11 @@ class SDFBackendArray(BackendArray):

      __slots__ = ("datastore", "dtype", "shape", "variable_name")

-     def __init__(self, variable_name, datastore):
+     def __init__(self, variable_name, datastore, shape, dtype):
          self.datastore = datastore
          self.variable_name = variable_name
-
-         array = self.get_array()
-         self.shape = array.shape
-         self.dtype = array.dtype
+         self.shape = shape
+         self.dtype = dtype

      def get_array(self, needs_lock=True):
          with self.datastore.acquire_context(needs_lock) as ds:
@@ -446,7 +657,12 @@ class SDFDataStore(AbstractDataStore):
              if value.units is not None:
                  data_attrs["units"] = value.units

-             data_vars[base_name] = Variable(dims, value.data, attrs=data_attrs)
+             var = Variable(dims, value.data, attrs=data_attrs)
+
+             # Provide preferred_chunks for constants so dask aligns to natural shapes
+             var.encoding["preferred_chunks"] = dict(zip(dims, shape))
+
+             data_vars[base_name] = var
              continue

          if value.is_point_data:
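The ``encoding["preferred_chunks"]`` entries set in this hunk (for constants) and in a later hunk (for lazily loaded variables) are what xarray consults when a dataset is opened with ``chunks="auto"``, so dask chunk boundaries line up with each file's on-disk shapes. A minimal sketch, with the file and variable names borrowed from the docstring examples:

    import xarray as xr

    ds = xr.open_dataset("0000.sdf", chunks="auto")
    print(ds["Electric_Field_Ex"].chunks)  # chunks follow the on-disk dimension sizes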
@@ -495,9 +711,9 @@ class SDFDataStore(AbstractDataStore):
              grid_mid = self.ds.grids[value.grid_mid]
              grid_mid_base_name = _process_grid_name(grid_mid.name, _norm_grid_name)
              for dim_size, dim_name in zip(grid_mid.shape, grid_mid.labels):
-                 dim_size_lookup[dim_name][
-                     dim_size
-                 ] = f"{dim_name}_{grid_mid_base_name}"
+                 dim_size_lookup[dim_name][dim_size] = (
+                     f"{dim_name}_{grid_mid_base_name}"
+                 )

          var_coords = [
              dim_size_lookup[dim_name][dim_size]
@@ -513,8 +729,24 @@ class SDFDataStore(AbstractDataStore):
                  "full_name": key,
                  "long_name": long_name,
              }
-             lazy_data = indexing.LazilyIndexedArray(SDFBackendArray(key, self))
-             data_vars[base_name] = Variable(var_coords, lazy_data, data_attrs)
+             lazy_data = indexing.LazilyIndexedArray(
+                 SDFBackendArray(key, self, shape=value.shape, dtype=value.data.dtype)
+             )
+             var = Variable(var_coords, lazy_data, data_attrs)
+             # Set preferred chunks to match on-disk layout
+             # For point data (1D): full dimension
+             # For grid data (N-D): individual grid chunk sizes
+             if value.is_point_data:
+                 var.encoding["preferred_chunks"] = {var_coords[0]: len(value.data)}
+             else:
+                 # Align with on-disk grid structure
+                 chunk_dict = {}
+                 for dim_name, size in zip(var_coords, value.shape):
+                     # Use natural on-disk boundaries
+                     chunk_dict[dim_name] = size
+                 var.encoding["preferred_chunks"] = chunk_dict
+
+             data_vars[base_name] = var

          # TODO: might need to decode if mult is set?

@@ -533,6 +765,14 @@ class SDFDataStore(AbstractDataStore):


  class SDFEntrypoint(BackendEntrypoint):
+     supports_groups = True
+     open_dataset_parameters: ClassVar[list[str]] = [
+         "filename_or_obj",
+         "drop_variables",
+         "keep_particles",
+         "probe_names",
+     ]
+
      def open_dataset(
          self,
          filename_or_obj,
@@ -555,13 +795,29 @@ class SDFEntrypoint(BackendEntrypoint):
          with close_on_error(store):
              return store.load()

-     open_dataset_parameters: ClassVar[list[str]] = [
+     open_datatree_parameters: ClassVar[list[str]] = [
          "filename_or_obj",
          "drop_variables",
          "keep_particles",
          "probe_names",
      ]

+     def open_datatree(
+         self,
+         filename_or_obj,
+         *,
+         drop_variables=None,
+         keep_particles=False,
+         probe_names=None,
+     ):
+         ds = self.open_dataset(
+             filename_or_obj,
+             drop_variables=drop_variables,
+             keep_particles=keep_particles,
+             probe_names=probe_names,
+         )
+         return _build_datatree_from_dataset(ds)
+
      def guess_can_open(self, filename_or_obj):
          magic_number = try_read_magic_number_from_path(filename_or_obj)
          if magic_number is not None:
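With the backend method above in place, the datatree path is also reachable through xarray's own API, as the module-level ``open_datatree`` already does internally; a sketch, with the file name borrowed from the docstring examples:

    import xarray as xr

    dt = xr.open_datatree("0000.sdf", keep_particles=False)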
@@ -574,6 +830,11 @@ class SDFEntrypoint(BackendEntrypoint):
      url = "https://epochpic.github.io/documentation/visualising_output/python_beam.html"


+ class XrTUIEntrpoint:
+     def open_mfdatatree(self, paths: list[Path]) -> xr.DataTree:
+         return open_mfdatatree(paths)
+
+
  class SDFPreprocess:
      """Preprocess SDF files for xarray ensuring matching job ids and sets
      time dimension.
sdf_xarray/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '0.3.2'
- __version_tuple__ = version_tuple = (0, 3, 2)
+ __version__ = version = '0.5.0'
+ __version_tuple__ = version_tuple = (0, 5, 0)

- __commit_id__ = commit_id = 'g331520e50'
+ __commit_id__ = commit_id = 'g8a1775409'
sdf_xarray/dataset_accessor.py CHANGED
@@ -1,7 +1,15 @@
- from typing import Union
+ from __future__ import annotations
+
+ from types import MethodType
+ from typing import TYPE_CHECKING

  import xarray as xr

+ from .plotting import animate_multiple, show
+
+ if TYPE_CHECKING:
+     from matplotlib.animation import FuncAnimation
+

  @xr.register_dataset_accessor("epoch")
  class EpochAccessor:
@@ -13,7 +21,7 @@ class EpochAccessor:
          self,
          multiplier: float,
          unit_label: str,
-         coord_names: Union[str, list[str]],
+         coord_names: str | list[str],
      ) -> xr.Dataset:
          """
          Rescales specified X and Y coordinates in the Dataset by a given multiplier
@@ -71,3 +79,46 @@ class EpochAccessor:
              new_coords[coord_name] = coord_rescaled

          return ds.assign_coords(new_coords)
+
+     def animate_multiple(
+         self,
+         *variables: str | xr.DataArray,
+         datasets_kwargs: list[dict] | None = None,
+         **kwargs,
+     ) -> FuncAnimation:
+         """
+         Animate multiple Dataset variables on the same axes.
+
+         Parameters
+         ----------
+         variables
+             The variables to animate.
+         datasets_kwargs
+             Per-dataset keyword arguments passed to plotting.
+         kwargs
+             Common keyword arguments forwarded to animation.
+
+         Examples
+         --------
+         >>> anim = ds.epoch.animate_multiple(
+         ...     ds["Derived_Number_Density_Electron"],
+         ...     ds["Derived_Number_Density_Ion"],
+         ...     datasets_kwargs=[{"label": "Electron"}, {"label": "Ion"}],
+         ...     ylabel="Derived Number Density [1/m$^3$]",
+         ... )
+         >>> anim.save("animation.gif")
+         >>> # Or in a jupyter notebook:
+         >>> anim.show()
+         """
+
+         dataarrays = [
+             self._obj[var] if isinstance(var, str) else var for var in variables
+         ]
+         anim = animate_multiple(
+             *dataarrays,
+             datasets_kwargs=datasets_kwargs,
+             **kwargs,
+         )
+         anim.show = MethodType(show, anim)
+
+         return anim
sdf_xarray/download.py ADDED
@@ -0,0 +1,88 @@
+ from pathlib import Path
+ from shutil import move
+ from typing import TYPE_CHECKING, Literal, TypeAlias
+
+ if TYPE_CHECKING:
+     import pooch  # noqa: F401
+
+ DatasetName: TypeAlias = Literal[
+     "test_array_no_grids",
+     "test_dist_fn",
+     "test_files_1D",
+     "test_files_2D_moving_window",
+     "test_files_3D",
+     "test_mismatched_files",
+     "test_two_probes_2D",
+     "tutorial_dataset_1d",
+     "tutorial_dataset_2d",
+     "tutorial_dataset_2d_moving_window",
+     "tutorial_dataset_3d",
+ ]
+
+
+ def fetch_dataset(
+     dataset_name: DatasetName, save_path: Path | str | None = None
+ ) -> Path:
+     """
+     Downloads the specified dataset from its Zenodo URL. If it has already been
+     downloaded, the path to the cached, unzipped directory is returned instead.
+
+     Parameters
+     ----------
+     dataset_name
+         The name of the dataset to download
+     save_path
+         The directory to save the dataset to (defaults to the cache folder
+         ``"sdf_datasets"``; see `pooch.os_cache` for details on how the cache works)
+
+     Returns
+     -------
+     Path
+         The path to the directory containing the unzipped dataset files
+
+     Examples
+     --------
+     >>> # Assuming the dataset has not been downloaded yet
+     >>> path = fetch_dataset("tutorial_dataset_1d")
+     Downloading file 'tutorial_dataset_1d.zip' ...
+     Unzipping contents of '.../sdf_datasets/tutorial_dataset_1d.zip' to '.../sdf_datasets/tutorial_dataset_1d'
+     >>> path
+     '.../sdf_datasets/tutorial_dataset_1d'
+     """
+     import pooch  # noqa: PLC0415
+
+     logger = pooch.get_logger()
+     datasets = pooch.create(
+         path=pooch.os_cache("sdf_datasets"),
+         base_url="https://zenodo.org/records/17991042/files",
+         registry={
+             "test_array_no_grids.zip": "md5:583c85ed8c31d0e34e7766b6d9f2d6da",
+             "test_dist_fn.zip": "md5:a582ff5e8c59bad62fe4897f65fc7a11",
+             "test_files_1D.zip": "md5:42e53b229556c174c538c5481c4d596a",
+             "test_files_2D_moving_window.zip": "md5:3744483bbf416936ad6df8847c54dad1",
+             "test_files_3D.zip": "md5:a679e71281bab1d373dc4980e6da1a7c",
+             "test_mismatched_files.zip": "md5:710fdc94666edf7777523e8fc9dd1bd4",
+             "test_two_probes_2D.zip": "md5:0f2a4fefe84a15292d066b3320d4d533",
+             "tutorial_dataset_1d.zip": "md5:7fad744d8b8b2b84bba5c0e705fdef7b",
+             "tutorial_dataset_2d.zip": "md5:b7f35c05703a48eb5128049cdd106ffa",
+             "tutorial_dataset_2d_moving_window.zip": "md5:a795f40d18df69263842055de4559501",
+             "tutorial_dataset_3d.zip": "md5:d9254648867016292440fdb028f717f7",
+         },
+         retry_if_failed=10,
+     )
+
+     datasets.fetch(
+         f"{dataset_name}.zip", processor=pooch.Unzip(extract_dir="."), progressbar=True
+     )
+     cache_path = Path(datasets.path) / dataset_name
+
+     if save_path is not None:
+         save_path = Path(save_path)
+         logger.info(
+             "Moving contents of '%s' to '%s'",
+             cache_path,
+             save_path / dataset_name,
+         )
+         return move(cache_path, save_path / dataset_name)
+
+     return cache_path
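A sketch tying the new download helper to the existing open functions; the dataset name comes from the registry above, while the ``*.sdf`` glob is an assumption about the unzipped contents:

    from sdf_xarray import open_mfdataset
    from sdf_xarray.download import fetch_dataset

    path = fetch_dataset("tutorial_dataset_1d")  # downloads once, then reuses the cache
    ds = open_mfdataset(path.glob("*.sdf"))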