essreduce 25.11.4__py3-none-any.whl → 25.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ess/reduce/data/_registry.py +110 -44
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/METADATA +1 -1
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/RECORD +7 -7
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/WHEEL +0 -0
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/entry_points.txt +0 -0
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/licenses/LICENSE +0 -0
- {essreduce-25.11.4.dist-info → essreduce-25.11.5.dist-info}/top_level.txt +0 -0
ess/reduce/data/_registry.py
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
import dataclasses
|
|
6
7
|
import hashlib
|
|
7
8
|
import os
|
|
8
9
|
from abc import ABC, abstractmethod
|
|
9
10
|
from collections.abc import Mapping
|
|
10
|
-
from dataclasses import dataclass
|
|
11
11
|
from functools import cache
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Any
|
|
13
|
+
from typing import Any, Literal
|
|
14
14
|
|
|
15
15
|
_LOCAL_CACHE_ENV_VAR = "SCIPP_DATA_DIR"
|
|
16
16
|
_LOCAL_REGISTRY_ENV_VAR = "SCIPP_OVERRIDE_DATA_DIR"
|
|
@@ -28,8 +28,8 @@ def make_registry(
|
|
|
28
28
|
|
|
29
29
|
By default, this function creates a :class:`PoochRegistry` to download files
|
|
30
30
|
via HTTP from an online file store.
|
|
31
|
-
This can be overridden by setting the environment variable
|
|
32
|
-
path on the local file system.
|
|
31
|
+
This can be overridden by setting the environment variable
|
|
32
|
+
``SCIPP_OVERRIDE_DATA_DIR`` to a path on the local file system.
|
|
33
33
|
In this case, a :class:`LocalRegistry` is returned.
|
|
34
34
|
|
|
35
35
|
Files are specified as a dict using either the Pooch string format explicitly
|
|
@@ -40,7 +40,11 @@ def make_registry(
|
|
|
40
40
|
... "file1.dat": "md5:1234567890abcdef",
|
|
41
41
|
... "file2.csv": Entry(alg="md5", chk="abcdef123456789"),
|
|
42
42
|
... "folder/nested.dat": "blake2b:1234567890abcdef",
|
|
43
|
-
... "zipped.zip": Entry(
|
|
43
|
+
... "zipped.zip": Entry(
|
|
44
|
+
... alg="blake2b",
|
|
45
|
+
... chk="abcdef123456789",
|
|
46
|
+
... extractor="unzip"
|
|
47
|
+
... ),
|
|
44
48
|
... }
|
|
45
49
|
|
|
46
50
|
In the example above, the specifications for ``file1.dat`` and ``file2.csv`` are
|
|
@@ -49,10 +53,11 @@ def make_registry(
|
|
|
49
53
|
Paths like this must always use forward slashes (/) even on Windows.
|
|
50
54
|
|
|
51
55
|
As shown above, it is possible to automatically unzip
|
|
52
|
-
files by specifying ``unzip
|
|
56
|
+
files by specifying ``extractor="unzip"``.
|
|
53
57
|
When calling ``registry.get_path("zipped.zip")`` the file will be unzipped and
|
|
54
58
|
a path to the content is returned.
|
|
55
|
-
|
|
59
|
+
Similarly, ``extractor="untar"`` specifies that a file needs to be untarred
|
|
60
|
+
(and possibly un-gzipped).
|
|
56
61
|
|
|
57
62
|
The complete path to the source file is constructed as follows:
|
|
58
63
|
|
|
@@ -111,7 +116,7 @@ def _check_local_override_path(override: str) -> Path:
|
|
|
111
116
|
return path
|
|
112
117
|
|
|
113
118
|
|
|
114
|
-
@dataclass(frozen=True, slots=True)
|
|
119
|
+
@dataclasses.dataclass(frozen=True, slots=True)
|
|
115
120
|
class Entry:
|
|
116
121
|
"""An entry in a registry."""
|
|
117
122
|
|
|
@@ -119,9 +124,18 @@ class Entry:
|
|
|
119
124
|
"""Checksum."""
|
|
120
125
|
alg: str
|
|
121
126
|
"""Checksum algorithm."""
|
|
122
|
-
|
|
127
|
+
extractor: Literal["unzip", "untar"] | None = None
|
|
128
|
+
"""Processor to extract file contents."""
|
|
129
|
+
|
|
130
|
+
unzip: dataclasses.InitVar[bool] = False
|
|
123
131
|
"""Whether to unzip the file."""
|
|
124
132
|
|
|
133
|
+
def __post_init__(self, unzip: bool) -> None:
|
|
134
|
+
if self.extractor is not None and unzip:
|
|
135
|
+
raise TypeError("Set either the 'unzip' argument or 'extractor', not both.")
|
|
136
|
+
if self.extractor is None and unzip:
|
|
137
|
+
super().__setattr__("extractor", "unzip")
|
|
138
|
+
|
|
125
139
|
@classmethod
|
|
126
140
|
def from_pooch_string(cls, pooch_string: str) -> Entry:
|
|
127
141
|
alg, chk = pooch_string.split(":")
|
|
@@ -132,7 +146,7 @@ class Registry(ABC):
|
|
|
132
146
|
def __init__(self, files: Mapping[str, str | Entry]) -> None:
|
|
133
147
|
self._files = _to_file_entries(files)
|
|
134
148
|
|
|
135
|
-
@
|
|
149
|
+
@cache # noqa: B019
|
|
136
150
|
def get_path(self, name: str) -> Path:
|
|
137
151
|
"""Get the path to a file in the registry.
|
|
138
152
|
|
|
@@ -154,9 +168,60 @@ class Registry(ABC):
|
|
|
154
168
|
:
|
|
155
169
|
The Path to the file.
|
|
156
170
|
"""
|
|
171
|
+
return Path(
|
|
172
|
+
_expect_single(
|
|
173
|
+
self._fetch(name, extractor=self._extractor_processor(name)),
|
|
174
|
+
name,
|
|
175
|
+
)
|
|
176
|
+
)
|
|
157
177
|
|
|
158
|
-
|
|
159
|
-
|
|
178
|
+
@cache # noqa: B019
|
|
179
|
+
def get_paths(self, name: str) -> list[Path]:
|
|
180
|
+
"""Get the paths to unpacked files from the registry.
|
|
181
|
+
|
|
182
|
+
This method downloads the given file, extracts its contents, and returns
|
|
183
|
+
the paths to all extracted contents.
|
|
184
|
+
Unlike :meth:`get_path`, this method requires an extractor processor
|
|
185
|
+
(unzip or untar).
|
|
186
|
+
|
|
187
|
+
Depending on the implementation, the file is downloaded if necessary.
|
|
188
|
+
|
|
189
|
+
Note that implementations are allowed to cache return values of this method
|
|
190
|
+
to avoid recomputing potentially expensive checksums.
|
|
191
|
+
This usually means that the ``Registry`` object itself gets stored until the
|
|
192
|
+
Python interpreter shuts down.
|
|
193
|
+
However, registries are small and do not own resources.
|
|
194
|
+
|
|
195
|
+
Parameters
|
|
196
|
+
----------
|
|
197
|
+
name:
|
|
198
|
+
Name of the zipped or tarred file to get the path for.
|
|
199
|
+
|
|
200
|
+
Returns
|
|
201
|
+
-------
|
|
202
|
+
:
|
|
203
|
+
The Paths to the files.
|
|
204
|
+
"""
|
|
205
|
+
if (extractor := self._extractor_processor(name)) is None:
|
|
206
|
+
raise ValueError(f"File '{name}' is not zipped or tarred.")
|
|
207
|
+
return [Path(path) for path in self._fetch(name, extractor=extractor)]
|
|
208
|
+
|
|
209
|
+
def _extractor_processor_type(self, name: str) -> Any:
|
|
210
|
+
match self._files[name].extractor:
|
|
211
|
+
case "unzip":
|
|
212
|
+
return _pooch_unzip_processor_class()
|
|
213
|
+
case "untar":
|
|
214
|
+
return _pooch_untar_processor_class()
|
|
215
|
+
case None:
|
|
216
|
+
return None
|
|
217
|
+
|
|
218
|
+
@abstractmethod
|
|
219
|
+
def _extractor_processor(self, name: str) -> Any:
|
|
220
|
+
"""Return an instance of a processor for the given file."""
|
|
221
|
+
|
|
222
|
+
@abstractmethod
|
|
223
|
+
def _fetch(self, name: str, extractor: Any) -> list[str] | str:
|
|
224
|
+
"""Fetch the given file from the registry."""
|
|
160
225
|
|
|
161
226
|
|
|
162
227
|
class PoochRegistry(Registry):
|
|
@@ -178,24 +243,15 @@ class PoochRegistry(Registry):
|
|
|
178
243
|
)
|
|
179
244
|
super().__init__(files)
|
|
180
245
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
"""Get the path to a file in the registry.
|
|
184
|
-
|
|
185
|
-
Downloads the file if necessary.
|
|
186
|
-
"""
|
|
187
|
-
if self._needs_unzip(name):
|
|
188
|
-
paths: list[str] = self._registry.fetch( # type: ignore[assignment]
|
|
189
|
-
name, processor=self._unzip_processor
|
|
190
|
-
)
|
|
191
|
-
return Path(_expect_single_unzipped(paths, name))
|
|
192
|
-
return Path(self._registry.fetch(name))
|
|
246
|
+
def _fetch(self, name: str, extractor: Any) -> list[str] | str:
|
|
247
|
+
return self._registry.fetch(name, processor=extractor)
|
|
193
248
|
|
|
194
|
-
|
|
195
|
-
def _unzip_processor(self) -> Any:
|
|
249
|
+
def _extractor_processor(self, name: str) -> Any:
|
|
196
250
|
# Create a new processor on demand because reusing the same processor would
|
|
197
251
|
# reuse the same output path for every file.
|
|
198
|
-
|
|
252
|
+
if (cls := self._extractor_processor_type(name=name)) is not None:
|
|
253
|
+
return cls()
|
|
254
|
+
return None
|
|
199
255
|
|
|
200
256
|
|
|
201
257
|
class LocalRegistry(Registry):
|
|
@@ -217,12 +273,11 @@ class LocalRegistry(Registry):
|
|
|
217
273
|
base_url=base_url,
|
|
218
274
|
retry_if_failed=retry_if_failed,
|
|
219
275
|
)
|
|
220
|
-
self.
|
|
276
|
+
self._extract_base_dir = pooch_registry.path
|
|
221
277
|
self._source_path = source_path.resolve().joinpath(*prefix.split("/"), version)
|
|
222
278
|
super().__init__(files)
|
|
223
279
|
|
|
224
|
-
|
|
225
|
-
def get_path(self, name: str) -> Path:
|
|
280
|
+
def _fetch(self, name: str, extractor: Any) -> list[str] | str:
|
|
226
281
|
"""Get the path to a file in the registry."""
|
|
227
282
|
try:
|
|
228
283
|
entry = self._files[name]
|
|
@@ -238,24 +293,24 @@ class LocalRegistry(Registry):
|
|
|
238
293
|
|
|
239
294
|
_check_hash(name, path, entry)
|
|
240
295
|
|
|
241
|
-
if
|
|
242
|
-
return
|
|
243
|
-
|
|
244
|
-
self._unzip_processor(os.fspath(path), "download", None), path
|
|
245
|
-
)
|
|
246
|
-
)
|
|
247
|
-
return path
|
|
296
|
+
if extractor is not None:
|
|
297
|
+
return extractor(os.fspath(path), "download", None)
|
|
298
|
+
return os.fspath(path)
|
|
248
299
|
|
|
249
300
|
def _local_path(self, name: str) -> Path:
|
|
250
301
|
# Split on "/" because `name` is always a POSIX-style path, but the return
|
|
251
302
|
# value is a system path, i.e., it can be a Windows-style path.
|
|
252
303
|
return self._source_path.joinpath(*name.split("/"))
|
|
253
304
|
|
|
254
|
-
|
|
255
|
-
|
|
305
|
+
def _extract_dir(self, name: str) -> Path:
|
|
306
|
+
return self._extract_base_dir / name
|
|
307
|
+
|
|
308
|
+
def _extractor_processor(self, name: str) -> Any:
|
|
256
309
|
# Create a new processor on demand because reusing the same processor would
|
|
257
310
|
# reuse the same output path for every file.
|
|
258
|
-
|
|
311
|
+
if (cls := self._extractor_processor_type(name=name)) is not None:
|
|
312
|
+
return cls(extract_dir=self._extract_dir(name))
|
|
313
|
+
return None
|
|
259
314
|
|
|
260
315
|
|
|
261
316
|
def _import_pooch() -> Any:
|
|
@@ -288,19 +343,30 @@ def _create_pooch(
|
|
|
288
343
|
)
|
|
289
344
|
|
|
290
345
|
|
|
291
|
-
def
|
|
346
|
+
def _pooch_unzip_processor_class() -> Any:
|
|
292
347
|
try:
|
|
293
348
|
import pooch
|
|
294
349
|
except ImportError:
|
|
295
350
|
raise ImportError("You need to install Pooch to unzip files.") from None
|
|
296
351
|
|
|
297
|
-
return pooch.processors.Unzip
|
|
352
|
+
return pooch.processors.Unzip
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _pooch_untar_processor_class() -> Any:
|
|
356
|
+
try:
|
|
357
|
+
import pooch
|
|
358
|
+
except ImportError:
|
|
359
|
+
raise ImportError("You need to install Pooch to untar files.") from None
|
|
360
|
+
|
|
361
|
+
return pooch.processors.Untar
|
|
298
362
|
|
|
299
363
|
|
|
300
|
-
def
|
|
364
|
+
def _expect_single(paths: list[str] | str, archive: str | os.PathLike) -> str:
|
|
365
|
+
if isinstance(paths, str):
|
|
366
|
+
return paths
|
|
301
367
|
if len(paths) != 1:
|
|
302
368
|
raise ValueError(
|
|
303
|
-
f"Expected exactly one file
|
|
369
|
+
f"Expected exactly one extracted file, got {len(paths)} in "
|
|
304
370
|
f"'{os.fspath(archive)}'."
|
|
305
371
|
)
|
|
306
372
|
return paths[0]
|
|
@@ -8,7 +8,7 @@ ess/reduce/ui.py,sha256=zmorAbDwX1cU3ygDT--OP58o0qU7OBcmJz03jPeYSLA,10884
|
|
|
8
8
|
ess/reduce/uncertainty.py,sha256=LR4O6ApB6Z-W9gC_XW0ajupl8yFG-du0eee1AX_R-gk,6990
|
|
9
9
|
ess/reduce/workflow.py,sha256=738-lcdgsORYfQ4A0UTk2IgnbVxC3jBdpscpaOFIpdc,3114
|
|
10
10
|
ess/reduce/data/__init__.py,sha256=uDtqkmKA_Zwtj6II25zntz9T812XhdCn3tktYev4uyY,486
|
|
11
|
-
ess/reduce/data/_registry.py,sha256=
|
|
11
|
+
ess/reduce/data/_registry.py,sha256=dJ4DymZsknFi3F6kHZmPTkEJ774tRXsZgD6R6v6sz0o,13987
|
|
12
12
|
ess/reduce/live/__init__.py,sha256=jPQVhihRVNtEDrE20PoKkclKV2aBF1lS7cCHootgFgI,204
|
|
13
13
|
ess/reduce/live/raw.py,sha256=CkPqp4VMNvj0IcFPp1J0n7sVt5PNKdIXnDlALCg9W_Q,31031
|
|
14
14
|
ess/reduce/live/roi.py,sha256=Hs-pW98k41WU6Kl3UQ41kQawk80c2QNOQ_WNctLzDPE,3795
|
|
@@ -41,9 +41,9 @@ ess/reduce/widgets/_spinner.py,sha256=2VY4Fhfa7HMXox2O7UbofcdKsYG-AJGrsgGJB85nDX
|
|
|
41
41
|
ess/reduce/widgets/_string_widget.py,sha256=iPAdfANyXHf-nkfhgkyH6gQDklia0LebLTmwi3m-iYQ,1482
|
|
42
42
|
ess/reduce/widgets/_switchable_widget.py,sha256=fjKz99SKLhIF1BLgGVBSKKn3Lu_jYBwDYGeAjbJY3Q8,2390
|
|
43
43
|
ess/reduce/widgets/_vector_widget.py,sha256=aTaBqCFHZQhrIoX6-sSqFWCPePEW8HQt5kUio8jP1t8,1203
|
|
44
|
-
essreduce-25.11.
|
|
45
|
-
essreduce-25.11.
|
|
46
|
-
essreduce-25.11.
|
|
47
|
-
essreduce-25.11.
|
|
48
|
-
essreduce-25.11.
|
|
49
|
-
essreduce-25.11.
|
|
44
|
+
essreduce-25.11.5.dist-info/licenses/LICENSE,sha256=nVEiume4Qj6jMYfSRjHTM2jtJ4FGu0g-5Sdh7osfEYw,1553
|
|
45
|
+
essreduce-25.11.5.dist-info/METADATA,sha256=4FcweJeK7mrl5NtA1cvbTWyem_tw1I6XA_TFNQwL-qc,1937
|
|
46
|
+
essreduce-25.11.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
47
|
+
essreduce-25.11.5.dist-info/entry_points.txt,sha256=PMZOIYzCifHMTe4pK3HbhxUwxjFaZizYlLD0td4Isb0,66
|
|
48
|
+
essreduce-25.11.5.dist-info/top_level.txt,sha256=0JxTCgMKPLKtp14wb1-RKisQPQWX7i96innZNvHBr-s,4
|
|
49
|
+
essreduce-25.11.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|