hoa-tools 1.0.8__py3-none-any.whl → 2.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of hoa-tools might be problematic.
- hoa_tools/_n5.py +432 -6
- hoa_tools/_version.py +2 -2
- hoa_tools/dataset.py +15 -10
- hoa_tools/metadata.py +9 -6
- {hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/METADATA +2 -2
- {hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/RECORD +9 -9
- {hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/WHEEL +0 -0
- {hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/licenses/LICENSE.md +0 -0
- {hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/top_level.txt +0 -0
hoa_tools/_n5.py
CHANGED
@@ -1,10 +1,436 @@
+import codecs
+import json
+import numbers
+import re
+import struct
+import sys
+from collections.abc import AsyncIterator, Iterable, Sequence
 from typing import Any
 
-import
-
+import numpy as np
+import numpy.typing as npt
+from numcodecs.abc import Codec
+from numcodecs.registry import get_codec, register_codec
+from zarr.abc.store import ByteRequest
+from zarr.core.buffer import Buffer, BufferPrototype, default_buffer_prototype
+from zarr.storage import FsspecStore
 
+zarr_group_meta_key = ".zgroup"
+zarr_array_meta_key = ".zarray"
+zarr_attrs_key = ".zattrs"
+n5_attrs_key = "attributes.json"
+N5_FORMAT = "2.0.0"
+ZARR_FORMAT = 2
 
-
-
-
-
+zarr_to_n5_keys = [
+    ("chunks", "blockSize"),
+    ("dtype", "dataType"),
+    ("compressor", "compression"),
+    ("shape", "dimensions"),
+]
+n5_keywords = ["n5", "dataType", "dimensions", "blockSize", "compression"]
+
+
+class N5FSStore(FsspecStore):
+    async def get(
+        self,
+        key: str,
+        prototype: BufferPrototype,
+        byte_range: ByteRequest | None = None,
+    ) -> Buffer | None:
+        if key.endswith(zarr_group_meta_key):
+            key_new = key.replace(zarr_group_meta_key, n5_attrs_key)
+            value = group_metadata_to_zarr(await self._load_n5_attrs(key_new))
+
+            return prototype.buffer.from_bytes(json_dumps(value))
+
+        if key.endswith(zarr_array_meta_key):
+            key_new = key.replace(zarr_array_meta_key, n5_attrs_key)
+            top_level = key == zarr_array_meta_key
+            value = array_metadata_to_zarr(
+                await self._load_n5_attrs(key_new), top_level=top_level
+            )
+            return prototype.buffer.from_bytes(json_dumps(value))
+
+        if key.endswith(zarr_attrs_key):
+            key_new = key.replace(zarr_attrs_key, n5_attrs_key)
+            value = attrs_to_zarr(await self._load_n5_attrs(key_new))
+
+            if len(value) == 0:
+                raise KeyError(key_new)
+            return prototype.buffer.from_bytes(json_dumps(value))
+
+        key_new = invert_chunk_coords(key) if is_chunk_key(key) else key
+
+        return await super().get(key_new, prototype=prototype, byte_range=byte_range)
+
+    async def get_partial_values(
+        self,
+        prototype: BufferPrototype,
+        key_ranges: Iterable[tuple[str, ByteRequest | None]],
+    ) -> list[Buffer | None]:
+        raise NotImplementedError
+
+    async def exists(self, key: str) -> bool:
+        if key.endswith(zarr_group_meta_key):
+            key_new = key.replace(zarr_group_meta_key, n5_attrs_key)
+            if not await super().exists(key_new):
+                return False
+            # group if not a dataset (attributes do not contain 'dimensions')
+            return "dimensions" not in await self._load_n5_attrs(key_new)
+
+        if key.endswith(zarr_array_meta_key):
+            key_new = key.replace(zarr_array_meta_key, n5_attrs_key)
+            # array if attributes contain 'dimensions'
+            return "dimensions" in await self._load_n5_attrs(key_new)
+
+        if key.endswith(zarr_attrs_key):
+            key_new = key.replace(zarr_attrs_key, n5_attrs_key)
+            return await self._contains_attrs(key_new)
+
+        key_new = invert_chunk_coords(key) if is_chunk_key(key) else key
+
+        return await super().exists(key_new)
+
+    @property
+    def supports_writes(self) -> bool:  # type: ignore[override]
+        return False
+
+    async def set(
+        self,
+        key: str,
+        value: Buffer,
+        byte_range: tuple[int, int] | None = None,
+    ) -> None:
+        raise NotImplementedError
+
+    @property
+    def supports_deletes(self) -> bool:  # type: ignore[override]
+        return False
+
+    async def delete(self, key: str) -> None:
+        raise NotImplementedError
+
+    @property
+    def supports_listing(self) -> bool:  # type: ignore[override]
+        return False
+
+    def list(self) -> AsyncIterator[str]:
+        raise NotImplementedError
+
+    def list_prefix(self, prefix: str) -> AsyncIterator[str]:
+        raise NotImplementedError
+
+    def list_dir(self, prefix: str) -> AsyncIterator[str]:
+        # This method should be async, like overridden methods in child classes.
+        # However, that's not straightforward:
+        # https://stackoverflow.com/questions/68905848
+        raise NotImplementedError
+
+    async def _load_n5_attrs(self, path: str) -> dict[str, Any]:
+        try:
+            s = await super().get(path, prototype=default_buffer_prototype())
+            if s is None:
+                raise RuntimeError(f"No N5 attributes at path {path}")
+            return json_loads(s.to_bytes())
+        except KeyError:
+            return {}
+
+    async def _contains_attrs(self, path: str | None) -> bool:
+        if path is None:
+            attrs_key = n5_attrs_key
+        elif not path.endswith(n5_attrs_key):
+            attrs_key = f"{path}/{n5_attrs_key}"
+        else:
+            attrs_key = path
+
+        attrs = attrs_to_zarr(await self._load_n5_attrs(attrs_key))
+        return len(attrs) > 0
+
+
+# match strings of numbers with "." between
+# (e.g., "1.2.4", "1.2", "5")
+_prog_ckey = re.compile(r"^(\d+)(\.\d+)+$")
+
+
+def is_chunk_key(key: str) -> bool:
+    rv = False
+    segments = list(key.split("/"))
+    if segments:
+        last_segment = segments[-1]
+        rv = bool(_prog_ckey.match(last_segment))
+    return rv
+
+
+def invert_chunk_coords(key: str) -> str:
+    segments = list(key.split("/"))
+    if segments:
+        last_segment = segments[-1]
+        if _prog_ckey.match(last_segment):
+            coords = list(last_segment.split("."))
+            last_segment = "/".join(coords[::-1])
+            segments = [*segments[:-1], last_segment]
+            key = "/".join(segments)
+    return key
+
+
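The two helpers above translate chunk keys between zarr v2's flat, dot-separated naming ("1.2.3") and N5's nested directory layout, reversing the coordinate order along the way (the same reversal that array_metadata_to_zarr below applies to shape and chunks). A condensed, standalone restatement of that behaviour:

import re

# Same pattern as _prog_ckey above: dot-separated chunk coordinates.
_prog_ckey = re.compile(r"^(\d+)(\.\d+)+$")


def invert_chunk_coords(key: str) -> str:
    segments = key.split("/")
    last = segments[-1]
    if _prog_ckey.match(last):
        # reverse the coordinates and switch to nested directories
        segments[-1] = "/".join(last.split(".")[::-1])
    return "/".join(segments)


assert invert_chunk_coords("s0/1.2.3") == "s0/3/2/1"  # zarr chunk key -> N5 chunk path
assert invert_chunk_coords("s0/foo") == "s0/foo"  # non-chunk keys pass through unchanged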
+def group_metadata_to_zarr(group_metadata: dict[str, Any]) -> dict[str, Any]:
+    """Convert group metadata from N5 to zarr format."""
+    return {"zarr_format": ZARR_FORMAT}
+
+
+def array_metadata_to_zarr(
+    array_metadata: dict[str, Any], *, top_level: bool = False
+) -> dict[str, Any]:
+    """
+    Convert array metadata from N5 to zarr format.
+
+    If the `top_level` keyword argument is True,
+    then the `N5` key will be removed from metadata
+    """
+    for t, f in zarr_to_n5_keys:
+        array_metadata[t] = array_metadata.pop(f)
+    if top_level:
+        array_metadata.pop("n5")
+        array_metadata["zarr_format"] = ZARR_FORMAT
+
+    array_metadata["shape"] = array_metadata["shape"][::-1]
+    array_metadata["chunks"] = array_metadata["chunks"][::-1]
+    array_metadata["fill_value"] = 0  # also if None was requested
+    array_metadata["order"] = "C"
+    array_metadata["filters"] = None
+    array_metadata["dimension_separator"] = "."
+    array_metadata["dtype"] = np.dtype(array_metadata["dtype"]).str
+
+    compressor_config = array_metadata["compressor"]
+    compressor_config = compressor_config_to_zarr(compressor_config)
+    array_metadata["compressor"] = {
+        "id": N5ChunkWrapper.codec_id,
+        "compressor_config": compressor_config,
+        "dtype": array_metadata["dtype"],
+        "chunk_shape": array_metadata["chunks"],
+    }
+
+    return array_metadata
+
+
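To make the metadata translation concrete, here is roughly what it does to a small, invented N5 attributes dictionary (output shown for a little-endian machine; the shapes and gzip settings are arbitrary):

# Hypothetical N5 dataset attributes:
n5_attrs = {
    "n5": "2.0.0",
    "dimensions": [100, 200, 300],  # fastest-varying dimension first
    "blockSize": [64, 64, 64],
    "dataType": "uint16",
    "compression": {"type": "gzip", "level": -1, "useZlib": False},
}

# array_metadata_to_zarr(n5_attrs, top_level=True) then returns:
{
    "zarr_format": 2,
    "shape": [300, 200, 100],  # reversed: zarr lists the slowest dimension first
    "chunks": [64, 64, 64],
    "dtype": "<u2",
    "fill_value": 0,
    "order": "C",
    "filters": None,
    "dimension_separator": ".",
    "compressor": {
        "id": "n5_wrapper",
        "compressor_config": {"id": "gzip", "level": -1},
        "dtype": "<u2",
        "chunk_shape": [64, 64, 64],
    },
}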
+def attrs_to_zarr(attrs: dict[str, Any]) -> dict[str, Any]:
+    """
+    Get all zarr attributes from an N5 attributes dictionary.
+
+    (i.e.,
+    all non-keyword attributes).
+
+    """
+    # remove all N5 keywords
+    for n5_key in n5_keywords:
+        attrs.pop(n5_key, None)
+
+    return attrs
+
+
+def json_loads(s: bytes | str) -> dict[str, Any]:
+    """Read JSON in a consistent way."""
+    return json.loads(ensure_text(s, "utf-8"))  # type: ignore[no-any-return]
+
+
+def json_dumps(o: Any) -> bytes:
+    """Write JSON in a consistent, human-readable way."""
+    return json.dumps(
+        o,
+        indent=4,
+        sort_keys=True,
+        ensure_ascii=True,
+        separators=(",", ": "),
+        cls=NumberEncoder,
+    ).encode("ascii")
+
+
+class NumberEncoder(json.JSONEncoder):
+    def default(self, o: Any) -> float:
+        # See json.JSONEncoder.default docstring for explanation
+        # This is necessary to encode numpy dtype
+        if isinstance(o, numbers.Integral):
+            return int(o)
+        if isinstance(o, numbers.Real):
+            return float(o)
+        return json.JSONEncoder.default(self, o)  # type: ignore[no-any-return]
+
+
+def ensure_text(s: bytes | str, encoding: str = "utf-8") -> str:
+    if not isinstance(s, str):
+        return codecs.decode(s, encoding)
+    return s
+
+
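The custom encoder matters because translated N5 metadata can carry numpy scalars, which the stdlib encoder rejects; numpy registers its scalar types with the numbers ABCs, so the two isinstance checks above catch them. A quick, runnable illustration:

import json

import numpy as np

from hoa_tools._n5 import NumberEncoder

# json.dumps(np.int64(7)) raises TypeError with the default encoder;
# NumberEncoder coerces numpy scalars to plain Python numbers first.
print(json.dumps({"level": np.int64(7)}, cls=NumberEncoder))  # {"level": 7}
print(json.dumps({"scale": np.float32(0.5)}, cls=NumberEncoder))  # {"scale": 0.5}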
+def compressor_config_to_zarr(
+    compressor_config: dict[str, Any],
+) -> dict[str, Any] | None:
+    codec_id = compressor_config["type"]
+    zarr_config = {"id": codec_id}
+
+    if codec_id == "bzip2":
+        zarr_config["id"] = "bz2"
+        zarr_config["level"] = compressor_config["blockSize"]
+
+    elif codec_id == "blosc":
+        zarr_config["cname"] = compressor_config["cname"]
+        zarr_config["clevel"] = compressor_config["clevel"]
+        zarr_config["shuffle"] = compressor_config["shuffle"]
+        zarr_config["blocksize"] = compressor_config["blocksize"]
+
+    elif codec_id == "lzma":
+        zarr_config["format"] = compressor_config["format"]
+        zarr_config["check"] = compressor_config["check"]
+        zarr_config["preset"] = compressor_config["preset"]
+        zarr_config["filters"] = compressor_config["filters"]
+
+    elif codec_id == "xz":
+        zarr_config["id"] = "lzma"
+        zarr_config["format"] = 1  # lzma.FORMAT_XZ
+        zarr_config["check"] = -1
+        zarr_config["preset"] = compressor_config["preset"]
+        zarr_config["filters"] = None
+
+    elif codec_id == "gzip":
+        if compressor_config.get("useZlib"):
+            zarr_config["id"] = "zlib"
+            zarr_config["level"] = compressor_config["level"]
+        else:
+            zarr_config["id"] = "gzip"
+            zarr_config["level"] = compressor_config["level"]
+
+    elif codec_id == "raw":
+        return None
+
+    else:
+        zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"})
+
+    return zarr_config
+
+
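A few concrete input/output pairs for the codec translation above (the configuration values are arbitrary examples):

from hoa_tools._n5 import compressor_config_to_zarr

# N5 "raw" means uncompressed, which zarr expresses as compressor=None:
assert compressor_config_to_zarr({"type": "raw"}) is None

# N5's gzip codec may really be zlib-encoded:
assert compressor_config_to_zarr({"type": "gzip", "level": 6, "useZlib": True}) == {
    "id": "zlib",
    "level": 6,
}

# N5 "xz" maps onto the lzma codec pinned to FORMAT_XZ:
assert compressor_config_to_zarr({"type": "xz", "preset": 6}) == {
    "id": "lzma",
    "format": 1,
    "check": -1,
    "preset": 6,
    "filters": None,
}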
+class N5ChunkWrapper(Codec):  # type: ignore[misc]
+    codec_id = "n5_wrapper"
+    chunk_shape: tuple[int, ...]
+    dtype: np.dtype
+
+    def __init__(
+        self,
+        dtype: npt.DTypeLike,
+        chunk_shape: Sequence[int],
+        compressor_config: dict[str, Any] | None = None,
+        compressor: Codec | None = None,
+    ):
+        self.dtype = np.dtype(dtype)
+        self.chunk_shape = tuple(chunk_shape)
+        # is the dtype a little endian format?
+        self._little_endian = self.dtype.byteorder == "<" or (
+            self.dtype.byteorder == "=" and sys.byteorder == "little"
+        )
+
+        if compressor is not None:
+            if compressor_config is not None:
+                raise ValueError(
+                    "Only one of compressor_config or compressor should be given."
+                )
+            compressor_config = compressor.get_config()
+
+        if compressor_config is None or compressor_config["id"] == "raw":
+            self.compressor_config = None
+            self._compressor = None
+        else:
+            self._compressor = get_codec(compressor_config)
+            self.compressor_config = self._compressor.get_config()
+
+    def get_config(self) -> dict[str, Any]:
+        return {"id": self.codec_id, "compressor_config": self.compressor_config}
+
+    def encode(self, chunk: npt.NDArray[Any]) -> bytes:
+        assert chunk.flags.c_contiguous, "Chunk is not C contiguous"  # noqa: S101
+
+        header = self._create_header(chunk)
+        chunk = self._to_big_endian(chunk)
+
+        if self._compressor:
+            return header + self._compressor.encode(chunk)  # type: ignore[no-any-return]
+        return header + chunk.tobytes(order="A")
+
+    def decode(
+        self, chunk: bytes, out: npt.NDArray[Any] | None = None
+    ) -> npt.NDArray[Any]:
+        len_header, chunk_shape = self._read_header(chunk)
+        chunk = chunk[len_header:]
+
+        if out is not None:
+            # out should only be used if we read a complete chunk
+            assert chunk_shape == self.chunk_shape, (  # noqa: S101
+                f"Expected chunk of shape {self.chunk_shape}, found {chunk_shape}"
+            )
+
+            if self._compressor:
+                self._compressor.decode(chunk, out)
+            else:
+                raise RuntimeError("Can't handle case with no compressor")
+                # ndarray_copy(chunk, out)
+
+            # we can byteswap in-place
+            if self._little_endian:
+                out.byteswap(inplace=True)
+
+            return out
+
+        if self._compressor:
+            chunk = self._compressor.decode(chunk)
+
+        # more expensive byteswap
+        chunk = self._from_big_endian(chunk)  # type: ignore[assignment]
+
+        # read partial chunk
+        if chunk_shape != self.chunk_shape:
+            chunk = np.frombuffer(chunk, dtype=self.dtype)  # type: ignore[assignment]
+            chunk = chunk.reshape(chunk_shape)  # type: ignore[attr-defined]
+            complete_chunk = np.zeros(self.chunk_shape, dtype=self.dtype)
+            target_slices = tuple(slice(0, s) for s in chunk_shape)
+            complete_chunk[target_slices] = chunk
+            chunk = complete_chunk  # type: ignore[assignment]
+
+        return chunk  # type: ignore[return-value]
+
+    @staticmethod
+    def _create_header(chunk: npt.NDArray[Any]) -> bytes:
+        mode = struct.pack(">H", 0)
+        num_dims = struct.pack(">H", len(chunk.shape))
+        shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1])
+
+        return mode + num_dims + shape
+
+    @staticmethod
+    def _read_header(chunk: bytes) -> tuple[int, tuple[int, ...]]:
+        num_dims = struct.unpack(">H", chunk[2:4])[0]
+        shape = tuple(
+            struct.unpack(">I", chunk[i : i + 4])[0]
+            for i in range(4, num_dims * 4 + 4, 4)
+        )[::-1]
+
+        len_header = 4 + num_dims * 4
+
+        return len_header, shape
+
+    def _to_big_endian(self, data: npt.NDArray[Any]) -> npt.NDArray[Any]:
+        # assumes data is ndarray
+
+        if self._little_endian:
+            return data.byteswap()
+        return data
+
+    def _from_big_endian(self, data: bytes) -> npt.NDArray[Any]:
+        # assumes data is byte array in big endian
+
+        if not self._little_endian:
+            return data  # type:ignore[return-value]
+
+        a = np.frombuffer(data, self.dtype.newbyteorder(">"))
+        return a.astype(self.dtype)
+
+
+register_codec(N5ChunkWrapper, N5ChunkWrapper.codec_id)
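The header helpers implement N5's default-mode block header: a big-endian uint16 mode (0), a uint16 dimension count, then one uint32 per dimension, fastest-varying first, so a 2-D chunk carries a 12-byte header. As a sanity check, the wrapper codec can be round-tripped by hand; a minimal sketch with arbitrary dtype, shape, and compression level (note that on the complete-chunk path, decode returns a flat array that the zarr machinery reshapes downstream):

import numpy as np

from hoa_tools._n5 import N5ChunkWrapper

codec = N5ChunkWrapper(
    dtype="uint16",
    chunk_shape=(4, 4),
    compressor_config={"id": "gzip", "level": 5},
)
chunk = np.arange(16, dtype="uint16").reshape(4, 4)

encoded = codec.encode(chunk)  # 12-byte header + gzip-compressed big-endian payload
decoded = codec.decode(encoded)  # flat array of 16 native-endian uint16 values

np.testing.assert_array_equal(chunk.ravel(), decoded)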
hoa_tools/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '1.0.8'
-__version_tuple__ = version_tuple = (1, 0, 8)
+__version__ = version = '2.0.0b1'
+__version_tuple__ = version_tuple = (2, 0, 0, 'b1')
 
 __commit_id__ = commit_id = None
hoa_tools/dataset.py
CHANGED
@@ -20,8 +20,7 @@ import gcsfs
 import networkx as nx
 import numpy as np
 import xarray as xr
-import zarr.
-import zarr.n5
+import zarr.abc.store
 import zarr.storage
 
 from hoa_tools._n5 import N5FSStore
@@ -63,7 +62,7 @@ class Dataset(HOAMetadata):
         """
         Whether this dataset contains the whole organ or not.
         """
-        return self.voi.startswith("complete")
+        return self.voi.startswith("complete") or self.voi.startswith("overview")
 
     @property
     def is_zoom(self) -> bool:
@@ -148,15 +147,21 @@ class Dataset(HOAMetadata):
 
         # n5://gs://ucl-hip-ct-35a68e99feaae8932b1d44da0358940b/S-20-29/heart/2.5um_VOI-01_bm05/
         bucket, path = gcs_path.split("/", maxsplit=1)
-        fs = gcsfs.GCSFileSystem(
+        fs = gcsfs.GCSFileSystem(
+            project="ucl-hip-ct",
+            token="anon",  # noqa: S106
+            access="read_only",
+            asynchronous=True,
+        )
+        store: zarr.abc.store.Store
         if self._remote_fmt == "n5":
-            store = N5FSStore(
+            store = N5FSStore(fs=fs, path=f"/{bucket}", read_only=True)
         elif self._remote_fmt == "zarr":
-            store = zarr.storage.
+            store = zarr.storage.FsspecStore(fs=fs, path=f"/{bucket}", read_only=True)
 
-        return zarr.open_group(store, mode="r", path=path)
+        return zarr.open_group(store, mode="r", path=path, zarr_format=2)
 
-    def _remote_array(self, *, downsample_level: int) -> zarr.
+    def _remote_array(self, *, downsample_level: int) -> zarr.Array:
         """
         Get an object representing the data array in the remote Google Cloud Store.
         """
@@ -168,7 +173,7 @@ class Dataset(HOAMetadata):
             key = f"s{downsample_level}"
         else:
             key = f"{downsample_level}"
-        return self._remote_store[key]
+        return self._remote_store[key]  # type: ignore[return-value]
 
     def data_array(self, *, downsample_level: int) -> xr.DataArray:
         """
@@ -216,7 +221,7 @@ def _load_datasets_from_files(data_dir: Path) -> dict[str, Dataset]:
     }
     if len(datasets) == 0:
         raise FileNotFoundError(
-            f"Did not find any dataset metadata files at {data_dir}"
+            f"Did not find any dataset metadata files at {data_dir}"
        )
    return datasets
 
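The rewiring above targets the zarr 3 store API: the gcsfs filesystem is now created with asynchronous=True, both the N5 and plain-zarr branches wrap it in a read-only store object, and the group is opened as an explicitly zarr_format=2 hierarchy. A minimal sketch of the same pattern against a hypothetical public bucket (the bucket, group path, and "s0" array name are placeholders, not real Human Organ Atlas paths):

import gcsfs
import zarr
import zarr.storage

fs = gcsfs.GCSFileSystem(token="anon", access="read_only", asynchronous=True)
store = zarr.storage.FsspecStore(fs=fs, path="/some-public-bucket", read_only=True)
group = zarr.open_group(store, mode="r", path="some/dataset", zarr_format=2)
array = group["s0"]  # e.g. one downsample level within the group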
hoa_tools/metadata.py
CHANGED
@@ -1,10 +1,10 @@
 # generated by datamodel-codegen:
 #   filename: metadata-schema.json
-#   timestamp: 2025-
+#   timestamp: 2025-10-22T15:38:14+00:00
 
 from __future__ import annotations
 
-from datetime import date
+from datetime import date as date_aliased
 from typing import Annotated, Literal
 
 from pydantic import BaseModel, ConfigDict, Field, RootModel
@@ -111,7 +111,7 @@ class Donor(BaseModel):
         Height | None, Field(description="Height in cm at death.", title="Height")
     ] = None
    cause_of_death: Annotated[str | None, Field(title="Cause of death")] = None
-    date_of_death: Annotated[
+    date_of_death: Annotated[date_aliased | None, Field(title="Date of death")] = None
    medical_history: Annotated[str | None, Field(title="Medical history")] = None
    diabetes: Annotated[
        Literal["Type 2", "No", "Yes"] | None, Field(title="Diabetes history")
@@ -183,6 +183,7 @@ class Sample(BaseModel):
        Literal[
            "brain",
            "breast",
+            "clitoris",
            "colon",
            "eye",
            "femaleGenitalia",
@@ -460,7 +461,8 @@ class XrayMagnification(RootModel[float]):
 
 class Scan(BaseModel):
    date: Annotated[
-
+        date_aliased,
+        Field(description="Date when the scan was performed.", title="Date"),
    ]
    beamline: Annotated[
        Literal["BM05", "BM18"],
@@ -622,12 +624,13 @@ class Scan(BaseModel):
            "Hasselblad tandem optic 100mm/300mm",
            "Hasselblad revolved 100 100",
            "LAFIP2 optic with canon 50mm",
+            "Fixed x10",
+            "Fixed x2.85",
            "Fixed x2",
            "Fixed x1",
+            "Fixed x0.5",
            "Fixed x0.125",
            "Fixed x0.1",
-            "Fixed x2.85",
-            "Fixed x10",
            "Twinmic 5",
            "Twinmic 10",
            "Twinmic 20",
{hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hoa-tools
-Version: 1.0.8
+Version: 2.0.0b1
 Summary: Tools for working with the Human Organ Atlas
 Author-email: David Stansby <d.stansby@ucl.ac.uk>
 License: Copyright (c) 2024, David Stansby
@@ -49,7 +49,7 @@ Requires-Dist: pandas>=2
 Requires-Dist: pydantic>=2
 Requires-Dist: simpleitk
 Requires-Dist: xarray
-Requires-Dist: zarr
+Requires-Dist: zarr>=3
 Provides-Extra: dev
 Requires-Dist: build; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
{hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
 hoa_tools/__init__.py,sha256=wc7Yn5CXR3hxr2JyV3eEyCx3p8UZdknXDFJPSthKE_g,87
-hoa_tools/_n5.py,sha256=
-hoa_tools/_version.py,sha256=
-hoa_tools/dataset.py,sha256=
+hoa_tools/_n5.py,sha256=7hl3Tf5n1hKVTM-l593w7eeYzQRenk_znFdyWnif-kw,14400
+hoa_tools/_version.py,sha256=fz65no5MBu9R7ellXZDEC0QTY0L4mFHp5ebemVVbIqg,712
+hoa_tools/dataset.py,sha256=DiQl-1lBZhG1M-Pbtalji5aqWe6Srb__VpeKo_EHxp0,10228
 hoa_tools/inventory.py,sha256=44IbE1eLFTetAeCadpBFYXXIwcX5fagq76zSMLWBOmE,613
-hoa_tools/metadata.py,sha256=
+hoa_tools/metadata.py,sha256=TwcTsXjwnhBdeSsb8TC_9XznQrfwXLOPi-teaZ9-UtI,23011
 hoa_tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hoa_tools/registration.py,sha256=BXxJYMHh6pngwTnjcx7phryWb_b0LHsXxsOvUk_es7k,3481
 hoa_tools/types.py,sha256=X8GzZXknwgmtTLWR2d_JxEfoavXAQ1zVrf8BaNdevSw,1452
@@ -306,8 +306,8 @@ hoa_tools/data/metadata/metadata/S-20-29_lung_left_complete-organ_25.31um_bm05.j
 hoa_tools/data/metadata/metadata/S-21-33_brain_complete-organ_19.57um_bm18.json,sha256=rrodLFZXIYWktEifFiRCy1EGZOSZw57Tm6_yliWagW4,7256
 hoa_tools/data/metadata/metadata/S-21-46_brain_complete-organ_19.57um_bm18.json,sha256=p4n2FRkBdoddNnI_fw1veRbMcrhOGjWcMcgPYJ73S6M,7415
 hoa_tools/data/metadata/metadata/S-22-16_brain_complete-organ_19.59um_bm18.json,sha256=aPKtOTz3qTPa0gvVuOi9kDDsBk0qmHNg8HjKb43gNbM,7426
-hoa_tools-
-hoa_tools-
-hoa_tools-
-hoa_tools-
-hoa_tools-
+hoa_tools-2.0.0b1.dist-info/licenses/LICENSE.md,sha256=CU-KGODbkoeDUXYPeLgpicDrFTffLQECao-wTKJrXFU,1480
+hoa_tools-2.0.0b1.dist-info/METADATA,sha256=cSxXHiPS_c1Wn8DK7q6YnbR7yDju-ce7V7TwpDjMCHQ,5133
+hoa_tools-2.0.0b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hoa_tools-2.0.0b1.dist-info/top_level.txt,sha256=NkWbNX-2byZN6hcVyowOVv0aiRNdH0uXpOkA0SFHDgY,10
+hoa_tools-2.0.0b1.dist-info/RECORD,,
{hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/WHEEL
File without changes
{hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/licenses/LICENSE.md
File without changes
{hoa_tools-1.0.8.dist-info → hoa_tools-2.0.0b1.dist-info}/top_level.txt
File without changes