siibra 0.5a2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siibra/VERSION +1 -1
- siibra/__init__.py +20 -12
- siibra/commons.py +145 -90
- siibra/configuration/__init__.py +1 -1
- siibra/configuration/configuration.py +22 -17
- siibra/configuration/factory.py +177 -128
- siibra/core/__init__.py +1 -8
- siibra/core/{relation_qualification.py → assignment.py} +17 -14
- siibra/core/atlas.py +66 -35
- siibra/core/concept.py +81 -39
- siibra/core/parcellation.py +83 -67
- siibra/core/region.py +569 -263
- siibra/core/space.py +7 -39
- siibra/core/structure.py +111 -0
- siibra/exceptions.py +63 -0
- siibra/experimental/__init__.py +19 -0
- siibra/experimental/contour.py +61 -0
- siibra/experimental/cortical_profile_sampler.py +57 -0
- siibra/experimental/patch.py +98 -0
- siibra/experimental/plane3d.py +256 -0
- siibra/explorer/__init__.py +16 -0
- siibra/explorer/url.py +112 -52
- siibra/explorer/util.py +31 -9
- siibra/features/__init__.py +73 -8
- siibra/features/anchor.py +75 -196
- siibra/features/connectivity/__init__.py +1 -1
- siibra/features/connectivity/functional_connectivity.py +2 -2
- siibra/features/connectivity/regional_connectivity.py +99 -10
- siibra/features/connectivity/streamline_counts.py +1 -1
- siibra/features/connectivity/streamline_lengths.py +1 -1
- siibra/features/connectivity/tracing_connectivity.py +1 -1
- siibra/features/dataset/__init__.py +1 -1
- siibra/features/dataset/ebrains.py +3 -3
- siibra/features/feature.py +219 -110
- siibra/features/image/__init__.py +1 -1
- siibra/features/image/image.py +21 -13
- siibra/features/image/sections.py +1 -1
- siibra/features/image/volume_of_interest.py +1 -1
- siibra/features/tabular/__init__.py +1 -1
- siibra/features/tabular/bigbrain_intensity_profile.py +24 -13
- siibra/features/tabular/cell_density_profile.py +111 -69
- siibra/features/tabular/cortical_profile.py +82 -16
- siibra/features/tabular/gene_expression.py +117 -6
- siibra/features/tabular/layerwise_bigbrain_intensities.py +7 -9
- siibra/features/tabular/layerwise_cell_density.py +9 -24
- siibra/features/tabular/receptor_density_fingerprint.py +11 -6
- siibra/features/tabular/receptor_density_profile.py +12 -15
- siibra/features/tabular/regional_timeseries_activity.py +74 -18
- siibra/features/tabular/tabular.py +17 -8
- siibra/livequeries/__init__.py +1 -7
- siibra/livequeries/allen.py +139 -77
- siibra/livequeries/bigbrain.py +104 -128
- siibra/livequeries/ebrains.py +7 -4
- siibra/livequeries/query.py +1 -2
- siibra/locations/__init__.py +32 -25
- siibra/locations/boundingbox.py +153 -127
- siibra/locations/location.py +45 -80
- siibra/locations/point.py +97 -83
- siibra/locations/pointcloud.py +349 -0
- siibra/retrieval/__init__.py +1 -1
- siibra/retrieval/cache.py +107 -13
- siibra/retrieval/datasets.py +9 -14
- siibra/retrieval/exceptions/__init__.py +2 -1
- siibra/retrieval/repositories.py +147 -53
- siibra/retrieval/requests.py +64 -29
- siibra/vocabularies/__init__.py +2 -2
- siibra/volumes/__init__.py +7 -9
- siibra/volumes/parcellationmap.py +396 -253
- siibra/volumes/providers/__init__.py +20 -0
- siibra/volumes/providers/freesurfer.py +113 -0
- siibra/volumes/{gifti.py → providers/gifti.py} +29 -18
- siibra/volumes/{neuroglancer.py → providers/neuroglancer.py} +204 -92
- siibra/volumes/{nifti.py → providers/nifti.py} +64 -44
- siibra/volumes/providers/provider.py +107 -0
- siibra/volumes/sparsemap.py +159 -260
- siibra/volumes/volume.py +720 -152
- {siibra-0.5a2.dist-info → siibra-1.0.0a1.dist-info}/METADATA +25 -28
- siibra-1.0.0a1.dist-info/RECORD +84 -0
- {siibra-0.5a2.dist-info → siibra-1.0.0a1.dist-info}/WHEEL +1 -1
- siibra/locations/pointset.py +0 -198
- siibra-0.5a2.dist-info/RECORD +0 -74
- {siibra-0.5a2.dist-info → siibra-1.0.0a1.dist-info}/LICENSE +0 -0
- {siibra-0.5a2.dist-info → siibra-1.0.0a1.dist-info}/top_level.txt +0 -0
siibra/retrieval/cache.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2018-
+# Copyright 2018-2024
 # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,14 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Maintaining and
+"""Maintaining and handling caching files on disk."""
 
 import hashlib
 import os
 from appdirs import user_cache_dir
 import tempfile
+from functools import wraps
+from enum import Enum
+from typing import Callable, List, NamedTuple, Union
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from filelock import FileLock as Lock
 
-from ..commons import logger, SIIBRA_CACHEDIR, SKIP_CACHEINIT_MAINTENANCE
+from ..commons import logger, SIIBRA_CACHEDIR, SKIP_CACHEINIT_MAINTENANCE, siibra_tqdm
+from ..exceptions import WarmupRegException
 
 
 def assert_folder(folder):
@@ -94,7 +101,7 @@ class Cache:
             targetsize -= st.st_size / 1024**3
 
         if index > 0:
-            logger.debug(f"Removing the {index+1} oldest files to keep cache size below {targetsize:.2f} GiB.")
+            logger.debug(f"Removing the {index + 1} oldest files to keep cache size below {targetsize:.2f} GiB.")
         for fn, st in sfiles[:index + 1]:
             if os.path.isdir(fn):
                 import shutil
@@ -114,15 +121,20 @@ class Cache:
         """ Iterate all element names in the cache directory. """
         return (os.path.join(self.folder, f) for f in os.listdir(self.folder))
 
-    def build_filename(self, str_rep: str, suffix=None):
-        """
-
-
-
-
-
-
-
+    def build_filename(self, str_rep: str, suffix=None) -> str:
+        """
+        Generate a filename in the cache.
+
+        Parameters
+        ----------
+        str_rep: str
+            Unique string representation of the item. Will be used to compute a hash.
+        suffix: str. Default: None
+            Optional file suffix, in order to allow filetype recognition by the name.
+
+        Returns
+        -------
+        str
         """
         hashfile = os.path.join(
             self.folder, str(hashlib.sha256(str_rep.encode("ascii")).hexdigest())
@@ -137,3 +149,85 @@ class Cache:
 
 
 CACHE = Cache.instance()
+
+
+class WarmupLevel(int, Enum):
+    TEST = -1000
+    INSTANCE = 1
+    DATA = 5
+
+
+class WarmupParam(NamedTuple):
+    level: Union[int, WarmupLevel]
+    fn: Callable
+    is_factory: bool = False
+
+
+class Warmup:
+
+    _warmup_fns: List[WarmupParam] = []
+
+    @staticmethod
+    def fn_eql(wrapped_fn, original_fn):
+        return wrapped_fn is original_fn or wrapped_fn.__wrapped__ is original_fn
+
+    @classmethod
+    def is_registered(cls, fn):
+        return len([warmup_fn.fn
+                    for warmup_fn in cls._warmup_fns
+                    if cls.fn_eql(warmup_fn.fn, fn)]) > 0
+
+    @classmethod
+    def register_warmup_fn(cls, warmup_level: WarmupLevel = WarmupLevel.INSTANCE, *, is_factory=False):
+        def outer(fn):
+            if cls.is_registered(fn):
+                raise WarmupRegException
+
+            @wraps(fn)
+            def inner(*args, **kwargs):
+                return fn(*args, **kwargs)
+
+            cls._warmup_fns.append(WarmupParam(warmup_level, inner, is_factory))
+            return inner
+        return outer
+
+    @classmethod
+    def deregister_warmup_fn(cls, original_fn):
+        cls._warmup_fns = [
+            warmup_fn for warmup_fn in cls._warmup_fns
+            if not cls.fn_eql(warmup_fn.fn, original_fn)
+        ]
+
+    @classmethod
+    def warmup(cls, warmup_level: WarmupLevel = WarmupLevel.INSTANCE, *, max_workers=4):
+        all_fns = [warmup for warmup in cls._warmup_fns if warmup.level <= warmup_level]
+
+        def call_fn(fn: WarmupParam):
+            return_val = fn.fn()
+            if not fn.is_factory:
+                return
+            for f in return_val:
+                f()
+
+        with Lock(CACHE.build_filename("lockfile", ".warmup")):
+            with ThreadPoolExecutor(max_workers=max_workers) as ex:
+                for _ in siibra_tqdm(
+                    ex.map(
+                        call_fn,
+                        all_fns
+                    ),
+                    desc="Warming cache",
+                    total=len(all_fns),
+                ):
+                    ...
+
+
+try:
+    from joblib import Memory
+    jobmemory_path = Path(CACHE.folder) / "joblib"
+    jobmemory_path.mkdir(parents=True, exist_ok=True)
+    jobmemory = Memory(jobmemory_path, verbose=0)
+    cache_user_fn = jobmemory.cache
+except ImportError:
+    from functools import lru_cache
+    cache_user_fn = lru_cache
siibra/retrieval/datasets.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2018-
+# Copyright 2018-2024
 # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -102,6 +102,13 @@ class EbrainsBaseDataset(ABC):
             f"Cannot match {spec.__class__}, must be either str or EbrainsBaseDataset"
         )
 
+    @property
+    def LICENSE(self) -> str:
+        license_ = self._detail.get("license", [])
+        if len(license_) > 0:
+            return license_ if isinstance(license_, str) else '\n'.join(license_)
+        return None
+
 
 class EbrainsDataset(EbrainsBaseDataset):
     """Ebrains dataset v1 connection"""
@@ -173,10 +180,6 @@ class EbrainsDataset(EbrainsBaseDataset):
     def custodians(self) -> EbrainsDatasetPerson:
         return self._detail.get("custodians")
 
-    @property
-    def LICENSE(self) -> str:
-        return self._detail.get("license", "No license information is found.")
-
 
 class EbrainsV3DatasetVersion(EbrainsBaseDataset):
     @staticmethod
@@ -271,10 +274,6 @@ class EbrainsV3DatasetVersion(EbrainsBaseDataset):
     def version_identifier(self):
         return self._detail.get("versionIdentifier", "")
 
-    @property
-    def LICENSE(self) -> str:
-        return self._detail.get("license", "No license information is found.")
-
 
 class EbrainsV3Dataset(EbrainsBaseDataset):
     def __init__(self, id, *, cached_data=None) -> None:
@@ -342,10 +341,6 @@ class EbrainsV3Dataset(EbrainsBaseDataset):
     def version_ids(self) -> List['str']:
         return [version.get("id") for version in self._detail.get("versions", [])]
 
-    @property
-    def LICENSE(self) -> str:
-        return self._detail.get("license", "No license information is found.")
-
 
 class GenericDataset():
 
@@ -355,7 +350,7 @@ class GenericDataset():
         contributors: List[str] = None,
         url: str = None,
         description: str = None,
-        license: str =
+        license: str = None
     ):
        self._name = name
        self._contributors = contributors
siibra/retrieval/exceptions/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2018-
+# Copyright 2018-2024
 # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +14,7 @@
 # limitations under the License.
 """Exceptions concerning file retrieval processes."""
 
+
 class NoSiibraConfigMirrorsAvailableException(Exception):
     pass
 
siibra/retrieval/repositories.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2018-
+# Copyright 2018-2024
 # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,7 +19,8 @@ from .requests import (
     EbrainsRequest,
     SiibraHttpRequestError,
     find_suitiable_decoder,
-    DECODERS
+    DECODERS,
+    FileLoader
 )
 from .cache import CACHE
 
@@ -31,7 +32,6 @@ import pathlib
 import os
 from zipfile import ZipFile
 from typing import List
-import requests
 
 
 class RepositoryConnector(ABC):
@@ -43,7 +43,23 @@ class RepositoryConnector(ABC):
         self.base_url = base_url
 
     @abstractmethod
-    def search_files(folder: str, suffix: str, recursive: bool = False) -> List[str]:
+    def search_files(self, folder: str, suffix: str, recursive: bool = False) -> List[str]:
+        """
+        Get the files within the repository.
+
+        Parameters
+        ----------
+        folder : str
+            folder or folders in the form 'path/to/file'
+        suffix : str
+        recursive : bool, default: False
+            If True, searches files in all subfolders
+
+        Returns
+        -------
+        List[str]
+            List of file names.
+        """
         pass
 
     @abstractmethod
@@ -109,34 +125,16 @@ class LocalFileRepository(RepositoryConnector):
         self._folder = pathlib.Path(folder)
         assert pathlib.Path.is_dir(self._folder)
 
-    def _build_url(self, folder: str, filename: str):
-        return pathlib.Path.joinpath(self._folder, folder, filename)
-
-    class FileLoader:
-        """
-        Just a loads a local file, but mimics the behaviour
-        of cached http requests used in other connectors.
-        """
-        def __init__(self, file_url, decode_func):
-            self.url = file_url
-            self.func = decode_func
-            self.cached = True
-
-        @property
-        def data(self):
-            with open(self.url, 'rb') as f:
-                return self.func(f.read())
+    def _build_url(self, folder: str, filename: str) -> str:
+        return pathlib.Path.joinpath(self._folder, folder, filename).as_posix()
 
     def get_loader(self, filename, folder="", decode_func=None):
         """Get a lazy loader for a file, for loading data
         only once loader.data is accessed."""
-
-        if
-            raise RuntimeError(f"
-
-            return self.FileLoader(url, lambda b: self._decode_response(b, filename))
-        else:
-            return self.FileLoader(url, decode_func)
+        filepath = self._build_url(folder, filename)
+        if not pathlib.Path(filepath).is_file():
+            raise RuntimeError(f"No file is found in {filepath}")
+        return FileLoader(filepath, decode_func)
 
     def search_files(self, folder="", suffix=None, recursive=False):
         results = []
@@ -150,10 +148,108 @@ class LocalFileRepository(RepositoryConnector):
     def __str__(self):
         return f"{self.__class__.__name__} at {self._folder}"
 
-    def __eq__(self, other):
+    def __eq__(self, other: "LocalFileRepository"):
         return self._folder == other._folder
 
 
+class GithubConnector(RepositoryConnector):
+
+    def __init__(
+        self,
+        owner: str,
+        repo: str,
+        reftag: str,
+        skip_branchtest=False,
+        archive_mode=False
+    ):
+        """
+        Connect to a GitHub repository with a specific ref (branch or tag).
+
+        Parameters
+        ----------
+        owner : str
+        repo : str
+        reftag : str
+            Tag or branch
+        skip_branchtest : bool, default: False
+            Whether to test if the reftag resides in the repository.
+        archive_mode : bool, default: False
+            Archive the repo (for reftag only) to siibra local cache.
+
+        Raises
+        ------
+        RuntimeError
+            If branch test could not find the reftag in the repo's list of tags
+            and branches.
+        """
+        RepositoryConnector.__init__(
+            self,
+            base_url=f"https://api.github.com/repos/{owner}/{repo}"
+        )
+        assert reftag, "Please supply a branch name or tag for `reftag` to create a `GithubConnector`."
+        if not skip_branchtest:
+            try:
+                tags = HttpRequest(f"{self.base_url}/tags", DECODERS[".json"], refresh=True).data
+                branches = HttpRequest(f"{self.base_url}/branches", DECODERS[".json"], refresh=True).data
+                matched_reftags = list(
+                    filter(lambda b: b["name"] == reftag, tags + branches)
+                )
+                if len(matched_reftags) == 1:
+                    self._want_commit_cached = matched_reftags[0]["commit"]
+                else:
+                    raise RuntimeError(f"Found {len(matched_reftags)} mathces to {reftag}")
+                self._tag_checked = True
+            except Exception:
+                logger.warning("Could not connect to GitHub repository.", exc_info=1)
+        self.reftag = reftag
+        self._raw_baseurl = f"https://raw.githubusercontent.com/{owner}/{repo}/{self.reftag}"
+        self.archive_mode = archive_mode
+        self._archive_conn: LocalFileRepository = None
+        self._recursed_tree = None
+
+    def search_files(self, folder="", suffix="", recursive=False) -> List[str]:
+        if self._recursed_tree is None:
+            self._recursed_tree = HttpRequest(
+                f"{self.base_url}/git/trees/{self.reftag}?recursive=1",
+                DECODERS[".json"]
+            ).data.get("tree", [])
+        folder_depth = len(folder.split('/')) if folder else 0
+        return [
+            f["path"] for f in self._recursed_tree
+            if f["type"] == "blob"
+            and f["path"].startswith(folder)
+            and f["path"].endswith(suffix)
+            and (recursive or len(f["path"].split('/')) == folder_depth + 1)
+        ]
+
+    def _build_url(self, folder: str, filename: str):
+        pathstr = pathlib.Path(folder, filename or "").as_posix()
+        return f'{self._raw_baseurl}/{quote(pathstr, safe="")}'
+
+    def get_loader(self, filename, folder="", decode_func=None):
+        if self.archive_mode:
+            self._archive()
+            return self._archive_conn.get_loader(filename, folder, decode_func)
+        else:
+            return super().get_loader(filename, folder, decode_func)
+
+    def _archive(self):
+        assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
+        archive_directory = CACHE.build_filename(self.base_url + self.reftag)
+        if not os.path.isdir(archive_directory):
+            import tarfile
+
+            tarball_url = f"{self.base_url}/tarball/{self.reftag}"
+            req = HttpRequest(tarball_url, func=lambda b: b)
+            req.get()
+            with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
+                tar.extractall(CACHE.folder)
+                foldername = tar.getnames()[0]
+            os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
+        if self._archive_conn is None:
+            # create LocalFileRepository as an interface to the local files
+            self._archive_conn = LocalFileRepository(archive_directory)
+
+
 class GitlabConnector(RepositoryConnector):
 
     def __init__(self, server: str, project: int, reftag: str, skip_branchtest=False, *, archive_mode=False):
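`GithubConnector` is new in this release: it resolves the ref against the GitHub REST API (`/tags`, `/branches`), lists files through the recursive git tree endpoint, and fetches raw file content from raw.githubusercontent.com; with `archive_mode=True` it downloads the ref's tarball once and serves all reads from a `LocalFileRepository` over the extracted copy. A usage sketch, assuming siibra is installed (the repository coordinates are illustrative):

from siibra.retrieval.repositories import GithubConnector

conn = GithubConnector(owner="FZJ-INM1-BG", repo="siibra-configurations",
                       reftag="master", archive_mode=True)
paths = conn.search_files(folder="atlases", suffix=".json", recursive=True)
loader = conn.get_loader(paths[0])  # lazy: nothing is fetched yet
data = loader.data                  # first access downloads/reads and decodes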
@@ -175,6 +271,7 @@ class GitlabConnector(RepositoryConnector):
         self._tag_checked = True if skip_branchtest else False
         self._want_commit_cached = None
         self.archive_mode = archive_mode
+        self._archive_conn: LocalFileRepository = None
 
     def __str__(self):
         return f"{self.__class__.__name__} {self.base_url} {self.reftag}"
@@ -206,7 +303,7 @@ class GitlabConnector(RepositoryConnector):
     def _build_url(self, folder="", filename=None, recursive=False, page=1):
         ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
         if filename is None:
-            pathstr = "" if len(folder) == 0 else f"&path={quote(folder,safe='')}"
+            pathstr = "" if len(folder) == 0 else f"&path={quote(folder, safe='')}"
             return f"{self.base_url}/tree?ref={ref}{pathstr}&per_page={self._per_page}&page={page}&recursive={recursive}"
         else:
             pathstr = filename if folder == "" else f"{folder}/{filename}"
@@ -233,33 +330,30 @@ class GitlabConnector(RepositoryConnector):
             if e["type"] == "blob" and e["name"].endswith(end)
         ]
 
-    def
-    if
-
+    def get_loader(self, filename, folder="", decode_func=None):
+        if self.archive_mode:
+            self._archive()
+            return self._archive_conn.get_loader(filename, folder, decode_func)
+        else:
+            return super().get_loader(filename, folder, decode_func)
 
+    def _archive(self):
+        assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
         ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
-        archive_directory = CACHE.build_filename(self.base_url + ref)
-
+        archive_directory = CACHE.build_filename(self.base_url + ref)
         if not os.path.isdir(archive_directory):
-
-            url = self.base_url + f"/archive.tar.gz?sha={ref}"
-            resp = requests.get(url)
-            tar_filename = f"{archive_directory}.tar.gz"
-
-            resp.raise_for_status()
-            with open(tar_filename, "wb") as fp:
-                fp.write(resp.content)
-
             import tarfile
-            tar = tarfile.open(tar_filename, "r:gz")
-            tar.extractall(archive_directory)
-            for _dir in os.listdir(archive_directory):
-                for file in os.listdir(f"{archive_directory}/{_dir}"):
-                    os.rename(f"{archive_directory}/{_dir}/{file}", f"{archive_directory}/{file}")
-                os.rmdir(f"{archive_directory}/{_dir}")
 
-
-
+            tarball_url = self.base_url + f"/archive.tar.gz?sha={ref}"
+            req = HttpRequest(tarball_url, func=lambda b: b)
+            req.get()
+            with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
+                tar.extractall(CACHE.folder)
+                foldername = tar.getnames()[0]
+            os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
+        if self._archive_conn is None:
+            # create LocalFileRepository as an interface to the local files
+            self._archive_conn = LocalFileRepository(archive_directory)
 
     def __eq__(self, other):
         return all([
@@ -353,7 +447,7 @@ class OwncloudConnector(RepositoryConnector):
         )
 
     def _build_url(self, folder, filename):
-        fpath = "" if folder == "" else f"path={quote(folder,safe='')}&"
+        fpath = "" if folder == "" else f"path={quote(folder, safe='')}&"
         fpath += f"files={quote(filename)}"
         url = f"{self.base_url}/download?{fpath}"
         return url
siibra/retrieval/requests.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2018-
+# Copyright 2018-2024
 # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
 
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +14,7 @@
 # limitations under the License.
 """Request files with decoders, lazy loading, and caching."""
 
-from .cache import CACHE
+from .cache import CACHE, cache_user_fn
 from .exceptions import EbrainsAuthenticationError
 from ..commons import (
     logger,
@@ -30,30 +30,52 @@ import json
 from zipfile import ZipFile
 import requests
 import os
-from nibabel import Nifti1Image, GiftiImage, streamlines
+from nibabel import Nifti1Image, GiftiImage, streamlines, freesurfer
 from skimage import io as skimage_io
 import gzip
 from io import BytesIO
 import urllib.parse
 import pandas as pd
 import numpy as np
-from typing import List, Callable,
+from typing import List, Callable, TYPE_CHECKING
 from enum import Enum
 from functools import wraps
 from time import sleep
 import sys
-import platform
-
-if platform.system() == "Linux":
-    from filelock import FileLock as Lock
-else:
-    from filelock import SoftFileLock as Lock
-
+from filelock import FileLock as Lock
 if TYPE_CHECKING:
     from .repositories import GitlabConnector
 
 USER_AGENT_HEADER = {"User-Agent": f"siibra-python/{__version__}"}
 
+
+def read_as_bytesio(function: Callable, suffix: str, bytesio: BytesIO):
+    """
+    Helper method to provide BytesIO to methods that only takes file path and
+    cannot handle BytesIO normally (e.g., `nibabel.freesurfer.read_annot()`).
+
+    Writes the bytes to a temporary file on cache and reads with the
+    original function.
+
+    Parameters
+    ----------
+    function : Callable
+    suffix : str
+        Must match the suffix expected by the function provided.
+    bytesio : BytesIO
+
+    Returns
+    -------
+    Return type of the provided function.
+    """
+    tempfile = CACHE.build_filename(f"temp_{suffix}") + suffix
+    with open(tempfile, "wb") as bf:
+        bf.write(bytesio.getbuffer())
+    result = function(tempfile)
+    os.remove(tempfile)
+    return result
+
+
 DECODERS = {
     ".nii": lambda b: Nifti1Image.from_bytes(b),
     ".gii": lambda b: GiftiImage.from_bytes(b),
@@ -65,13 +87,14 @@ DECODERS = {
     ".zip": lambda b: ZipFile(BytesIO(b)),
     ".png": lambda b: skimage_io.imread(BytesIO(b)),
     ".npy": lambda b: np.load(BytesIO(b)),
+    ".annot": lambda b: read_as_bytesio(freesurfer.read_annot, '.annot', BytesIO(b)),
 }
 
 
 def find_suitiable_decoder(url: str) -> Callable:
     """
     By supplying a url or a filename, obtain a suitable decoder function
-    for siibra to digest based on predifined DECODERS. An extra layer of
+    for siibra to digest based on predifined DECODERS. An extra layer of
     gzip decompresser automatically added for gzipped files.
 
     Parameters
@@ -94,8 +117,7 @@ def find_suitiable_decoder(url: str) -> Callable:
     suitable_decoders = [
         dec for sfx, dec in DECODERS.items() if urlpath.endswith(sfx)
     ]
-    if len(suitable_decoders)
-        assert len(suitable_decoders) == 1
+    if len(suitable_decoders) == 1:
         return suitable_decoders[0]
     else:
         return None
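`find_suitiable_decoder` previously combined a length check with an assertion; it now returns a decoder only on an unambiguous single match and `None` otherwise, and the new `.annot` entry routes FreeSurfer annotation bytes through `read_as_bytesio` because `nibabel.freesurfer.read_annot` accepts only file paths. A small sketch of the lookup (the URLs are illustrative):

from siibra.retrieval.requests import find_suitiable_decoder

decoder = find_suitiable_decoder("https://example.org/regions.json")
assert decoder is not None            # ".json" matches exactly one DECODERS entry
print(decoder(b'{"regions": []}'))    # -> {'regions': []}

assert find_suitiable_decoder("https://example.org/unknown.xyz") is None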
@@ -247,6 +269,24 @@ class HttpRequest:
         return self.get()
 
 
+class FileLoader(HttpRequest):
+    """
+    Just a loads a local file, but mimics the behaviour
+    of cached http requests used in other connectors.
+    """
+    def __init__(self, filepath, func=None):
+        HttpRequest.__init__(
+            self, filepath, refresh=False,
+            func=func or find_suitiable_decoder(filepath)
+        )
+        self.cachefile = filepath
+
+    def _retrieve(self, **kwargs):
+        if kwargs:
+            logger.info(f"Keywords {list(kwargs.keys())} are supplied but won't be used.")
+        assert os.path.isfile(self.cachefile)
+
+
 class ZipfileRequest(HttpRequest):
     def __init__(self, url, filename, func=None, refresh=False):
         HttpRequest.__init__(
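`FileLoader` moves out of `LocalFileRepository` and becomes an `HttpRequest` subclass, so local files expose the same lazy `.data` interface as remote ones; `_retrieve` only asserts that the "cachefile" (the local file itself) exists. A sketch (the path is illustrative):

from siibra.retrieval.requests import FileLoader

loader = FileLoader("/tmp/regions.json")  # decoder inferred from the ".json" suffix
data = loader.data                        # opened and decoded on first access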
@@ -539,10 +579,11 @@ class GitlabProxyEnum(Enum):
         if SIIBRA_USE_LOCAL_SNAPSPOT:
             logger.info(f"Using localsnapshot at {SIIBRA_USE_LOCAL_SNAPSPOT}")
             return [LocalFileRepository(SIIBRA_USE_LOCAL_SNAPSPOT)]
-
-
-
-
+        else:
+            return [
+                GitlabConnector(server[0], server[1], "master", archive_mode=True)
+                for server in servers
+            ]
 
     @try_all_connectors()
     def search_files(
@@ -574,27 +615,21 @@ class GitlabProxy(HttpRequest):
         self,
         flavour: GitlabProxyEnum,
         instance_id=None,
-        postprocess: Callable[["GitlabProxy", Any], Any] = (
-            lambda proxy, obj: obj
-            if hasattr(proxy, "instance_id") and proxy.instance_id
-            else {"results": obj}
-        ),
     ):
         if flavour not in GitlabProxyEnum:
             raise RuntimeError("Can only proxy enum members")
 
         self.flavour = flavour
         self.folder = self.folder_dict[flavour]
-        self.postprocess = postprocess
         self.instance_id = instance_id
-        self.
+        self.get = cache_user_fn(self.get)
 
     def get(self):
         if self.instance_id:
-            return self.
-
-            )
-
+            return self.flavour.get(f"{self.folder}/{self.instance_id}.json")
+        return {
+            "results": self.flavour.get(f"{self.folder}/_all.json")
+        }
 
 
 class MultiSourceRequestException(Exception):