siibra 1.0a1__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of siibra might be problematic.
- siibra/VERSION +1 -0
- siibra/__init__.py +164 -0
- siibra/commons.py +823 -0
- siibra/configuration/__init__.py +17 -0
- siibra/configuration/configuration.py +189 -0
- siibra/configuration/factory.py +589 -0
- siibra/core/__init__.py +16 -0
- siibra/core/assignment.py +110 -0
- siibra/core/atlas.py +239 -0
- siibra/core/concept.py +308 -0
- siibra/core/parcellation.py +387 -0
- siibra/core/region.py +1223 -0
- siibra/core/space.py +131 -0
- siibra/core/structure.py +111 -0
- siibra/exceptions.py +63 -0
- siibra/experimental/__init__.py +19 -0
- siibra/experimental/contour.py +61 -0
- siibra/experimental/cortical_profile_sampler.py +57 -0
- siibra/experimental/patch.py +98 -0
- siibra/experimental/plane3d.py +256 -0
- siibra/explorer/__init__.py +17 -0
- siibra/explorer/url.py +222 -0
- siibra/explorer/util.py +87 -0
- siibra/features/__init__.py +117 -0
- siibra/features/anchor.py +224 -0
- siibra/features/connectivity/__init__.py +33 -0
- siibra/features/connectivity/functional_connectivity.py +57 -0
- siibra/features/connectivity/regional_connectivity.py +494 -0
- siibra/features/connectivity/streamline_counts.py +27 -0
- siibra/features/connectivity/streamline_lengths.py +27 -0
- siibra/features/connectivity/tracing_connectivity.py +30 -0
- siibra/features/dataset/__init__.py +17 -0
- siibra/features/dataset/ebrains.py +90 -0
- siibra/features/feature.py +970 -0
- siibra/features/image/__init__.py +27 -0
- siibra/features/image/image.py +115 -0
- siibra/features/image/sections.py +26 -0
- siibra/features/image/volume_of_interest.py +88 -0
- siibra/features/tabular/__init__.py +24 -0
- siibra/features/tabular/bigbrain_intensity_profile.py +77 -0
- siibra/features/tabular/cell_density_profile.py +298 -0
- siibra/features/tabular/cortical_profile.py +322 -0
- siibra/features/tabular/gene_expression.py +257 -0
- siibra/features/tabular/layerwise_bigbrain_intensities.py +62 -0
- siibra/features/tabular/layerwise_cell_density.py +95 -0
- siibra/features/tabular/receptor_density_fingerprint.py +192 -0
- siibra/features/tabular/receptor_density_profile.py +110 -0
- siibra/features/tabular/regional_timeseries_activity.py +294 -0
- siibra/features/tabular/tabular.py +139 -0
- siibra/livequeries/__init__.py +19 -0
- siibra/livequeries/allen.py +352 -0
- siibra/livequeries/bigbrain.py +197 -0
- siibra/livequeries/ebrains.py +145 -0
- siibra/livequeries/query.py +49 -0
- siibra/locations/__init__.py +91 -0
- siibra/locations/boundingbox.py +454 -0
- siibra/locations/location.py +115 -0
- siibra/locations/point.py +344 -0
- siibra/locations/pointcloud.py +349 -0
- siibra/retrieval/__init__.py +27 -0
- siibra/retrieval/cache.py +233 -0
- siibra/retrieval/datasets.py +389 -0
- siibra/retrieval/exceptions/__init__.py +27 -0
- siibra/retrieval/repositories.py +769 -0
- siibra/retrieval/requests.py +659 -0
- siibra/vocabularies/__init__.py +45 -0
- siibra/vocabularies/gene_names.json +29176 -0
- siibra/vocabularies/receptor_symbols.json +210 -0
- siibra/vocabularies/region_aliases.json +460 -0
- siibra/volumes/__init__.py +23 -0
- siibra/volumes/parcellationmap.py +1279 -0
- siibra/volumes/providers/__init__.py +20 -0
- siibra/volumes/providers/freesurfer.py +113 -0
- siibra/volumes/providers/gifti.py +165 -0
- siibra/volumes/providers/neuroglancer.py +736 -0
- siibra/volumes/providers/nifti.py +266 -0
- siibra/volumes/providers/provider.py +107 -0
- siibra/volumes/sparsemap.py +468 -0
- siibra/volumes/volume.py +892 -0
- siibra-1.0.0a1.dist-info/LICENSE +201 -0
- siibra-1.0.0a1.dist-info/METADATA +160 -0
- siibra-1.0.0a1.dist-info/RECORD +84 -0
- siibra-1.0.0a1.dist-info/WHEEL +5 -0
- siibra-1.0.0a1.dist-info/top_level.txt +1 -0
siibra/retrieval/repositories.py
@@ -0,0 +1,769 @@
# Copyright 2018-2024
# Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Connect to repositories to browse and pull files within."""

from .requests import (
    HttpRequest,
    EbrainsRequest,
    SiibraHttpRequestError,
    find_suitiable_decoder,
    DECODERS,
    FileLoader
)
from .cache import CACHE

from ..commons import logger, siibra_tqdm

from abc import ABC, abstractmethod
from urllib.parse import quote
import pathlib
import os
from zipfile import ZipFile
from typing import List


class RepositoryConnector(ABC):
    """
    Base class for repository connectors.
    """

    def __init__(self, base_url):
        self.base_url = base_url

    @abstractmethod
    def search_files(self, folder: str, suffix: str, recursive: bool = False) -> List[str]:
        """
        Get the files within the repository.

        Parameters
        ----------
        folder : str
            folder or folders in the form 'path/to/file'
        suffix : str
        recursive : bool, default: False
            If True, searches files in all subfolders

        Returns
        -------
        List[str]
            List of file names.
        """
        pass

    @abstractmethod
    def _build_url(self, folder: str, filename: str):
        pass

    def _decode_response(self, response, filename: str):
        decoder = find_suitiable_decoder(filename)
        return decoder(response) if decoder else response

    def get(self, filename, folder="", decode_func=None):
        """Get a file right away."""
        return self.get_loader(filename, folder, decode_func).data

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for executing the query
        only once loader.data is accessed."""
        url = self._build_url(folder, filename)
        if url is None:
            raise RuntimeError(f"Cannot build url for ({folder}, {filename})")
        if decode_func is None:
            return HttpRequest(url, lambda b: self._decode_response(b, filename))
        else:
            return HttpRequest(url, decode_func)

    def get_loaders(
        self, folder="", suffix=None, progress=None, recursive=False, decode_func=None
    ):
        """
        Returns a list of lazy loaders for the files in a given folder.
        Each element is a tuple (filename, loader); the file content is only
        retrieved once loader.data is accessed.
        """
        fnames: List[str] = self.search_files(folder, suffix, recursive)
        result = [
            (fname, self.get_loader(fname, decode_func=decode_func)) for fname in fnames
        ]
        all_cached = all(_[1].cached for _ in result)
        if progress is None or all_cached:
            return result
        else:
            return list(siibra_tqdm(result, total=len(fnames), desc=progress))

    @classmethod
    def _from_url(cls, url: str):
        expurl = os.path.abspath(os.path.expanduser(url))
        if url.endswith(".zip"):
            return ZipfileConnector(url)
        elif os.path.isdir(expurl):
            return LocalFileRepository(expurl)
        else:
            raise TypeError(
                "Do not know how to create a repository "
                f"connector from url '{url}'."
            )

    def __eq__(self, other):
        return self.base_url == other.base_url

class LocalFileRepository(RepositoryConnector):

    def __init__(self, folder: str):
        self._folder = pathlib.Path(folder)
        assert pathlib.Path.is_dir(self._folder)

    def _build_url(self, folder: str, filename: str) -> str:
        return pathlib.Path.joinpath(self._folder, folder, filename).as_posix()

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for loading data
        only once loader.data is accessed."""
        filepath = self._build_url(folder, filename)
        if not pathlib.Path(filepath).is_file():
            raise RuntimeError(f"No file found at {filepath}")
        return FileLoader(filepath, decode_func)

    def search_files(self, folder="", suffix=None, recursive=False):
        results = []
        walk_pattern = f"{'**/' if recursive else ''}[!.~]*"
        for file in self._folder.joinpath(folder).glob(walk_pattern):
            if suffix is not None and not file.as_posix().endswith(suffix):
                continue
            results.append(file.relative_to(self._folder).as_posix())
        return results

    def __str__(self):
        return f"{self.__class__.__name__} at {self._folder}"

    def __eq__(self, other: "LocalFileRepository"):
        return self._folder == other._folder

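# --- Editor's illustrative usage sketch (not part of the packaged file); the folder
# --- and file name below are hypothetical. It shows the lazy-loading pattern shared
# --- by all connectors: a loader is built first, and the file is only read/decoded
# --- when loader.data is accessed.
#
#   repo = LocalFileRepository("/tmp/mydata")            # must be an existing directory
#   names = repo.search_files(suffix=".json", recursive=True)
#   loader = repo.get_loader("info.json")                # FileLoader; nothing read yet
#   content = loader.data                                # file is read and decoded here
#   content = repo.get("info.json")                      # shortcut: load immediately
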
class GithubConnector(RepositoryConnector):

    def __init__(
        self,
        owner: str,
        repo: str,
        reftag: str,
        skip_branchtest=False,
        archive_mode=False
    ):
        """
        Connect to a GitHub repository with a specific ref (branch or tag).

        Parameters
        ----------
        owner : str
        repo : str
        reftag : str
            Tag or branch
        skip_branchtest : bool, default: False
            Whether to test if the reftag resides in the repository.
        archive_mode : bool, default: False
            Archive the repo (for reftag only) to siibra local cache.
        Raises
        ------
        RuntimeError
            If branch test could not find the reftag in the repo's list of tags
            and branches.
        """
        RepositoryConnector.__init__(
            self,
            base_url=f"https://api.github.com/repos/{owner}/{repo}"
        )
        assert reftag, "Please supply a branch name or tag for `reftag` to create a `GithubConnector`."
        if not skip_branchtest:
            try:
                tags = HttpRequest(f"{self.base_url}/tags", DECODERS[".json"], refresh=True).data
                branches = HttpRequest(f"{self.base_url}/branches", DECODERS[".json"], refresh=True).data
                matched_reftags = list(
                    filter(lambda b: b["name"] == reftag, tags + branches)
                )
                if len(matched_reftags) == 1:
                    self._want_commit_cached = matched_reftags[0]["commit"]
                else:
                    raise RuntimeError(f"Found {len(matched_reftags)} matches to {reftag}")
                self._tag_checked = True
            except Exception:
                logger.warning("Could not connect to GitHub repository.", exc_info=1)
        self.reftag = reftag
        self._raw_baseurl = f"https://raw.githubusercontent.com/{owner}/{repo}/{self.reftag}"
        self.archive_mode = archive_mode
        self._archive_conn: LocalFileRepository = None
        self._recursed_tree = None

    def search_files(self, folder="", suffix="", recursive=False) -> List[str]:
        if self._recursed_tree is None:
            self._recursed_tree = HttpRequest(
                f"{self.base_url}/git/trees/{self.reftag}?recursive=1",
                DECODERS[".json"]
            ).data.get("tree", [])
        folder_depth = len(folder.split('/')) if folder else 0
        return [
            f["path"] for f in self._recursed_tree
            if f["type"] == "blob"
            and f["path"].startswith(folder)
            and f["path"].endswith(suffix)
            and (recursive or len(f["path"].split('/')) == folder_depth + 1)
        ]

    def _build_url(self, folder: str, filename: str):
        pathstr = pathlib.Path(folder, filename or "").as_posix()
        return f'{self._raw_baseurl}/{quote(pathstr, safe="")}'

    def get_loader(self, filename, folder="", decode_func=None):
        if self.archive_mode:
            self._archive()
            return self._archive_conn.get_loader(filename, folder, decode_func)
        else:
            return super().get_loader(filename, folder, decode_func)

    def _archive(self):
        assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
        archive_directory = CACHE.build_filename(self.base_url + self.reftag)
        if not os.path.isdir(archive_directory):
            import tarfile

            tarball_url = f"{self.base_url}/tarball/{self.reftag}"
            req = HttpRequest(tarball_url, func=lambda b: b)
            req.get()
            with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
                tar.extractall(CACHE.folder)
                foldername = tar.getnames()[0]
            os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
        if self._archive_conn is None:
            # create LocalFileRepository as an interface to the local files
            self._archive_conn = LocalFileRepository(archive_directory)

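# --- Editor's illustrative usage sketch (owner, repo, reftag and file names below are
# --- hypothetical, not taken from the package).
#
#   conn = GithubConnector(owner="some-org", repo="some-repo", reftag="main")
#   configs = conn.search_files(folder="configs", suffix=".json", recursive=True)
#   data = conn.get(configs[0])     # fetched via raw.githubusercontent.com and decoded
#
# With archive_mode=True, the first get_loader() call downloads the whole tarball of the
# ref into the siibra cache and subsequent reads are served by a LocalFileRepository.
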
class GitlabConnector(RepositoryConnector):

    def __init__(self, server: str, project: int, reftag: str, skip_branchtest=False, *, archive_mode=False):
        """
        archive_mode: in archive mode, the entire repository is downloaded as an archive.
        This is necessary, or at least useful, for repositories with numerous files.
        N.B. archive_mode should only be set for trusted domains, since extracting an
        archive can create files outside the target path, see
        https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
        """
        # TODO: the query builder needs to check whether the reftag is a branch, and then not cache.
        assert server.startswith("http")
        RepositoryConnector.__init__(
            self, base_url=f"{server}/api/v4/projects/{project}/repository"
        )
        self.reftag = reftag
        self._per_page = 100
        self._branchloader = HttpRequest(
            f"{self.base_url}/branches", DECODERS[".json"], refresh=True
        )
        self._tag_checked = True if skip_branchtest else False
        self._want_commit_cached = None
        self.archive_mode = archive_mode
        self._archive_conn: LocalFileRepository = None

    def __str__(self):
        return f"{self.__class__.__name__} {self.base_url} {self.reftag}"

    @property
    def want_commit(self):
        if not self._tag_checked:
            try:
                matched_branches = list(
                    filter(lambda b: b["name"] == self.reftag, self.branches)
                )
                if len(matched_branches) > 0:
                    self._want_commit_cached = matched_branches[0]["commit"]
                    logger.debug(
                        f"{self.reftag} is a branch of {self.base_url}! Want last commit "
                        f"{self._want_commit_cached['short_id']} from "
                        f"{self._want_commit_cached['created_at']}"
                    )
                self._tag_checked = True
            except Exception as e:
                print(str(e))
                logger.warning("Could not connect to gitlab server!")
        return self._want_commit_cached

    @property
    def branches(self):
        return self._branchloader.data

    def _build_url(self, folder="", filename=None, recursive=False, page=1):
        ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
        if filename is None:
            pathstr = "" if len(folder) == 0 else f"&path={quote(folder, safe='')}"
            return f"{self.base_url}/tree?ref={ref}{pathstr}&per_page={self._per_page}&page={page}&recursive={recursive}"
        else:
            pathstr = filename if folder == "" else f"{folder}/{filename}"
            filepath = quote(pathstr, safe="")
            return f"{self.base_url}/files/{filepath}/raw?ref={ref}"

    def search_files(self, folder="", suffix=None, recursive=False):
        page = 1
        results = []
        while True:
            loader = HttpRequest(
                self._build_url(folder, recursive=recursive, page=page),
                DECODERS[".json"],
            )
            results.extend(loader.data)
            if len(loader.data) < self._per_page:
                # no more pages
                break
            page += 1
        end = "" if suffix is None else suffix
        return [
            e["path"]
            for e in results
            if e["type"] == "blob" and e["name"].endswith(end)
        ]

    def get_loader(self, filename, folder="", decode_func=None):
        if self.archive_mode:
            self._archive()
            return self._archive_conn.get_loader(filename, folder, decode_func)
        else:
            return super().get_loader(filename, folder, decode_func)

    def _archive(self):
        assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
        ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
        archive_directory = CACHE.build_filename(self.base_url + ref)
        if not os.path.isdir(archive_directory):
            import tarfile

            tarball_url = self.base_url + f"/archive.tar.gz?sha={ref}"
            req = HttpRequest(tarball_url, func=lambda b: b)
            req.get()
            with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
                tar.extractall(CACHE.folder)
                foldername = tar.getnames()[0]
            os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
        if self._archive_conn is None:
            # create LocalFileRepository as an interface to the local files
            self._archive_conn = LocalFileRepository(archive_directory)

    def __eq__(self, other):
        return all([
            self.base_url == other.base_url,
            self.reftag == other.reftag
        ])

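# --- Editor's illustrative usage sketch (server, project id and file path below are
# --- hypothetical). With archive_mode=True the first get_loader() call downloads the
# --- repository tarball into the siibra cache (see _archive above), and individual
# --- files are then served from the extracted local copy.
#
#   conn = GitlabConnector("https://gitlab.example.org", project=42, reftag="main",
#                          archive_mode=True)
#   loader = conn.get_loader("data/table.csv")   # triggers _archive() on first use
#   table = loader.data                          # read from the extracted copy
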
class ZipfileConnector(RepositoryConnector):

    def __init__(self, url: str):
        RepositoryConnector.__init__(self, base_url="")
        self.url = url
        self._zipfile_cached = None

    @property
    def zipfile(self):
        if self._zipfile_cached is None:
            if os.path.isfile(os.path.abspath(os.path.expanduser(self.url))):
                self._zipfile_cached = os.path.abspath(os.path.expanduser(self.url))
            else:
                # assume the url is a web URL from which to download the zip
                req = HttpRequest(self.url)
                req._retrieve()
                self._zipfile_cached = req.cachefile
        return self._zipfile_cached

    def _build_url(self, folder="", filename=None):
        return os.path.join(folder, filename)

    def search_files(self, folder="", suffix="", recursive=False):
        container = ZipFile(self.zipfile)
        result = []
        if folder and not folder.endswith(os.path.sep):
            folder += os.path.sep
        for fname in container.namelist():
            if os.path.dirname(fname.replace(folder, "")) and not recursive:
                continue
            if not os.path.basename(fname):
                continue
            if fname.startswith(folder) and fname.endswith(suffix):
                result.append(fname)
        return result

    def __eq__(self, other):
        return self.url == other.url

    def clear_cache(self):
        os.remove(self.zipfile)
        self._zipfile_cached = None

    class ZipFileLoader:
        """
        Loads a file from the zip archive, but mimics the behaviour
        of cached http requests used in other connectors.
        """
        def __init__(self, zipfile, filename, decode_func):
            self.zipfile = zipfile
            self.filename = filename
            self.func = decode_func
            self.cachefile = CACHE.build_filename(zipfile + filename)

        @property
        def cached(self):
            return os.path.isfile(self.cachefile)

        @property
        def data(self):
            container = ZipFile(self.zipfile)
            return self.func(container.open(self.filename).read())

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for loading data
        only once loader.data is accessed."""
        if decode_func is None:
            return self.ZipFileLoader(self.zipfile, filename, lambda b: self._decode_response(b, filename))
        else:
            return self.ZipFileLoader(self.zipfile, filename, decode_func)

    def __str__(self):
        return f"{self.__class__.__name__}: {self.zipfile}"

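# --- Editor's illustrative usage sketch (the zip path and suffix below are
# --- hypothetical). The connector accepts a local zip file or a downloadable URL;
# --- ZipFileLoader only opens the archive when .data is accessed.
#
#   conn = ZipfileConnector("/tmp/archive.zip")
#   members = conn.search_files(suffix=".txt")
#   text = conn.get(members[0])
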
class OwncloudConnector(RepositoryConnector):
    def __init__(self, server: str, share: int):
        RepositoryConnector.__init__(self, base_url=f"{server}/s/{share}")

    def search_files(self, folder="", suffix=None, recursive=False):
        raise NotImplementedError(
            f"File search in folders not implemented for {self.__class__.__name__}."
        )

    def _build_url(self, folder, filename):
        fpath = "" if folder == "" else f"path={quote(folder, safe='')}&"
        fpath += f"files={quote(filename)}"
        url = f"{self.base_url}/download?{fpath}"
        return url

class EbrainsHdgConnector(RepositoryConnector):
    """Download sensitive files from EBRAINS using
    the Human Data Gateway (HDG) via the data proxy API.
    Service documentation can be found at https://data-proxy.ebrains.eu/api/docs
    """

    """
    Version of the data-proxy API that should be used for this request.
    Currently v1 is the only supported version."""
    api_version = "v1"

    """
    Base URL for the dataset endpoint of the Data-Proxy API
    https://data-proxy.ebrains.eu/api/docs#/datasets

    Functions supported by the endpoint:
    ------------------------------------
    - POST: Request access to the dataset.
        This is required for the other functions.
    - GET: Return a list of all available objects in the dataset
    """
    base_url = f"https://data-proxy.ebrains.eu/api/{api_version}/datasets"

    """
    Limit of returned objects.
    The default value on the API side is 50 objects.
    """
    maxentries = 1000

    def __init__(self, dataset_id):
        """Construct a dataset query for the Human Data Gateway.

        Parameters
        ----------
        dataset_id : str
            EBRAINS dataset id for a dataset that is exposed
            via the human data gateway.
        """

        self._files = []
        self.dataset_id = dataset_id

        marker = None
        while True:

            # The endpoint implements basic pagination, using the filenames as markers.

            if marker is None:
                url = f"{self.base_url}/{dataset_id}?limit={self.maxentries}"
            else:
                url = f"{self.base_url}/{dataset_id}?limit={self.maxentries}&marker={marker}"

            try:
                result = EbrainsRequest(url, DECODERS[".json"]).get()
            except SiibraHttpRequestError as e:
                if e.status_code in [401, 422]:
                    # Request access to the dataset (401: expired, 422: not yet requested)
                    EbrainsRequest(f"{self.base_url}/{dataset_id}", post=True).get()
                    input(
                        "You should have received an email with a confirmation link - "
                        "please find that email and click on the link, then press enter "
                        "to continue"
                    )
                    continue
                else:
                    raise RuntimeError(
                        f"Could not request private file links for dataset {dataset_id}. "
                        f"Status code was: {e.response.status_code}. "
                        f"Message was: {e.response.text}. "
                    )

            newfiles = result["objects"]
            self._files.extend(newfiles)
            logger.debug(f"{len(newfiles)} of {self.maxentries} objects returned.")

            if len(newfiles) == self.maxentries:
                # there might be more files
                marker = newfiles[-1]["name"]
            else:
                logger.info(
                    f"{len(self._files)} objects found for dataset {dataset_id}."
                )
                self.container = result["container"]
                self.prefix = result["prefix"]
                break

    def search_files(self, folder="", suffix=None, recursive=False):
        result = []
        for f in self._files:
            if f["name"].startswith(folder):
                if suffix is None:
                    result.append(f["name"])
                else:
                    if f["name"].endswith(suffix):
                        result.append(f["name"])
        return result

    def _build_url(self, folder, filename):
        if len(folder) > 0:
            fpath = quote(f"{folder}/{filename}")
        else:
            fpath = quote(f"{filename}")
        url = f"{self.base_url}/{self.dataset_id}/{fpath}?redirect=true"
        return url

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for executing the query
        only once loader.data is accessed."""
        return EbrainsRequest(self._build_url(folder, filename), decode_func)

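# --- Editor's illustrative usage sketch (the dataset id below is a hypothetical
# --- placeholder). Listing an HDG-protected dataset requires an EBRAINS access token;
# --- on first use the constructor asks the user to confirm access via the email link.
#
#   conn = EbrainsHdgConnector("00000000-0000-0000-0000-000000000000")
#   niftis = conn.search_files(suffix=".nii.gz")
#   loader = conn.get_loader(niftis[0])   # EbrainsRequest, executed on loader.data
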
class EbrainsPublicDatasetConnector(RepositoryConnector):
    """Access files from public EBRAINS datasets via the Knowledge Graph v3 API."""

    QUERY_ID = "bebbe365-a0d6-41ea-9ff8-2554c15f70b7"
    base_url = "https://core.kg.ebrains.eu/v3-beta/queries/"
    maxentries = 1000

    def __init__(self, dataset_id: str = None, version_id: str = None, title: str = None, in_progress=False):
        """Construct a dataset query with the dataset id.

        Parameters
        ----------
        dataset_id : str
            EBRAINS dataset id of a public dataset in KG v3.
        version_id : str
            Version id to pick from the dataset (optional)
        title: str
            Part of dataset title as an alternative dataset specification (will ignore dataset_id then)
        in_progress: bool, default: False
            If true, will request datasets that are still under curation.
            Will only work when authenticated with an appropriately privileged
            user account.
        """
        self.dataset_id = dataset_id
        self.versions = {}
        self._description = ""
        self._name = ""
        self.use_version = None

        stage = "IN_PROGRESS" if in_progress else "RELEASED"
        if title is None:
            assert dataset_id is not None
            self.dataset_id = dataset_id
            url = f"{self.base_url}/{self.QUERY_ID}/instances?stage={stage}&dataset_id={dataset_id}"
        else:
            assert dataset_id is None
            logger.info(f"Using title '{title}' for EBRAINS dataset search, ignoring id '{dataset_id}'")
            url = f"{self.base_url}/{self.QUERY_ID}/instances?stage={stage}&title={title}"

        response = EbrainsRequest(url, DECODERS[".json"]).get()
        results = response.get('data', [])
        if len(results) != 1:
            if dataset_id is None:
                for r in results:
                    print(r['name'])
                raise RuntimeError(f"Search for '{title}' yielded {len(results)} datasets. Please refine your specification.")
            else:
                raise RuntimeError(f"Dataset id {dataset_id} did not yield a unique match, please fix the dataset specification.")

        data = results[0]
        self.id = data['id']
        if title is not None:
            self.dataset_id = data['id']
        self._description += data.get("description", "")
        self._name += data.get("name", "")
        self.versions = {v["versionIdentifier"]: v for v in data["versions"]}
        if version_id is None:
            self.use_version = sorted(list(self.versions.keys()))[-1]
            if len(self.versions) > 1:
                logger.info(
                    f"Found {len(self.versions)} versions for dataset '{data['name']}' "
                    f"({', '.join(self.versions.keys())}). "
                    f"Will use {self.use_version} by default."
                )
        else:
            assert version_id in self.versions
            self.use_version = version_id

    @property
    def name(self):
        if self.use_version in self.versions:
            if "name" in self.versions[self.use_version]:
                if len(self.versions[self.use_version]["name"]) > 0:
                    return self.versions[self.use_version]["name"]
        return self._name

    @property
    def description(self):
        result = self._description
        if self.use_version in self.versions:
            result += "\n" + self.versions[self.use_version].get("description", "")
        return result

    @property
    def authors(self):
        result = []
        if self.use_version in self.versions:
            for author_info in self.versions[self.use_version]["authors"]:
                result.append(f"{author_info['familyName']}, {author_info['givenName']}")
        return result

    @property
    def citation(self):
        if self.use_version in self.versions:
            return self.versions[self.use_version].get("cite", "")
        else:
            return None

    @property
    def _files(self):
        if self.use_version in self.versions:
            return {
                f["name"]: f["url"] for f in self.versions[self.use_version]["files"]
            }
        else:
            return {}

    def search_files(self, folder="", suffix=None, recursive=False):
        result = []
        for fname in self._files:
            if fname.startswith(folder):
                if suffix is None:
                    result.append(fname)
                else:
                    if fname.endswith(suffix):
                        result.append(fname)
        return result

    def _build_url(self, folder, filename):
        fpath = f"{folder}/{filename}" if len(folder) > 0 else f"{filename}"
        if fpath not in self._files:
            raise RuntimeError(
                f"The file {fpath} requested from EBRAINS dataset {self.dataset_id} is not available in this repository."
            )
        return self._files[fpath]

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for executing the query
        only once loader.data is accessed."""
        return HttpRequest(self._build_url(folder, filename), decode_func)

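# --- Editor's illustrative usage sketch (the dataset id below is a hypothetical
# --- placeholder). The connector resolves a public KG v3 dataset by id or by part of
# --- its title, uses the newest version unless version_id is given, and maps file
# --- names to their download URLs.
#
#   conn = EbrainsPublicDatasetConnector(dataset_id="ffffffff-ffff-ffff-ffff-ffffffffffff")
#   print(conn.name, conn.authors, conn.citation)
#   csv_files = conn.search_files(suffix=".csv")
#   table = conn.get(csv_files[0])
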
class EbrainsPublicDatasetConnectorMinds(RepositoryConnector):
    """Access files from public EBRAINS datasets via the MINDS query API of the Knowledge Graph."""

    QUERY_ID = "siibra-minds-dataset-v1"
    base_url = "https://kg.humanbrainproject.eu/query/minds/core/dataset/v1.0.0"
    maxentries = 1000

    def __init__(self, dataset_id=None, title=None, in_progress=False):
        """Construct a dataset query with the dataset id.

        Parameters
        ----------
        dataset_id : str
            EBRAINS dataset id of a public dataset.
        title: str
            Part of dataset title as an alternative dataset specification (will ignore dataset_id then)
        in_progress: bool, default: False
            If true, will request datasets that are still under curation.
            Will only work when authenticated with an appropriately privileged
            user account.
        """
        stage = "IN_PROGRESS" if in_progress else "RELEASED"
        if title is None:
            assert dataset_id is not None
            self.dataset_id = dataset_id
            url = f"{self.base_url}/{self.QUERY_ID}/instances?databaseScope={stage}&dataset_id={dataset_id}"
        else:
            assert dataset_id is None
            logger.info(f"Using title '{title}' for EBRAINS dataset search, ignoring id '{dataset_id}'")
            url = f"{self.base_url}/{self.QUERY_ID}/instances?databaseScope={stage}&title={title}"
        req = EbrainsRequest(url, DECODERS[".json"])
        response = req.get()
        self._files = {}
        results = response.get('results', [])
        if dataset_id is not None:
            assert len(results) < 2
        elif len(results) > 1:
            for r in results:
                print(r.keys())
                print(r['name'])
            raise RuntimeError(f"Search for '{title}' yielded {len(results)} datasets, see above. Please refine your specification.")
        for res in results:
            if title is not None:
                self.dataset_id = res['id']
            self.id = res['id']
            for fileinfo in res['https://schema.hbp.eu/myQuery/v1.0.0']:
                self._files[fileinfo['relative_path']] = fileinfo['path']

    def search_files(self, folder="", suffix=None, recursive=False):
        result = []
        for fname in self._files:
            if fname.startswith(folder):
                if suffix is None:
                    result.append(fname)
                else:
                    if fname.endswith(suffix):
                        result.append(fname)
        return result

    def _build_url(self, folder, filename):
        fpath = f"{folder}/{filename}" if len(folder) > 0 else f"{filename}"
        if fpath not in self._files:
            raise RuntimeError(
                f"The file {fpath} requested from EBRAINS dataset {self.dataset_id} is not available in this repository."
            )
        return self._files[fpath]

    def get_loader(self, filename, folder="", decode_func=None):
        """Get a lazy loader for a file, for executing the query
        only once loader.data is accessed."""
        return HttpRequest(self._build_url(folder, filename), decode_func)