siibra 1.0a1__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of siibra might be problematic.

Files changed (84)
  1. siibra/VERSION +1 -0
  2. siibra/__init__.py +164 -0
  3. siibra/commons.py +823 -0
  4. siibra/configuration/__init__.py +17 -0
  5. siibra/configuration/configuration.py +189 -0
  6. siibra/configuration/factory.py +589 -0
  7. siibra/core/__init__.py +16 -0
  8. siibra/core/assignment.py +110 -0
  9. siibra/core/atlas.py +239 -0
  10. siibra/core/concept.py +308 -0
  11. siibra/core/parcellation.py +387 -0
  12. siibra/core/region.py +1223 -0
  13. siibra/core/space.py +131 -0
  14. siibra/core/structure.py +111 -0
  15. siibra/exceptions.py +63 -0
  16. siibra/experimental/__init__.py +19 -0
  17. siibra/experimental/contour.py +61 -0
  18. siibra/experimental/cortical_profile_sampler.py +57 -0
  19. siibra/experimental/patch.py +98 -0
  20. siibra/experimental/plane3d.py +256 -0
  21. siibra/explorer/__init__.py +17 -0
  22. siibra/explorer/url.py +222 -0
  23. siibra/explorer/util.py +87 -0
  24. siibra/features/__init__.py +117 -0
  25. siibra/features/anchor.py +224 -0
  26. siibra/features/connectivity/__init__.py +33 -0
  27. siibra/features/connectivity/functional_connectivity.py +57 -0
  28. siibra/features/connectivity/regional_connectivity.py +494 -0
  29. siibra/features/connectivity/streamline_counts.py +27 -0
  30. siibra/features/connectivity/streamline_lengths.py +27 -0
  31. siibra/features/connectivity/tracing_connectivity.py +30 -0
  32. siibra/features/dataset/__init__.py +17 -0
  33. siibra/features/dataset/ebrains.py +90 -0
  34. siibra/features/feature.py +970 -0
  35. siibra/features/image/__init__.py +27 -0
  36. siibra/features/image/image.py +115 -0
  37. siibra/features/image/sections.py +26 -0
  38. siibra/features/image/volume_of_interest.py +88 -0
  39. siibra/features/tabular/__init__.py +24 -0
  40. siibra/features/tabular/bigbrain_intensity_profile.py +77 -0
  41. siibra/features/tabular/cell_density_profile.py +298 -0
  42. siibra/features/tabular/cortical_profile.py +322 -0
  43. siibra/features/tabular/gene_expression.py +257 -0
  44. siibra/features/tabular/layerwise_bigbrain_intensities.py +62 -0
  45. siibra/features/tabular/layerwise_cell_density.py +95 -0
  46. siibra/features/tabular/receptor_density_fingerprint.py +192 -0
  47. siibra/features/tabular/receptor_density_profile.py +110 -0
  48. siibra/features/tabular/regional_timeseries_activity.py +294 -0
  49. siibra/features/tabular/tabular.py +139 -0
  50. siibra/livequeries/__init__.py +19 -0
  51. siibra/livequeries/allen.py +352 -0
  52. siibra/livequeries/bigbrain.py +197 -0
  53. siibra/livequeries/ebrains.py +145 -0
  54. siibra/livequeries/query.py +49 -0
  55. siibra/locations/__init__.py +91 -0
  56. siibra/locations/boundingbox.py +454 -0
  57. siibra/locations/location.py +115 -0
  58. siibra/locations/point.py +344 -0
  59. siibra/locations/pointcloud.py +349 -0
  60. siibra/retrieval/__init__.py +27 -0
  61. siibra/retrieval/cache.py +233 -0
  62. siibra/retrieval/datasets.py +389 -0
  63. siibra/retrieval/exceptions/__init__.py +27 -0
  64. siibra/retrieval/repositories.py +769 -0
  65. siibra/retrieval/requests.py +659 -0
  66. siibra/vocabularies/__init__.py +45 -0
  67. siibra/vocabularies/gene_names.json +29176 -0
  68. siibra/vocabularies/receptor_symbols.json +210 -0
  69. siibra/vocabularies/region_aliases.json +460 -0
  70. siibra/volumes/__init__.py +23 -0
  71. siibra/volumes/parcellationmap.py +1279 -0
  72. siibra/volumes/providers/__init__.py +20 -0
  73. siibra/volumes/providers/freesurfer.py +113 -0
  74. siibra/volumes/providers/gifti.py +165 -0
  75. siibra/volumes/providers/neuroglancer.py +736 -0
  76. siibra/volumes/providers/nifti.py +266 -0
  77. siibra/volumes/providers/provider.py +107 -0
  78. siibra/volumes/sparsemap.py +468 -0
  79. siibra/volumes/volume.py +892 -0
  80. siibra-1.0.0a1.dist-info/LICENSE +201 -0
  81. siibra-1.0.0a1.dist-info/METADATA +160 -0
  82. siibra-1.0.0a1.dist-info/RECORD +84 -0
  83. siibra-1.0.0a1.dist-info/WHEEL +5 -0
  84. siibra-1.0.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,769 @@
+ # Copyright 2018-2024
+ # Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Connect to repositories to browse and pull files within."""
+
+ from .requests import (
+     HttpRequest,
+     EbrainsRequest,
+     SiibraHttpRequestError,
+     find_suitiable_decoder,
+     DECODERS,
+     FileLoader
+ )
+ from .cache import CACHE
+
+ from ..commons import logger, siibra_tqdm
+
+ from abc import ABC, abstractmethod
+ from urllib.parse import quote
+ import pathlib
+ import os
+ from zipfile import ZipFile
+ from typing import List
+
+
+ class RepositoryConnector(ABC):
+     """
+     Base class for repository connectors.
+     """
+
+     def __init__(self, base_url):
+         self.base_url = base_url
+
+     @abstractmethod
+     def search_files(self, folder: str, suffix: str, recursive: bool = False) -> List[str]:
+         """
+         Get the files within the repository.
+
+         Parameters
+         ----------
+         folder : str
+             Folder path in the form 'path/to/folder'.
+         suffix : str
+         recursive : bool, default: False
+             If True, searches files in all subfolders.
+
+         Returns
+         -------
+         List[str]
+             List of file names.
+         """
+         pass
+
+     @abstractmethod
+     def _build_url(self, folder: str, filename: str):
+         pass
+
+     def _decode_response(self, response, filename: str):
+         decoder = find_suitiable_decoder(filename)
+         return decoder(response) if decoder else response
+
+     def get(self, filename, folder="", decode_func=None):
+         """Get a file right away."""
+         return self.get_loader(filename, folder, decode_func).data
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for executing the query
+         only once loader.data is accessed."""
+         url = self._build_url(folder, filename)
+         if url is None:
+             raise RuntimeError(f"Cannot build url for ({folder}, {filename})")
+         if decode_func is None:
+             return HttpRequest(url, lambda b: self._decode_response(b, filename))
+         else:
+             return HttpRequest(url, decode_func)
+
+     def get_loaders(
+         self, folder="", suffix=None, progress=None, recursive=False, decode_func=None
+     ):
+         """
+         Returns a list of lazy loaders for the files in a given folder,
+         as (filename, loader) tuples.
+         """
+         fnames: List[str] = self.search_files(folder, suffix, recursive)
+         result = [
+             (fname, self.get_loader(fname, decode_func=decode_func)) for fname in fnames
+         ]
+         all_cached = all(_[1].cached for _ in result)
+         if progress is None or all_cached:
+             return result
+         else:
+             return list(siibra_tqdm(result, total=len(fnames), desc=progress))
+
+     @classmethod
+     def _from_url(cls, url: str):
+         expurl = os.path.abspath(os.path.expanduser(url))
+         if url.endswith(".zip"):
+             return ZipfileConnector(url)
+         elif os.path.isdir(expurl):
+             return LocalFileRepository(expurl)
+         else:
+             raise TypeError(
+                 "Do not know how to create a repository "
+                 f"connector from url '{url}'."
+             )
+
+     def __eq__(self, other):
+         return self.base_url == other.base_url
+
+
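RepositoryConnector leaves only search_files() and _build_url() abstract; get(), get_loader() and get_loaders() are inherited from the base class. A minimal sketch of a custom connector, not part of siibra (the class, the server layout and the file list below are hypothetical assumptions):

    class StaticHttpConnector(RepositoryConnector):
        """Sketch: serve a fixed list of files from a plain HTTP file server."""

        def __init__(self, base_url, known_files):
            super().__init__(base_url)
            self._known_files = list(known_files)  # relative paths, e.g. "atlas/info.json"

        def search_files(self, folder="", suffix=None, recursive=False):
            end = suffix or ""
            return [f for f in self._known_files if f.startswith(folder) and f.endswith(end)]

        def _build_url(self, folder, filename):
            path = f"{folder}/{filename}" if folder else filename
            return f"{self.base_url}/{path}"

    # connector = StaticHttpConnector("https://example.org/data", ["atlas/info.json"])
    # info = connector.get("info.json", folder="atlas")  # decoded based on the filename suffix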
+ class LocalFileRepository(RepositoryConnector):
+
+     def __init__(self, folder: str):
+         self._folder = pathlib.Path(folder)
+         assert pathlib.Path.is_dir(self._folder)
+
+     def _build_url(self, folder: str, filename: str) -> str:
+         return pathlib.Path.joinpath(self._folder, folder, filename).as_posix()
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for loading data
+         only once loader.data is accessed."""
+         filepath = self._build_url(folder, filename)
+         if not pathlib.Path(filepath).is_file():
+             raise RuntimeError(f"No file found at {filepath}")
+         return FileLoader(filepath, decode_func)
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         results = []
+         walk_pattern = f"{'**/' if recursive else ''}[!.~]*"
+         for file in self._folder.joinpath(folder).glob(walk_pattern):
+             if suffix is not None and not file.as_posix().endswith(suffix):
+                 continue
+             results.append(file.relative_to(self._folder).as_posix())
+         return results
+
+     def __str__(self):
+         return f"{self.__class__.__name__} at {self._folder}"
+
+     def __eq__(self, other: "LocalFileRepository"):
+         return self._folder == other._folder
+
+
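A usage sketch for a local folder (the path and suffix below are assumptions; the folder must already exist, and each loader reads and decodes its file only when .data is accessed):

    repo = LocalFileRepository("/data/siibra-configurations")   # hypothetical, existing folder
    json_files = repo.search_files(suffix=".json", recursive=True)
    for fname, loader in repo.get_loaders(suffix=".json", recursive=True, progress="configs"):
        spec = loader.data   # file is read and decoded only here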
+ class GithubConnector(RepositoryConnector):
+
+     def __init__(
+         self,
+         owner: str,
+         repo: str,
+         reftag: str,
+         skip_branchtest=False,
+         archive_mode=False
+     ):
+         """
+         Connect to a GitHub repository with a specific ref (branch or tag).
+
+         Parameters
+         ----------
+         owner : str
+         repo : str
+         reftag : str
+             Tag or branch
+         skip_branchtest : bool, default: False
+             If True, skip checking that the reftag exists in the repository.
+         archive_mode : bool, default: False
+             Archive the repository (for the given reftag only) in the local siibra cache.
+
+         Raises
+         ------
+         RuntimeError
+             If the branch test could not find the reftag in the repo's list of tags
+             and branches.
+         """
+         RepositoryConnector.__init__(
+             self,
+             base_url=f"https://api.github.com/repos/{owner}/{repo}"
+         )
+         assert reftag, "Please supply a branch name or tag for `reftag` to create a `GithubConnector`."
+         if not skip_branchtest:
+             try:
+                 tags = HttpRequest(f"{self.base_url}/tags", DECODERS[".json"], refresh=True).data
+                 branches = HttpRequest(f"{self.base_url}/branches", DECODERS[".json"], refresh=True).data
+                 matched_reftags = list(
+                     filter(lambda b: b["name"] == reftag, tags + branches)
+                 )
+                 if len(matched_reftags) == 1:
+                     self._want_commit_cached = matched_reftags[0]["commit"]
+                 else:
+                     raise RuntimeError(f"Found {len(matched_reftags)} matches for {reftag}")
+                 self._tag_checked = True
+             except Exception:
+                 logger.warning("Could not connect to GitHub repository.", exc_info=1)
+         self.reftag = reftag
+         self._raw_baseurl = f"https://raw.githubusercontent.com/{owner}/{repo}/{self.reftag}"
+         self.archive_mode = archive_mode
+         self._archive_conn: LocalFileRepository = None
+         self._recursed_tree = None
+
+     def search_files(self, folder="", suffix="", recursive=False) -> List[str]:
+         if self._recursed_tree is None:
+             self._recursed_tree = HttpRequest(
+                 f"{self.base_url}/git/trees/{self.reftag}?recursive=1",
+                 DECODERS[".json"]
+             ).data.get("tree", [])
+         folder_depth = len(folder.split('/')) if folder else 0
+         return [
+             f["path"] for f in self._recursed_tree
+             if f["type"] == "blob"
+             and f["path"].startswith(folder)
+             and f["path"].endswith(suffix)
+             and (recursive or len(f["path"].split('/')) == folder_depth + 1)
+         ]
+
+     def _build_url(self, folder: str, filename: str):
+         pathstr = pathlib.Path(folder, filename or "").as_posix()
+         return f'{self._raw_baseurl}/{quote(pathstr, safe="")}'
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         if self.archive_mode:
+             self._archive()
+             return self._archive_conn.get_loader(filename, folder, decode_func)
+         else:
+             return super().get_loader(filename, folder, decode_func)
+
+     def _archive(self):
+         assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
+         archive_directory = CACHE.build_filename(self.base_url + self.reftag)
+         if not os.path.isdir(archive_directory):
+             import tarfile
+
+             tarball_url = f"{self.base_url}/tarball/{self.reftag}"
+             req = HttpRequest(tarball_url, func=lambda b: b)
+             req.get()
+             with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
+                 tar.extractall(CACHE.folder)
+                 foldername = tar.getnames()[0]
+             os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
+         if self._archive_conn is None:
+             # create LocalFileRepository as an interface to the local files
+             self._archive_conn = LocalFileRepository(archive_directory)
+
+
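A usage sketch (the owner, repository and branch names are placeholders, not implied by this file; files are fetched from raw.githubusercontent.com and decoded according to their suffix):

    conn = GithubConnector(owner="some-org", repo="some-config-repo", reftag="main")
    specs = conn.search_files(folder="atlases", suffix=".json", recursive=True)
    first_spec = conn.get(specs[0])        # HTTP GET plus JSON decode
    # With archive_mode=True, the tarball of the ref is extracted into the siibra
    # cache once and subsequent loads are served from the local copy.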
+ class GitlabConnector(RepositoryConnector):
+
+     def __init__(self, server: str, project: int, reftag: str, skip_branchtest=False, *, archive_mode=False):
+         """
+         archive_mode: In archive mode, the entire repository is downloaded as an archive. This is
+         necessary or useful for repositories with numerous files.
+         N.B. archive_mode should only be set for trusted domains, since extraction of an archive
+         can result in files being created outside the target path; see
+         https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+         """
+         # TODO: the query builder needs to check whether the reftag is a branch, and then not cache.
+         assert server.startswith("http")
+         RepositoryConnector.__init__(
+             self, base_url=f"{server}/api/v4/projects/{project}/repository"
+         )
+         self.reftag = reftag
+         self._per_page = 100
+         self._branchloader = HttpRequest(
+             f"{self.base_url}/branches", DECODERS[".json"], refresh=True
+         )
+         self._tag_checked = True if skip_branchtest else False
+         self._want_commit_cached = None
+         self.archive_mode = archive_mode
+         self._archive_conn: LocalFileRepository = None
+
+     def __str__(self):
+         return f"{self.__class__.__name__} {self.base_url} {self.reftag}"
+
+     @property
+     def want_commit(self):
+         if not self._tag_checked:
+             try:
+                 matched_branches = list(
+                     filter(lambda b: b["name"] == self.reftag, self.branches)
+                 )
+                 if len(matched_branches) > 0:
+                     self._want_commit_cached = matched_branches[0]["commit"]
+                     logger.debug(
+                         f"{self.reftag} is a branch of {self.base_url}! Want last commit "
+                         f"{self._want_commit_cached['short_id']} from "
+                         f"{self._want_commit_cached['created_at']}"
+                     )
+                 self._tag_checked = True
+             except Exception as e:
+                 print(str(e))
+                 logger.warning("Could not connect to gitlab server!")
+         return self._want_commit_cached
+
+     @property
+     def branches(self):
+         return self._branchloader.data
+
+     def _build_url(self, folder="", filename=None, recursive=False, page=1):
+         ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
+         if filename is None:
+             pathstr = "" if len(folder) == 0 else f"&path={quote(folder, safe='')}"
+             return f"{self.base_url}/tree?ref={ref}{pathstr}&per_page={self._per_page}&page={page}&recursive={recursive}"
+         else:
+             pathstr = filename if folder == "" else f"{folder}/{filename}"
+             filepath = quote(pathstr, safe="")
+             return f"{self.base_url}/files/{filepath}/raw?ref={ref}"
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         page = 1
+         results = []
+         while True:
+             loader = HttpRequest(
+                 self._build_url(folder, recursive=recursive, page=page),
+                 DECODERS[".json"],
+             )
+             results.extend(loader.data)
+             if len(loader.data) < self._per_page:
+                 # no more pages
+                 break
+             page += 1
+         end = "" if suffix is None else suffix
+         return [
+             e["path"]
+             for e in results
+             if e["type"] == "blob" and e["name"].endswith(end)
+         ]
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         if self.archive_mode:
+             self._archive()
+             return self._archive_conn.get_loader(filename, folder, decode_func)
+         else:
+             return super().get_loader(filename, folder, decode_func)
+
+     def _archive(self):
+         assert self.archive_mode, "To archive the repo, `archive_mode` must be True."
+         ref = self.reftag if self.want_commit is None else self.want_commit["short_id"]
+         archive_directory = CACHE.build_filename(self.base_url + ref)
+         if not os.path.isdir(archive_directory):
+             import tarfile
+
+             tarball_url = self.base_url + f"/archive.tar.gz?sha={ref}"
+             req = HttpRequest(tarball_url, func=lambda b: b)
+             req.get()
+             with tarfile.open(name=req.cachefile, mode="r:gz") as tar:
+                 tar.extractall(CACHE.folder)
+                 foldername = tar.getnames()[0]
+             os.rename(os.path.join(CACHE.folder, foldername), archive_directory)
+         if self._archive_conn is None:
+             # create LocalFileRepository as an interface to the local files
+             self._archive_conn = LocalFileRepository(archive_directory)
+
+     def __eq__(self, other):
+         return all([
+             self.base_url == other.base_url,
+             self.reftag == other.reftag
+         ])
+
+
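A usage sketch for a GitLab-hosted repository (the server URL, project id and file layout are placeholders; the GitLab tree listing is paginated, which search_files() follows transparently):

    conn = GitlabConnector("https://gitlab.example.org", 1234, "main")
    niftis = conn.search_files(folder="maps", suffix=".nii.gz", recursive=True)
    loader = conn.get_loader(niftis[0])    # request is sent only when loader.data is accessed
    # archive_mode=True downloads the whole repository once into the siibra cache;
    # per the class docstring, only use it for trusted servers.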
+ class ZipfileConnector(RepositoryConnector):
+
+     def __init__(self, url: str):
+         RepositoryConnector.__init__(self, base_url="")
+         self.url = url
+         self._zipfile_cached = None
+
+     @property
+     def zipfile(self):
+         if self._zipfile_cached is None:
+             if os.path.isfile(os.path.abspath(os.path.expanduser(self.url))):
+                 self._zipfile_cached = os.path.abspath(os.path.expanduser(self.url))
+             else:
+                 # assume the url is a web URL to download the zip!
+                 req = HttpRequest(self.url)
+                 req._retrieve()
+                 self._zipfile_cached = req.cachefile
+         return self._zipfile_cached
+
+     def _build_url(self, folder="", filename=None):
+         return os.path.join(folder, filename)
+
+     def search_files(self, folder="", suffix="", recursive=False):
+         container = ZipFile(self.zipfile)
+         result = []
+         if folder and not folder.endswith(os.path.sep):
+             folder += os.path.sep
+         for fname in container.namelist():
+             if os.path.dirname(fname.replace(folder, "")) and not recursive:
+                 continue
+             if not os.path.basename(fname):
+                 continue
+             if fname.startswith(folder) and fname.endswith(suffix):
+                 result.append(fname)
+         return result
+
+     def __eq__(self, other):
+         return self.url == other.url
+
+     def clear_cache(self):
+         os.remove(self.zipfile)
+         self._zipfile_cached = None
+
+     class ZipFileLoader:
+         """
+         Loads a file from the zip archive, but mimics the behaviour
+         of cached http requests used in other connectors.
+         """
+         def __init__(self, zipfile, filename, decode_func):
+             self.zipfile = zipfile
+             self.filename = filename
+             self.func = decode_func
+             self.cachefile = CACHE.build_filename(zipfile + filename)
+
+         @property
+         def cached(self):
+             return os.path.isfile(self.cachefile)
+
+         @property
+         def data(self):
+             container = ZipFile(self.zipfile)
+             return self.func(container.open(self.filename).read())
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for loading data
+         only once loader.data is accessed."""
+         if decode_func is None:
+             return self.ZipFileLoader(self.zipfile, filename, lambda b: self._decode_response(b, filename))
+         else:
+             return self.ZipFileLoader(self.zipfile, filename, decode_func)
+
+     def __str__(self):
+         return f"{self.__class__.__name__}: {self.zipfile}"
+
+
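A usage sketch (the archive path is a placeholder; the url may also be a web URL, in which case the zip is downloaded into the siibra cache on first access):

    conn = ZipfileConnector("/data/some_dataset.zip")
    members = conn.search_files(suffix=".txt", recursive=True)
    text = conn.get(members[0], decode_func=lambda b: b.decode("utf-8"))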
+ class OwncloudConnector(RepositoryConnector):
+     def __init__(self, server: str, share: int):
+         RepositoryConnector.__init__(self, base_url=f"{server}/s/{share}")
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         raise NotImplementedError(
+             f"File search in folders not implemented for {self.__class__.__name__}."
+         )
+
+     def _build_url(self, folder, filename):
+         fpath = "" if folder == "" else f"path={quote(folder, safe='')}&"
+         fpath += f"files={quote(filename)}"
+         url = f"{self.base_url}/download?{fpath}"
+         return url
+
+
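OwncloudConnector cannot list files, so exact filenames must be known in advance. A usage sketch with a hypothetical server, share id and filename:

    conn = OwncloudConnector("https://cloud.example.org", 123456)
    csv = conn.get("profiles.csv", folder="receptors")
    # resolves to {base_url}/download?path=receptors&files=profiles.csv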
+ class EbrainsHdgConnector(RepositoryConnector):
+     """Download sensitive files from EBRAINS using
+     the Human Data Gateway (HDG) via the data proxy API.
+     Service documentation can be found at https://data-proxy.ebrains.eu/api/docs
+     """
+
+     """
+     Version of the data-proxy API that should be used for this request.
+     Currently v1 is the only supported version."""
+     api_version = "v1"
+
+     """
+     Base URL for the Dataset Endpoint of the Data-Proxy API
+     https://data-proxy.ebrains.eu/api/docs#/datasets
+
+     Functions supported by the endpoint:
+     ------------------------------------
+     - POST: Request access to the dataset.
+       This is required for the other functions.
+     - GET: Return a list of all available objects in the dataset.
+     """
+     base_url = f"https://data-proxy.ebrains.eu/api/{api_version}/datasets"
+
+     """
+     Limit on the number of returned objects.
+     The default value on the API side is 50 objects.
+     """
+     maxentries = 1000
+
+     def __init__(self, dataset_id):
+         """Construct a dataset query for the Human Data Gateway.
+
+         Parameters
+         ----------
+         dataset_id : str
+             EBRAINS dataset id for a dataset that is exposed
+             via the Human Data Gateway.
+         """
+
+         self._files = []
+         self.dataset_id = dataset_id
+
+         marker = None
+         while True:
+
+             # The endpoint implements basic pagination, using the filenames as markers.
+
+             if marker is None:
+                 url = f"{self.base_url}/{dataset_id}?limit={self.maxentries}"
+             else:
+                 url = f"{self.base_url}/{dataset_id}?limit={self.maxentries}&marker={marker}"
+
+             try:
+                 result = EbrainsRequest(url, DECODERS[".json"]).get()
+             except SiibraHttpRequestError as e:
+                 if e.status_code in [401, 422]:
+                     # Request access to the dataset (401: expired, 422: not yet requested)
+                     EbrainsRequest(f"{self.base_url}/{dataset_id}", post=True).get()
+                     input(
+                         "You should have received an email with a confirmation link - "
+                         "please find that email and click on the link, then press enter "
+                         "to continue"
+                     )
+                     continue
+                 else:
+                     raise RuntimeError(
+                         f"Could not request private file links for dataset {dataset_id}. "
+                         f"Status code was: {e.response.status_code}. "
+                         f"Message was: {e.response.text}. "
+                     )
+
+             newfiles = result["objects"]
+             self._files.extend(newfiles)
+             logger.debug(f"{len(newfiles)} of {self.maxentries} objects returned.")
+
+             if len(newfiles) == self.maxentries:
+                 # there might be more files
+                 marker = newfiles[-1]["name"]
+             else:
+                 logger.info(
+                     f"{len(self._files)} objects found for dataset {dataset_id}."
+                 )
+                 self.container = result["container"]
+                 self.prefix = result["prefix"]
+                 break
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         result = []
+         for f in self._files:
+             if f["name"].startswith(folder):
+                 if suffix is None:
+                     result.append(f["name"])
+                 else:
+                     if f["name"].endswith(suffix):
+                         result.append(f["name"])
+         return result
+
+     def _build_url(self, folder, filename):
+         if len(folder) > 0:
+             fpath = quote(f"{folder}/{filename}")
+         else:
+             fpath = quote(f"{filename}")
+         url = f"{self.base_url}/{self.dataset_id}/{fpath}?redirect=true"
+         return url
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for executing the query
+         only once loader.data is accessed."""
+         return EbrainsRequest(self._build_url(folder, filename), decode_func)
+
+
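A usage sketch (the dataset id is a placeholder). Constructing the connector already pages through the object listing; on first use it may prompt for confirmation of HDG access via the email link, and an EBRAINS login is required:

    conn = EbrainsHdgConnector("00000000-0000-0000-0000-000000000000")
    volumes = conn.search_files(suffix=".nii.gz")
    loader = conn.get_loader(volumes[0])   # authenticated request, sent on loader.data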
+ class EbrainsPublicDatasetConnector(RepositoryConnector):
+     """Access files from public EBRAINS datasets via the Knowledge Graph v3 API."""
+
+     QUERY_ID = "bebbe365-a0d6-41ea-9ff8-2554c15f70b7"
+     base_url = "https://core.kg.ebrains.eu/v3-beta/queries/"
+     maxentries = 1000
+
+     def __init__(self, dataset_id: str = None, version_id: str = None, title: str = None, in_progress=False):
+         """Construct a dataset query with the dataset id.
+
+         Parameters
+         ----------
+         dataset_id : str
+             EBRAINS dataset id of a public dataset in KG v3.
+         version_id : str
+             Version id to pick from the dataset (optional)
+         title: str
+             Part of the dataset title as an alternative dataset specification (dataset_id is then ignored)
+         in_progress: bool, default: False
+             If True, will request datasets that are still under curation.
+             Will only work when authenticated with an appropriately privileged
+             user account.
+         """
+         self.dataset_id = dataset_id
+         self.versions = {}
+         self._description = ""
+         self._name = ""
+         self.use_version = None
+
+         stage = "IN_PROGRESS" if in_progress else "RELEASED"
+         if title is None:
+             assert dataset_id is not None
+             self.dataset_id = dataset_id
+             url = f"{self.base_url}/{self.QUERY_ID}/instances?stage={stage}&dataset_id={dataset_id}"
+         else:
+             assert dataset_id is None
+             logger.info(f"Using title '{title}' for EBRAINS dataset search, ignoring id '{dataset_id}'")
+             url = f"{self.base_url}/{self.QUERY_ID}/instances?stage={stage}&title={title}"
+
+         response = EbrainsRequest(url, DECODERS[".json"]).get()
+         results = response.get('data', [])
+         if len(results) != 1:
+             if dataset_id is None:
+                 for r in results:
+                     print(r['name'])
+                 raise RuntimeError(f"Search for '{title}' yielded {len(results)} datasets. Please refine your specification.")
+             else:
+                 raise RuntimeError(f"Dataset id {dataset_id} did not yield a unique match, please fix the dataset specification.")
+
+         data = results[0]
+         self.id = data['id']
+         if title is not None:
+             self.dataset_id = data['id']
+         self._description += data.get("description", "")
+         self._name += data.get("name", "")
+         self.versions = {v["versionIdentifier"]: v for v in data["versions"]}
+         if version_id is None:
+             self.use_version = sorted(list(self.versions.keys()))[-1]
+             if len(self.versions) > 1:
+                 logger.info(
+                     f"Found {len(self.versions)} versions for dataset '{data['name']}' "
+                     f"({', '.join(self.versions.keys())}). "
+                     f"Will use {self.use_version} by default."
+                 )
+         else:
+             assert version_id in self.versions
+             self.use_version = version_id
+
+     @property
+     def name(self):
+         if self.use_version in self.versions:
+             if "name" in self.versions[self.use_version]:
+                 if len(self.versions[self.use_version]["name"]) > 0:
+                     return self.versions[self.use_version]["name"]
+         return self._name
+
+     @property
+     def description(self):
+         result = self._description
+         if self.use_version in self.versions:
+             result += "\n" + self.versions[self.use_version].get("description", "")
+         return result
+
+     @property
+     def authors(self):
+         result = []
+         if self.use_version in self.versions:
+             for author_info in self.versions[self.use_version]["authors"]:
+                 result.append(f"{author_info['familyName']}, {author_info['givenName']}")
+         return result
+
+     @property
+     def citation(self):
+         if self.use_version in self.versions:
+             return self.versions[self.use_version].get("cite", "")
+         else:
+             return None
+
+     @property
+     def _files(self):
+         if self.use_version in self.versions:
+             return {
+                 f["name"]: f["url"] for f in self.versions[self.use_version]["files"]
+             }
+         else:
+             return {}
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         result = []
+         for fname in self._files:
+             if fname.startswith(folder):
+                 if suffix is None:
+                     result.append(fname)
+                 else:
+                     if fname.endswith(suffix):
+                         result.append(fname)
+         return result
+
+     def _build_url(self, folder, filename):
+         fpath = f"{folder}/{filename}" if len(folder) > 0 else f"{filename}"
+         if fpath not in self._files:
+             raise RuntimeError(
+                 f"The file {fpath} requested from EBRAINS dataset {self.dataset_id} is not available in this repository."
+             )
+         return self._files[fpath]
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for executing the query
+         only once loader.data is accessed."""
+         return HttpRequest(self._build_url(folder, filename), decode_func)
+
+
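A usage sketch (the dataset id is a placeholder; the query resolves dataset metadata and per-file download URLs from the Knowledge Graph, so the files themselves are fetched with plain HTTP):

    conn = EbrainsPublicDatasetConnector(dataset_id="11111111-2222-3333-4444-555555555555")
    print(conn.name, conn.use_version, conn.authors)
    tables = conn.search_files(suffix=".csv")
    table = conn.get(tables[0])            # fetched from the file's download URL, decoded by suffix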
+ class EbrainsPublicDatasetConnectorMinds(RepositoryConnector):
+     """Access files from public EBRAINS datasets via the MINDS (legacy Knowledge Graph) query API."""
+
+     QUERY_ID = "siibra-minds-dataset-v1"
+     base_url = "https://kg.humanbrainproject.eu/query/minds/core/dataset/v1.0.0"
+     maxentries = 1000
+
+     def __init__(self, dataset_id=None, title=None, in_progress=False):
+         """Construct a dataset query with the dataset id.
+
+         Parameters
+         ----------
+         dataset_id : str
+             EBRAINS dataset id of a public dataset in the Knowledge Graph.
+         title: str
+             Part of the dataset title as an alternative dataset specification (dataset_id is then ignored)
+         in_progress: bool, default: False
+             If True, will request datasets that are still under curation.
+             Will only work when authenticated with an appropriately privileged
+             user account.
+         """
+         stage = "IN_PROGRESS" if in_progress else "RELEASED"
+         if title is None:
+             assert dataset_id is not None
+             self.dataset_id = dataset_id
+             url = f"{self.base_url}/{self.QUERY_ID}/instances?databaseScope={stage}&dataset_id={dataset_id}"
+         else:
+             assert dataset_id is None
+             logger.info(f"Using title '{title}' for EBRAINS dataset search, ignoring id '{dataset_id}'")
+             url = f"{self.base_url}/{self.QUERY_ID}/instances?databaseScope={stage}&title={title}"
+         req = EbrainsRequest(url, DECODERS[".json"])
+         response = req.get()
+         self._files = {}
+         results = response.get('results', [])
+         if dataset_id is not None:
+             assert len(results) < 2
+         elif len(results) > 1:
+             for r in results:
+                 print(r.keys())
+                 print(r['name'])
+             raise RuntimeError(f"Search for '{title}' yielded {len(results)} datasets, see above. Please refine your specification.")
+         for res in results:
+             if title is not None:
+                 self.dataset_id = res['id']
+             self.id = res['id']
+             for fileinfo in res['https://schema.hbp.eu/myQuery/v1.0.0']:
+                 self._files[fileinfo['relative_path']] = fileinfo['path']
+
+     def search_files(self, folder="", suffix=None, recursive=False):
+         result = []
+         for fname in self._files:
+             if fname.startswith(folder):
+                 if suffix is None:
+                     result.append(fname)
+                 else:
+                     if fname.endswith(suffix):
+                         result.append(fname)
+         return result
+
+     def _build_url(self, folder, filename):
+         fpath = f"{folder}/{filename}" if len(folder) > 0 else f"{filename}"
+         if fpath not in self._files:
+             raise RuntimeError(
+                 f"The file {fpath} requested from EBRAINS dataset {self.dataset_id} is not available in this repository."
+             )
+         return self._files[fpath]
+
+     def get_loader(self, filename, folder="", decode_func=None):
+         """Get a lazy loader for a file, for executing the query
+         only once loader.data is accessed."""
+         return HttpRequest(self._build_url(folder, filename), decode_func)
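The MINDS-based variant follows the same pattern; a brief sketch (the title is a placeholder and must match exactly one public dataset, otherwise the constructor raises):

    conn = EbrainsPublicDatasetConnectorMinds(title="some unique dataset title")
    csvs = conn.search_files(suffix=".csv")
    data = conn.get(csvs[0])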