siibra 1.0a1__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of siibra might be problematic. Click here for more details.
- siibra/VERSION +1 -0
- siibra/__init__.py +164 -0
- siibra/commons.py +823 -0
- siibra/configuration/__init__.py +17 -0
- siibra/configuration/configuration.py +189 -0
- siibra/configuration/factory.py +589 -0
- siibra/core/__init__.py +16 -0
- siibra/core/assignment.py +110 -0
- siibra/core/atlas.py +239 -0
- siibra/core/concept.py +308 -0
- siibra/core/parcellation.py +387 -0
- siibra/core/region.py +1223 -0
- siibra/core/space.py +131 -0
- siibra/core/structure.py +111 -0
- siibra/exceptions.py +63 -0
- siibra/experimental/__init__.py +19 -0
- siibra/experimental/contour.py +61 -0
- siibra/experimental/cortical_profile_sampler.py +57 -0
- siibra/experimental/patch.py +98 -0
- siibra/experimental/plane3d.py +256 -0
- siibra/explorer/__init__.py +17 -0
- siibra/explorer/url.py +222 -0
- siibra/explorer/util.py +87 -0
- siibra/features/__init__.py +117 -0
- siibra/features/anchor.py +224 -0
- siibra/features/connectivity/__init__.py +33 -0
- siibra/features/connectivity/functional_connectivity.py +57 -0
- siibra/features/connectivity/regional_connectivity.py +494 -0
- siibra/features/connectivity/streamline_counts.py +27 -0
- siibra/features/connectivity/streamline_lengths.py +27 -0
- siibra/features/connectivity/tracing_connectivity.py +30 -0
- siibra/features/dataset/__init__.py +17 -0
- siibra/features/dataset/ebrains.py +90 -0
- siibra/features/feature.py +970 -0
- siibra/features/image/__init__.py +27 -0
- siibra/features/image/image.py +115 -0
- siibra/features/image/sections.py +26 -0
- siibra/features/image/volume_of_interest.py +88 -0
- siibra/features/tabular/__init__.py +24 -0
- siibra/features/tabular/bigbrain_intensity_profile.py +77 -0
- siibra/features/tabular/cell_density_profile.py +298 -0
- siibra/features/tabular/cortical_profile.py +322 -0
- siibra/features/tabular/gene_expression.py +257 -0
- siibra/features/tabular/layerwise_bigbrain_intensities.py +62 -0
- siibra/features/tabular/layerwise_cell_density.py +95 -0
- siibra/features/tabular/receptor_density_fingerprint.py +192 -0
- siibra/features/tabular/receptor_density_profile.py +110 -0
- siibra/features/tabular/regional_timeseries_activity.py +294 -0
- siibra/features/tabular/tabular.py +139 -0
- siibra/livequeries/__init__.py +19 -0
- siibra/livequeries/allen.py +352 -0
- siibra/livequeries/bigbrain.py +197 -0
- siibra/livequeries/ebrains.py +145 -0
- siibra/livequeries/query.py +49 -0
- siibra/locations/__init__.py +91 -0
- siibra/locations/boundingbox.py +454 -0
- siibra/locations/location.py +115 -0
- siibra/locations/point.py +344 -0
- siibra/locations/pointcloud.py +349 -0
- siibra/retrieval/__init__.py +27 -0
- siibra/retrieval/cache.py +233 -0
- siibra/retrieval/datasets.py +389 -0
- siibra/retrieval/exceptions/__init__.py +27 -0
- siibra/retrieval/repositories.py +769 -0
- siibra/retrieval/requests.py +659 -0
- siibra/vocabularies/__init__.py +45 -0
- siibra/vocabularies/gene_names.json +29176 -0
- siibra/vocabularies/receptor_symbols.json +210 -0
- siibra/vocabularies/region_aliases.json +460 -0
- siibra/volumes/__init__.py +23 -0
- siibra/volumes/parcellationmap.py +1279 -0
- siibra/volumes/providers/__init__.py +20 -0
- siibra/volumes/providers/freesurfer.py +113 -0
- siibra/volumes/providers/gifti.py +165 -0
- siibra/volumes/providers/neuroglancer.py +736 -0
- siibra/volumes/providers/nifti.py +266 -0
- siibra/volumes/providers/provider.py +107 -0
- siibra/volumes/sparsemap.py +468 -0
- siibra/volumes/volume.py +892 -0
- siibra-1.0.0a1.dist-info/LICENSE +201 -0
- siibra-1.0.0a1.dist-info/METADATA +160 -0
- siibra-1.0.0a1.dist-info/RECORD +84 -0
- siibra-1.0.0a1.dist-info/WHEEL +5 -0
- siibra-1.0.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Copyright 2018-2024
|
|
2
|
+
# Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Base type of features in tabular formats."""
|
|
16
|
+
|
|
17
|
+
from zipfile import ZipFile
|
|
18
|
+
from .. import feature
|
|
19
|
+
|
|
20
|
+
from .. import anchor as _anchor
|
|
21
|
+
|
|
22
|
+
from ... import commons
|
|
23
|
+
|
|
24
|
+
import pandas as pd
|
|
25
|
+
from textwrap import wrap
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Tabular(feature.Feature):
    """
    Represents a table of different measures anchored to a brain location.

    Columns represent different types of values, while rows represent different
    samples. The number of columns might thus be interpreted as the feature
    dimension.

    As an example, receptor fingerprints use rows to represent different
    neurotransmitter receptors, and separate columns for the mean and standard
    deviations measured across multiple tissue samples.
    """

    def __init__(
        self,
        description: str,
        modality: str,
        anchor: _anchor.AnatomicalAnchor,
        data: pd.DataFrame,  # sample x feature dimension
        datasets: list = None,
        id: str = None,
        prerelease: bool = False,
    ):
        """
        Parameters
        ----------
        description: str
        modality: str
        anchor: AnatomicalAnchor
        data: pd.DataFrame
            The table itself (samples x feature dimension).
        datasets: list, optional
            Forwarded to ``Feature.__init__``. Defaults to a new empty list.
        id: str, optional
        prerelease: bool, default: False
        """
        feature.Feature.__init__(
            self,
            modality=modality,
            description=description,
            anchor=anchor,
            # A fresh list per instance: a literal ``[]`` default would be one
            # object shared by every feature created without datasets.
            datasets=[] if datasets is None else datasets,
            id=id,
            prerelease=prerelease
        )
        self._data_cached = data

    @property
    def data(self):
        """A copy of the stored table, so callers cannot alter the cached one."""
        return self._data_cached.copy()

    def _to_zip(self, fh: ZipFile):
        """Extend the parent's export by adding the table as "tabular.csv"."""
        super()._to_zip(fh)
        fh.writestr("tabular.csv", self.data.to_csv())

    def plot(self, *args, backend="matplotlib", **kwargs):
        """
        Create a bar plot of a column of the data.

        Parameters
        ----------
        backend: str
            "matplotlib", "plotly", or others supported by pandas DataFrame
            plotting backend.
        **kwargs
            Keyword arguments forwarded to ``DataFrame.plot``. The following
            are consumed here and not forwarded: ``textwrap`` (title wrap
            width, default 40), ``xticklabel_rotation`` (matplotlib only,
            default 60), and for plotly ``xlabel``/``index`` and
            ``ylabel``/``value`` (axis labels).

        Returns
        -------
        The object returned by ``DataFrame.plot`` for the chosen backend, or
        None if the matplotlib backend is requested but cannot be imported.
        """
        # The `data` property copies the table on every access; take one copy.
        data = self.data

        wrapwidth = kwargs.pop("textwrap", 40)
        if "title" not in kwargs:
            # dict.fromkeys drops duplicate names while keeping their order,
            # so the generated title is reproducible between runs.
            region_names = ", ".join(dict.fromkeys(r.name for r in self.anchor.regions))
            kwargs["title"] = "\n".join(
                wrap(f"{self.modality} in {region_names}", wrapwidth)
            )
        kwargs.setdefault("kind", "bar")
        kwargs.setdefault("y", data.columns[0])
        default_error_column = 'std' if 'std' in data.columns else None

        if backend == "matplotlib":
            try:
                import matplotlib.pyplot as plt
            except ImportError:
                commons.logger.error("matplotlib not available. Plotting of fingerprints disabled.")
                return None
            # default kwargs
            # `error_y` is the plotly spelling; matplotlib does not accept it,
            # so it is consumed here and used as fallback for `yerr`.
            # NOTE(review): alias behaviour inferred from the original check
            # on `error_y` in this branch - confirm this is the intent.
            error_alias = kwargs.pop("error_y", None)
            kwargs.setdefault(
                "yerr",
                error_alias if error_alias is not None else default_error_column
            )
            # Only a column name is usable in the label; testing an
            # array-like `yerr` for truthiness would raise.
            yerr = kwargs["yerr"]
            yerr_label = f" \u00b1 {yerr}" if isinstance(yerr, str) and yerr else ''
            kwargs.setdefault("width", 0.95)
            # The unit line is optional; the column name is always shown.
            unit_label = f"\n{self.unit}" if hasattr(self, 'unit') else ""
            kwargs.setdefault("ylabel", f"{kwargs['y']}{yerr_label}{unit_label}")
            kwargs.setdefault("grid", True)
            kwargs.setdefault("legend", False)
            # Must be popped: it is not a plotting keyword and would otherwise
            # be forwarded to the backend.
            xticklabel_rotation = kwargs.pop("xticklabel_rotation", 60)
            ax = data.plot(*args, backend=backend, **kwargs)
            ax.set_title(ax.get_title(), fontsize="medium")
            ax.set_xticklabels(
                ax.get_xticklabels(),
                rotation=xticklabel_rotation,
                ha='center' if xticklabel_rotation % 90 == 0 else 'right'
            )
            plt.tight_layout()
            return ax
        elif backend == "plotly":
            kwargs["title"] = kwargs["title"].replace("\n", "<br>")
            kwargs.setdefault("error_y", default_error_column)
            error_y_label = f" ± {kwargs.get('error_y')}<br>" if kwargs.get('error_y') else ''
            # Pop every label alias so that none of them is forwarded as an
            # unexpected keyword.
            xlabel = kwargs.pop("xlabel", None)
            index_label = kwargs.pop("index", "")
            ylabel = kwargs.pop("ylabel", None)
            value_label = kwargs.pop(
                "value",
                f"{kwargs.get('y')}{error_y_label} {self.unit if hasattr(self, 'unit') else ''}"
            )
            kwargs["labels"] = {
                "index": xlabel or index_label,
                "value": ylabel or value_label,
            }
            fig = data.plot(*args, backend=backend, **kwargs)
            fig.update_layout(
                yaxis_title=kwargs["labels"]['value'],
                title=dict(
                    automargin=True, yref="container", xref="container",
                    pad=dict(t=40), xanchor="left", yanchor="top"
                )
            )
            return fig
        else:
            return data.plot(*args, backend=backend, **kwargs)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Copyright 2018-2024
|
|
2
|
+
# Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Feature queries requiring live query or on-the-fly calculations."""
|
|
16
|
+
|
|
17
|
+
from .allen import AllenBrainAtlasQuery
|
|
18
|
+
from .bigbrain import LayerwiseBigBrainIntensityQuery, BigBrainProfileQuery
|
|
19
|
+
from .ebrains import EbrainsFeatureQuery
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# Copyright 2018-2024
|
|
2
|
+
# Institute of Neuroscience and Medicine (INM-1), Forschungszentrum Jülich GmbH
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
"""Query Allen Human Brain Atlas microarray data in specified volume."""
|
|
16
|
+
|
|
17
|
+
from .query import LiveQuery
|
|
18
|
+
|
|
19
|
+
from ..core import space as _space, structure
|
|
20
|
+
from ..features import anchor as _anchor
|
|
21
|
+
from ..features.tabular.gene_expression import GeneExpressions
|
|
22
|
+
from ..commons import logger, Species
|
|
23
|
+
from ..locations import point, pointcloud
|
|
24
|
+
from ..retrieval import HttpRequest
|
|
25
|
+
from ..vocabularies import GENE_NAMES
|
|
26
|
+
|
|
27
|
+
from typing import List
|
|
28
|
+
from xml.etree import ElementTree
|
|
29
|
+
import numpy as np
|
|
30
|
+
import json
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Root of the Allen Brain Atlas data API; the query URL templates in
# AllenBrainAtlasQuery._QUERY are built by appending to it.
BASE_URL = "http://api.brain-map.org/api/v2/data"
|
|
34
|
+
|
|
35
|
+
# Passed as `sigma_mm` to every probe location Point created in
# AllenBrainAtlasQuery.query().
LOCATION_PRECISION_MM = 2. # the assumed spatial precision of the probe locations in MNI space
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def is_allen_api_microarray_service_available(timeout: float = 10.0):
    """
    Probe the human microarray expression service of the Allen Brain Atlas API.

    A small fixed test query is sent, and the "success" field of the JSON
    reply decides the outcome.

    Parameters
    ----------
    timeout: float, default: 10.0
        Seconds to wait for the server before treating the service as
        unavailable.

    Returns
    -------
    bool
        True if the service answered with a truthy "success" field; False on
        any request failure, on a reply that is not valid JSON, or when the
        field is missing.
    """
    import requests

    # see https://community.brain-map.org/t/human-brain-atlas-api/2876
    # NOTE(review): the blank after "criteria=" is kept exactly as found -
    # confirm that it is intended.
    microarray_test_url = "http://api.brain-map.org/api/v2/data/query.json?criteria= service::human_microarray_expression[probes$eq1023146,1023147][donors$eq15496][structures$eq9148]"
    try:
        response = requests.get(microarray_test_url, timeout=timeout).json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON on requests
        # versions whose decode error is not a RequestException.
        return False
    if not isinstance(response, dict):
        return False
    return bool(response.get("success", False))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class InvalidAllenAPIResponseException(Exception):
    """Raised when the Allen Brain Atlas API is unavailable or reports an unsuccessful response."""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class AllenBrainAtlasQuery(LiveQuery, args=['gene'], FeatureType=GeneExpressions):
    """
    Interface to Allen Human Brain Atlas microarray data.

    This class connects to the web API of the Allen Brain Atlas:
    © 2015 Allen Institute for Brain Science. Allen Brain Atlas API.
    Available from: brain-map.org/api/index.html
    Any use of the data needs to be in accordance with their terms of use, see
    https://alleninstitute.org/legal/terms-use/

    - We have samples from 6 different human donors.
    - Each donor corresponds to exactly 1 specimen (tissue used for study)
    - Each sample was subject to multiple (in fact 4) different probes.
    - The probe data structures contain the list of gene expression of a
      particular gene measured in each sample. Therefore the length of the gene
      expression list in a probe corresponds to the number of samples taken in
      the corresponding donor for the given gene.
    """

    _FEATURETYPE = GeneExpressions

    # Class-wide flag: the Allen Atlas notification is printed only once.
    _notification_shown = False

    # URL templates, filled in with str.format() by the _retrieve_* methods.
    _QUERY = {
        "probe": BASE_URL
        + "/query.xml?criteria=model::Probe,rma::criteria,[probe_type$eq'DNA'],products[abbreviation$eq'HumanMA'],gene[acronym$eq'{gene}'],rma::options[only$eq'probes.id']",
        "multiple_gene_probe": BASE_URL
        + "/query.xml?criteria=model::Probe,rma::criteria,[probe_type$eq'DNA'],products[abbreviation$eq'HumanMA'],gene[acronym$in{genes}],rma::options[only$eq'probes.id']&start_row={start_row}&num_rows={num_rows}",
        "specimen": BASE_URL
        + "/Specimen/query.json?criteria=[name$eq'{specimen_id}']&include=alignment3d",
        "microarray": BASE_URL
        + "/query.json?criteria=service::human_microarray_expression[probes$in{probe_ids}][donors$eq{donor_id}]",
        "gene": BASE_URL
        + "/Gene/query.json?criteria=products[abbreviation$eq'HumanMA']&num_rows=all",
        # NOTE(review): "dono rs.name" contains a blank and looks like a
        # copy/paste artefact of "donors.name". It is kept unchanged here -
        # verify against the Allen API before correcting it.
        "factors": BASE_URL
        + "/query.json?criteria=model::Donor,rma::criteria,products[id$eq2],rma::include,age,rma::options[only$eq%27donors.id,dono rs.name,donors.race_only,donors.sex%27]&start_row={start_row}&num_rows={num_rows}",
    }

    # there is a 1:1 mapping between donors and specimen for the 6 adult human brains
    _DONOR_IDS = ["15496", "14380", "15697", "9861", "12876", "10021"]
    _SPECIMEN_IDS = [
        "H0351.1015",
        "H0351.1012",
        "H0351.1016",
        "H0351.2001",
        "H0351.1009",
        "H0351.2002",
    ]

    # Class-wide caches, filled on first iteration over a query instance.
    _specimen = None  # specimen id -> specimen record incl. "donor2icbm" matrix
    factors = None  # donor id -> {"race", "gender", "age"}
    species = Species.decode('homo sapiens')

    def __init__(self, **kwargs):
        """
        Each instance of this live query retrieves the probe IDs
        containing measurements for any gene in the given set
        of candidate genes.
        Each probe has expression levels and z-scores for a set of
        N samples.
        Each sample is linked to a donor, brain structure, and
        ICBM coordinate.
        When querying with a brain structure, the ICBM coordinates
        will be tested against the region mask in ICBM space
        to produce a table of outputs.

        Parameters
        ----------
        gene: str, dict, or list of those (keyword only)
            A gene name looked up in GENE_NAMES, a dict holding at least the
            keys 'symbol' and 'description', or a (nested) list of such
            specifications. Unknown names and unsupported types are logged
            as errors and skipped.
        """
        LiveQuery.__init__(self, **kwargs)
        gene = kwargs.get('gene')

        def parse_gene(spec):
            """Turn one gene specification into a flat list of gene entries."""
            if isinstance(spec, str):
                entry = GENE_NAMES.get(spec)
                if entry is None:
                    # Without this guard a None entry would only fail later,
                    # when the gene symbol is read for the probe query.
                    logger.error("Unknown gene name: %s", spec)
                    return []
                return [entry]
            elif isinstance(spec, dict):
                assert all(k in spec for k in ['symbol', 'description']), \
                    "A gene specification needs the keys 'symbol' and 'description'."
                assert spec['symbol'] in GENE_NAMES, \
                    f"Unknown gene symbol: {spec['symbol']}"
                return [spec]
            elif isinstance(spec, list):
                return [g for s in spec for g in parse_gene(s)]
            else:
                # The value is passed as a lazy %-argument; without the
                # placeholder, logging fails to format the record.
                logger.error("Invalid specification of gene: %s", spec)
                return []

        # No gene given: leave the list empty; __iter__ warns about it.
        self.genes = [] if gene is None else parse_gene(gene)

    def query(self, concept: structure.BrainStructure) -> List[GeneExpressions]:
        """
        Collect the measurements whose MNI coordinate lies within `concept`.

        Returns
        -------
        List[GeneExpressions]
            A list holding one GeneExpressions feature built from all matched
            measurements, or an empty list if none matched.

        Raises
        ------
        InvalidAllenAPIResponseException
            If the microarray service of the Allen API is not available.
        """
        if not is_allen_api_microarray_service_available():
            raise InvalidAllenAPIResponseException(
                'The service "web API of the Allen Brain Atlas for the human microarray expression" '
                'is not available at the moment, therefore siibra is not able to fetch '
                'gene expression features. This is a known issue which we are investigating: '
                'https://github.com/FZJ-INM1-BDA/siibra-python/issues/636.'
            )

        mnispace = _space.Space.registry().get('mni152')

        # Match the microarray probes to the query mask.
        # Record matched instances and their locations.
        measurements = []
        coordinates = []
        for measurement in self:
            pt = point.Point(measurement['mni_xyz'], space=mnispace, sigma_mm=LOCATION_PRECISION_MM)
            if pt in concept:
                measurements.append(measurement)
                coordinates.append(pt)

        if not coordinates:
            logger.info(f"No probes found that lie within {concept}")
            return []

        # Build the anatomical anchor and assignment to the query concept.
        # It will be attached to the returned feature, with the set of matched
        # MNI coordinates as anchor's location.
        anchor = _anchor.AnatomicalAnchor(
            location=pointcloud.from_points(coordinates),
            species=self.species
        )
        explanation = f"MNI coordinates of tissue samples were filtered using {concept}"
        anchor._assignments[concept] = [_anchor.AnatomicalAssignment(
            query_structure=concept,
            assigned_structure=concept,
            qualification=_anchor.Qualification.CONTAINED,
            explanation=explanation
        )]
        anchor._last_matched_concept = concept

        return [GeneExpressions(
            anchor=anchor,
            genes=[m['gene'] for m in measurements],
            levels=[m['expression_level'] for m in measurements],
            z_scores=[m['z_score'] for m in measurements],
            additional_columns={
                "race": [m['race'] for m in measurements],
                "gender": [m['gender'] for m in measurements],
                "age": [m['age'] for m in measurements],
                "mni_xyz": [tuple(m['mni_xyz']) for m in measurements],
                "sample": [m['sample_index'] for m in measurements],
                "probe_id": [m['probe_id'] for m in measurements],
                "donor_name": [m['donor_name'] for m in measurements],
            }
        )]

    def __iter__(self):
        """
        Yield one measurement dict per (sample, probe) pair, donor by donor.

        Yields nothing, after logging a warning, if no gene was specified.
        """
        # Covers None as well as the empty list that results from a missing
        # or entirely invalid gene specification.
        if not self.genes:
            logger.warning(
                f"No gene name provided to {self.__class__.__name__}, so no gene expressions will be retrieved. "
                'Use the "gene=<name>" option in the feature query to specify one.'
            )
            return

        if not self.__class__._notification_shown:
            print(GeneExpressions.ALLEN_ATLAS_NOTIFICATION)
            self.__class__._notification_shown = True

        probe_ids = self._retrieve_probe_ids(self.genes)

        # get specimen information
        if AllenBrainAtlasQuery._specimen is None:
            AllenBrainAtlasQuery._specimen = {
                spcid: AllenBrainAtlasQuery._retrieve_specimen(spcid) for spcid in self._SPECIMEN_IDS
            }

        if AllenBrainAtlasQuery.factors is None:
            self._retrieve_factors()

        # get expression levels and z_scores for the gene
        if len(probe_ids) > 0:
            for donor_id in self._DONOR_IDS:
                yield from self._retrieve_microarray(donor_id, probe_ids)

    @staticmethod
    def _retrieve_probe_ids(genes: list):
        """
        Fetch the ids of all probes measuring any of the given genes.

        Parameters
        ----------
        genes: list
            Gene entries; the 'symbol' of each one is used in the query.

        Returns
        -------
        list of int

        Raises
        ------
        RuntimeError
            If the reply contains the text "site unavailable".
        """
        assert isinstance(genes, list)
        if len(genes) == 1:
            logger.debug(f"Retrieving probe ids for gene {genes[0]['symbol']}")
        else:
            logger.debug(f"Retrieving probe ids for genes {', '.join(g['symbol'] for g in genes)}")
        quoted_symbols = ','.join([f"'{g['symbol']}'" for g in genes])
        start_row = 0
        num_rows = 50  # page size
        probe_ids = []
        while True:
            url = AllenBrainAtlasQuery._QUERY["multiple_gene_probe"].format(
                start_row=start_row, num_rows=num_rows, genes=quoted_symbols
            )
            response = HttpRequest(url).get()
            if "site unavailable" in response.decode().lower():
                # When the Allen site is not available, they still send a status code 200.
                raise RuntimeError(
                    "Allen institute site unavailable - please try again later."
                )
            root = ElementTree.fromstring(response)
            num_probes = int(root.attrib["num_rows"])
            total_probes = int(root.attrib["total_rows"])
            assert len(root) == 1
            probe_list = root[0]
            probe_ids.extend(int(probe_list[i][0].text) for i in range(num_probes))
            if (start_row + num_rows) >= total_probes:
                break
            # retrieve another page
            start_row += num_rows
        return probe_ids

    @staticmethod
    def _retrieve_factors():
        """
        Fetch race, sex and age (in whole years) of the donors, page by page,
        and store them in the class-wide `factors` dict keyed by donor id.
        """
        start_row = 0
        num_rows = 50  # page size
        if AllenBrainAtlasQuery.factors is None or len(AllenBrainAtlasQuery.factors) == 0:
            AllenBrainAtlasQuery.factors = {}
            while True:
                factors_url = AllenBrainAtlasQuery._QUERY["factors"].format(start_row=start_row, num_rows=num_rows)
                response = HttpRequest(factors_url).get()
                AllenBrainAtlasQuery.factors.update({
                    item["id"]: {
                        "race": item["race_only"],
                        "gender": item["sex"],
                        "age": int(item["age"]["days"] / 365),
                    }
                    for item in response["msg"]
                })
                total_factors = int(response["total_rows"])
                if (start_row + num_rows) >= total_factors:
                    break
                # retrieve another page
                start_row += num_rows

    @staticmethod
    def _retrieve_specimen(specimen_id: str):
        """
        Retrieves information about a human specimen.

        Returns
        -------
        dict
            The specimen record of the API, extended by the key "donor2icbm"
            holding a 3x4 matrix assembled from the record's "alignment3d"
            entries.

        Raises
        ------
        InvalidAllenAPIResponseException
            If the API reports the request as unsuccessful.
        """
        url = AllenBrainAtlasQuery._QUERY["specimen"].format(specimen_id=specimen_id)
        response = HttpRequest(url).get()
        if not response["success"]:
            raise InvalidAllenAPIResponseException(
                "Invalid response when retrieving specimen information: {}".format(url)
            )
        # we ask for 1 specimen, so list should have length 1
        assert len(response["msg"]) == 1
        specimen = response["msg"][0]
        T = specimen["alignment3d"]
        # tvr_00..tvr_08 fill the 3x3 part row by row; tvr_09..tvr_11 form
        # the last column.
        specimen["donor2icbm"] = np.array(
            [
                [T["tvr_00"], T["tvr_01"], T["tvr_02"], T["tvr_09"]],
                [T["tvr_03"], T["tvr_04"], T["tvr_05"], T["tvr_10"]],
                [T["tvr_06"], T["tvr_07"], T["tvr_08"], T["tvr_11"]],
            ]
        )
        return specimen

    @classmethod
    def _retrieve_microarray(cls, donor_id: str, probe_ids: list):
        """
        Retrieve microarray data for several probes of a given donor, and
        compute the position of the corresponding tissue block in the ICBM
        152 space.

        Requires the class-wide `_specimen` and `factors` caches to be filled.

        Parameters
        ----------
        donor_id: str
        probe_ids: list
            Ids of the probes to query.

        Yields
        ------
        dict
            One entry per (sample, probe) pair with the keys "gene",
            "expression_level", "z_score", "sample_index", "probe_id",
            "donor_id", "donor_name", "race", "gender", "age" and "mni_xyz".

        Raises
        ------
        RuntimeError
            If the reply cannot be decoded as JSON.
        InvalidAllenAPIResponseException
            If the API reports the request as unsuccessful.
        """

        if len(probe_ids) == 0:
            return

        # query the microarray data for this donor
        url = AllenBrainAtlasQuery._QUERY["microarray"].format(
            probe_ids=",".join([str(probe_id) for probe_id in probe_ids]), donor_id=donor_id
        )
        try:
            response = HttpRequest(url, json.loads).get()
        except json.JSONDecodeError as e:
            raise RuntimeError(
                f"Allen institute site produced an empty response - please try again later.\n{e}"
            ) from e
        if not response["success"]:
            raise InvalidAllenAPIResponseException(
                "Invalid response when retrieving microarray data: {}".format(url)
            )

        probes, samples = [response["msg"][n] for n in ["probes", "samples"]]

        for i, sample in enumerate(samples):

            # coordinate conversion to ICBM152 standard space
            # The donor id reported with the sample is the one used for the
            # factors lookup and in the yielded record.
            sample_donor_id = sample["donor"]["id"]
            donor_name = sample["donor"]["name"]
            # Append 1 to form a homogeneous coordinate for the 3x4 matrix.
            icbm_coord = (np.matmul(
                AllenBrainAtlasQuery._specimen[donor_name]["donor2icbm"],
                sample["sample"]["mri"] + [1],
            )).round(2)
            donor_factors = cls.factors[sample_donor_id]

            for probe in probes:
                yield {
                    "gene": probe['gene-symbol'],
                    "expression_level": float(probe["expression_level"][i]),
                    "z_score": float(probe["z-score"][i]),
                    "sample_index": i,
                    "probe_id": probe["id"],
                    "donor_id": sample_donor_id,
                    "donor_name": donor_name,
                    "race": donor_factors["race"],
                    "gender": donor_factors["gender"],
                    "age": donor_factors["age"],
                    "mni_xyz": icbm_coord[:3],
                }
|