metahq-core 0.1.2__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metahq_core/__init__.py +1 -1
- metahq_core/curations/annotation_converter.py +5 -5
- metahq_core/curations/annotations.py +361 -151
- metahq_core/curations/index.py +104 -43
- metahq_core/curations/labels.py +259 -128
- metahq_core/curations/propagator.py +62 -85
- metahq_core/export/__init__.py +0 -0
- metahq_core/export/annotations.py +125 -59
- metahq_core/export/labels.py +128 -70
- metahq_core/logger.py +11 -18
- metahq_core/query.py +346 -241
- metahq_core/{ontology/loader.py → relations_loader.py} +2 -1
- metahq_core/search.py +37 -14
- metahq_core/util/io.py +109 -46
- metahq_core/util/supported.py +16 -5
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/METADATA +13 -6
- metahq_core-1.0.0rc1.dist-info/RECORD +30 -0
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/WHEEL +1 -1
- metahq_core-1.0.0rc1.dist-info/licenses/LICENSE +28 -0
- metahq_core/ontology/base.py +0 -376
- metahq_core/ontology/graph.py +0 -252
- metahq_core-0.1.2.dist-info/RECORD +0 -30
- /metahq_core/{ontology → curations}/__init__.py +0 -0
|
@@ -1,24 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Class for performing annotation propagation.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
an ontology structure.
|
|
6
|
-
|
|
7
|
-
Applies the dot product between an annotations matrix and familial adjacency
|
|
4
|
+
Applies the dot product between an annotations matrix and familial membership
|
|
8
5
|
matrices. Below is the computation:
|
|
9
6
|
|
|
10
7
|
(samples x reference_terms) @ (reference_terms, propagated_terms)
|
|
11
8
|
-> (samples x propagated_terms).
|
|
12
9
|
|
|
13
|
-
|
|
10
|
+
To propagate upwards, the (reference_terms, propagated_terms) familial membership
|
|
11
|
+
matrix indicates ancestor relationships. To propagate downwards, the
|
|
12
|
+
(reference_terms, propagated_terms) familial membership matrix indicates descendant
|
|
13
|
+
relationships.
|
|
14
|
+
|
|
15
|
+
If labeling, this is done once for ancestors and once for descendants. Then for each sample,
|
|
14
16
|
if a term is not an ancestor or descendant of that sample, then the sample is
|
|
15
17
|
given a negative label for that term.
|
|
16
18
|
|
|
17
|
-
|
|
18
19
|
Author: Parker Hicks
|
|
19
20
|
Date: 2025-04-23
|
|
20
21
|
|
|
21
|
-
Last updated: 2025-11-
|
|
22
|
+
Last updated: 2025-11-28 by Parker Hicks
|
|
22
23
|
"""
|
|
23
24
|
|
|
24
25
|
from pathlib import Path
|
|
@@ -30,7 +31,7 @@ import polars as pl
|
|
|
30
31
|
from metahq_core.curations._multiprocess_propagator import MultiprocessPropagator
|
|
31
32
|
from metahq_core.logger import setup_logger
|
|
32
33
|
from metahq_core.util.alltypes import NpIntMatrix, NpStringArray
|
|
33
|
-
from metahq_core.util.supported import onto_relations
|
|
34
|
+
from metahq_core.util.supported import get_default_log_dir, onto_relations
|
|
34
35
|
|
|
35
36
|
if TYPE_CHECKING:
|
|
36
37
|
import logging
|
|
@@ -39,38 +40,23 @@ if TYPE_CHECKING:
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
class Propagator:
|
|
42
|
-
"""
|
|
43
|
-
Class to propagate annotations to labels given an ontology structure.
|
|
43
|
+
"""Class to propagate annotations given a particular ontology structure.
|
|
44
44
|
|
|
45
|
-
Attributes
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
The name of an ontology supported by MetaHQ.
|
|
45
|
+
Attributes:
|
|
46
|
+
ontology (str):
|
|
47
|
+
The name of an ontology supported by MetaHQ.
|
|
49
48
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
anno (Annotations):
|
|
50
|
+
A MetaHQ Annotations object with columns of ontology terms,
|
|
51
|
+
rows as samples, and each value is a 1 or 0 indicating if a sample is
|
|
52
|
+
annotated to a particular term.
|
|
54
53
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
family: dict[str, pl.DataFrame | list[str]]
|
|
59
|
-
A pointer to the ancestry and descendants adjacency matrices and ids
|
|
60
|
-
denoting their column ids.
|
|
61
|
-
|
|
62
|
-
Methods
|
|
63
|
-
-------
|
|
64
|
-
propagate_up()
|
|
65
|
-
Propagates annotations up to all terms in the annotations curation.
|
|
66
|
-
If an index is annotated to a descendant of a term in `to`, then it
|
|
67
|
-
is given an annotation of 1 to that term.
|
|
68
|
-
|
|
69
|
-
propagate_down()
|
|
70
|
-
Propagates annotations down to all terms in the annotations curation.
|
|
71
|
-
If an index is annotated to an ancestor of a term in `to`, then it
|
|
72
|
-
is given an annotation of 1 to that term.
|
|
54
|
+
to (list[str]):
|
|
55
|
+
A list of ontology term IDs to propagate annotations up or down to.
|
|
73
56
|
|
|
57
|
+
family (dict[str, pl.DataFrame | list[str]]):
|
|
58
|
+
A pointer to the ancestry and descendants adjacency matrices and ids
|
|
59
|
+
denoting their column ids.
|
|
74
60
|
"""
|
|
75
61
|
|
|
76
62
|
def __init__(
|
|
@@ -81,7 +67,7 @@ class Propagator:
|
|
|
81
67
|
relatives,
|
|
82
68
|
logger=None,
|
|
83
69
|
loglevel=20,
|
|
84
|
-
logdir=
|
|
70
|
+
logdir=get_default_log_dir(),
|
|
85
71
|
verbose=True,
|
|
86
72
|
):
|
|
87
73
|
self.ontology: str = ontology
|
|
@@ -98,35 +84,34 @@ class Propagator:
|
|
|
98
84
|
self.verbose: bool = verbose
|
|
99
85
|
self._propagator = MultiprocessPropagator(logger=logger, verbose=verbose)
|
|
100
86
|
|
|
101
|
-
def propagate_down(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
87
|
+
def propagate_down(self) -> tuple[NpIntMatrix, list[str], pl.DataFrame]:
|
|
88
|
+
"""Propagates annotations down to all terms in the annotations curation.
|
|
89
|
+
If an index is annotated to an ancestor of a term in `to`, then it
|
|
90
|
+
is given an annotation of 1 to that term.
|
|
91
|
+
"""
|
|
92
|
+
if self.verbose:
|
|
106
93
|
return self._propagate_to_family(
|
|
107
94
|
"descendants", task="Propagating descendants"
|
|
108
95
|
)
|
|
109
96
|
return self._propagate_to_family("descendants")
|
|
110
97
|
|
|
111
|
-
def propagate_up(
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
98
|
+
def propagate_up(self) -> tuple[NpIntMatrix, list[str], pl.DataFrame]:
|
|
99
|
+
"""Propagates annotations up to all terms in the annotations curation.
|
|
100
|
+
If an index is annotated to a descendant of a term in `to`, then it
|
|
101
|
+
is given an annotation of 1 to that term.
|
|
102
|
+
"""
|
|
103
|
+
if self.verbose:
|
|
116
104
|
return self._propagate_to_family("ancestors", task="Propagating ancestors")
|
|
117
105
|
return self._propagate_to_family("ancestors")
|
|
118
106
|
|
|
119
107
|
def _load_anscestors(
|
|
120
108
|
self, lf: pl.LazyFrame, _from: list[str], all_terms: pl.Series
|
|
121
109
|
) -> NpIntMatrix:
|
|
122
|
-
"""
|
|
123
|
-
Loads the relations matrix with a ancestor-forward orientation.
|
|
124
|
-
|
|
125
|
-
Returns
|
|
126
|
-
-------
|
|
127
|
-
Matrix of shape [_from, _to] where each value indicates if a particular
|
|
128
|
-
column is a ancestor of a particular row.
|
|
110
|
+
"""Loads the relations matrix with an ancestor-forward orientation.
|
|
129
111
|
|
|
112
|
+
Returns:
|
|
113
|
+
Matrix of shape [_from, _to] where each value indicates if a particular
|
|
114
|
+
column is an ancestor of a particular row.
|
|
130
115
|
"""
|
|
131
116
|
return (
|
|
132
117
|
lf.select(_from)
|
|
@@ -142,14 +127,11 @@ class Propagator:
|
|
|
142
127
|
def _load_descendants(
|
|
143
128
|
self, lf: pl.LazyFrame, _from: list[str], all_terms: pl.Series
|
|
144
129
|
) -> NpIntMatrix:
|
|
145
|
-
"""
|
|
146
|
-
Loads the relations matrix with a descendants-forward orientation.
|
|
147
|
-
|
|
148
|
-
Returns
|
|
149
|
-
-------
|
|
150
|
-
Matrix of shape [_from, _to] where each value indicates if a particular
|
|
151
|
-
column is a descendant of a particular row.
|
|
130
|
+
"""Loads the relations matrix with a descendants-forward orientation.
|
|
152
131
|
|
|
132
|
+
Returns:
|
|
133
|
+
Matrix of shape [_from, _to] where each value indicates if a particular
|
|
134
|
+
column is a descendant of a particular row.
|
|
153
135
|
"""
|
|
154
136
|
return (
|
|
155
137
|
lf.select(sorted(self.to))
|
|
@@ -161,8 +143,7 @@ class Propagator:
|
|
|
161
143
|
)
|
|
162
144
|
|
|
163
145
|
def _load_family(self):
|
|
164
|
-
"""
|
|
165
|
-
Loads the terms x terms relations matrix for ancestor and descendant relationships.
|
|
146
|
+
"""Loads the terms x terms relations matrix for ancestor and descendant relationships.
|
|
166
147
|
These matrices store column-wise relational annotations where if term_n is an ancestor
|
|
167
148
|
of term_m, then ancestors[n, m] will be 1 and ancestors[m, n] will be 0. This matrix is
|
|
168
149
|
transposed when loading to get row-wise relational annotations and match dimensions with
|
|
@@ -218,9 +199,8 @@ class Propagator:
|
|
|
218
199
|
return propagated, list(self.family["ids"]), self.anno.ids
|
|
219
200
|
|
|
220
201
|
def _split_anno(self) -> list:
|
|
221
|
-
"""
|
|
222
|
-
|
|
223
|
-
for matrix multiplication. Each chunk will have at most 1000 entries.
|
|
202
|
+
"""Splits annotation matrix into chunks row-wise to reduce computational overhead
|
|
203
|
+
for matrix multiplication. Each chunk will have at most 500 entries.
|
|
224
204
|
"""
|
|
225
205
|
nchunks = self.anno.ids.height // 500
|
|
226
206
|
if nchunks == 0:
|
|
@@ -241,30 +221,27 @@ def propagate_controls(
|
|
|
241
221
|
a label of 2 for any disease term IDs that are labeled as positives for any other
|
|
242
222
|
samples that come from the same study (group) as the control samples.
|
|
243
223
|
|
|
244
|
-
Parameters
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
indicating if an index is labeled to a term (other columns), not, or unknown.
|
|
249
|
-
|
|
250
|
-
to_terms: list[str]
|
|
251
|
-
Ontology term IDs for which to generate labels. Must be in the columns of labels.
|
|
224
|
+
Parameters:
|
|
225
|
+
labels (pl.DataFrame):
|
|
226
|
+
Labels DataFrame with an index and group column specifically. Values are -1, 0, and 1
|
|
227
|
+
indicating if an index is labeled to a term (other columns), not, or unknown.
|
|
252
228
|
|
|
253
|
-
|
|
254
|
-
|
|
229
|
+
to_terms (list[str]):
|
|
230
|
+
Ontology term IDs for which to generate labels. Must be in the columns of `labels`.
|
|
255
231
|
|
|
256
|
-
|
|
257
|
-
|
|
232
|
+
index_col (str):
|
|
233
|
+
Name of the column in `labels` storing index IDs.
|
|
258
234
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
also in labels.
|
|
235
|
+
group_col (str):
|
|
236
|
+
Name of the column in `labels` storing group IDs.
|
|
262
237
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
index is a control for a particular disease.
|
|
238
|
+
ctrl_ids (pl.DataFrame):
|
|
239
|
+
DataFrame of index IDs that are healthy controls and any other ID columns that are
|
|
240
|
+
also in `labels`.
|
|
267
241
|
|
|
242
|
+
Returns:
|
|
243
|
+
A `polars.DataFrame` object of -1, 0, 1, and 2 labels of all available indices where 2
|
|
244
|
+
indicates if an index is a control for a particular disease.
|
|
268
245
|
"""
|
|
269
246
|
mapper = {0: 0, 1: 1, -1: 0}
|
|
270
247
|
select = to_terms + [group_col]
|
|
File without changes
|
|
@@ -4,7 +4,7 @@ Class for Annotations export io classes.
|
|
|
4
4
|
Author: Parker Hicks
|
|
5
5
|
Date: 2025-09-08
|
|
6
6
|
|
|
7
|
-
Last updated:
|
|
7
|
+
Last updated: 2026-02-03 by Parker Hicks
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
@@ -21,6 +21,7 @@ from metahq_core.util.supported import (
|
|
|
21
21
|
database_ids,
|
|
22
22
|
geo_metadata,
|
|
23
23
|
get_annotations,
|
|
24
|
+
get_default_log_dir,
|
|
24
25
|
metadata_fields,
|
|
25
26
|
supported,
|
|
26
27
|
)
|
|
@@ -36,31 +37,82 @@ ANNOTATION_KEY = {"1": True, "0": False}
|
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class AnnotationsExporter(BaseExporter):
|
|
39
|
-
"""
|
|
40
|
+
"""Exporter for Annotations curations.
|
|
40
41
|
|
|
41
|
-
|
|
42
|
+
Attributes:
|
|
43
|
+
attribute (Literal["tissue", "disease", "sex", "age"]):
|
|
44
|
+
Attribute of the annotations to save.
|
|
45
|
+
|
|
46
|
+
level (Literal["sample", "series"]):
|
|
47
|
+
Level of the annotations.
|
|
48
|
+
|
|
49
|
+
logger (logging.Logger):
|
|
50
|
+
Python builtin Logger.
|
|
51
|
+
|
|
52
|
+
loglevel (int):
|
|
53
|
+
Logging level.
|
|
54
|
+
|
|
55
|
+
logdir (str | Path):
|
|
56
|
+
Path to directory storing logs.
|
|
57
|
+
|
|
58
|
+
verbose (bool):
|
|
59
|
+
Controls logging outputs.
|
|
60
|
+
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
def __init__(
|
|
64
|
+
self,
|
|
65
|
+
attribute: str,
|
|
66
|
+
level: str,
|
|
67
|
+
logger=None,
|
|
68
|
+
loglevel=20,
|
|
69
|
+
logdir=get_default_log_dir(),
|
|
70
|
+
verbose=True,
|
|
71
|
+
):
|
|
72
|
+
self.attribute = attribute
|
|
73
|
+
self._database = self._load_annotations(level)
|
|
42
74
|
|
|
43
75
|
if logger is None:
|
|
44
76
|
logger = setup_logger(__name__, level=loglevel, log_dir=logdir)
|
|
45
77
|
self.log: logging.Logger = logger
|
|
46
78
|
self.verbose: bool = verbose
|
|
47
79
|
|
|
80
|
+
def add_sources(self, anno: Annotations) -> Annotations:
|
|
81
|
+
"""Add the sources that contributed to the annotations of each sample or dataset.
|
|
82
|
+
|
|
83
|
+
Arguments:
|
|
84
|
+
anno (Annotations):
|
|
85
|
+
A populated Annotations curation object.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
The Annotations object with additional source IDs for each index.
|
|
89
|
+
|
|
90
|
+
"""
|
|
91
|
+
sources = {anno.index_col: [], "sources": []}
|
|
92
|
+
for idx in anno.index:
|
|
93
|
+
sources[anno.index_col].append(idx)
|
|
94
|
+
|
|
95
|
+
# get sources for a particular index for the specified attribute
|
|
96
|
+
sources["sources"].append(
|
|
97
|
+
"|".join(list(self._database[idx][self.attribute].keys()))
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
return anno.add_ids(pl.DataFrame(sources))
|
|
101
|
+
|
|
48
102
|
def get_sra(self, anno: Annotations, fields: list[str]) -> Annotations:
|
|
49
103
|
"""
|
|
50
104
|
Retrieve SRA IDs from the annotations if they exist.
|
|
51
105
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
filters.
|
|
106
|
+
Arguments:
|
|
107
|
+
anno (Annotations):
|
|
108
|
+
An Annotations curation containing samples and terms matching user-specified
|
|
109
|
+
filters.
|
|
57
110
|
|
|
58
|
-
|
|
59
|
-
|
|
111
|
+
fields (list[str]):
|
|
112
|
+
SRA ID levels (i.e., srr, srx, srs, or srp)
|
|
60
113
|
|
|
61
|
-
Returns
|
|
62
|
-
|
|
63
|
-
A new Annotations curation with merged SRA IDs.
|
|
114
|
+
Returns:
|
|
115
|
+
A new Annotations curation with merged SRA IDs.
|
|
64
116
|
|
|
65
117
|
"""
|
|
66
118
|
_anno = self._load_annotations(level=anno.index_col) # all MetaHQ annotations
|
|
@@ -88,18 +140,22 @@ class AnnotationsExporter(BaseExporter):
|
|
|
88
140
|
metadata: str | None = None,
|
|
89
141
|
**kwargs,
|
|
90
142
|
):
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
Save annotations curation to json. Keys are terms and values are
|
|
143
|
+
"""Save annotations curation to json. Keys are terms and values are
|
|
94
144
|
positively annotated indices.
|
|
95
145
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
146
|
+
Arguments:
|
|
147
|
+
anno (Annotations):
|
|
148
|
+
A populated Annotations object.
|
|
149
|
+
|
|
150
|
+
fmt (Literal["json", "parquet", "csv", "tsv"]):
|
|
151
|
+
File format to save to.
|
|
152
|
+
|
|
153
|
+
file (FilePath):
|
|
154
|
+
Path to outfile.json.
|
|
155
|
+
|
|
156
|
+
metadata (str):
|
|
157
|
+
Metadata fields to include.
|
|
100
158
|
|
|
101
|
-
metadata: str
|
|
102
|
-
Metadata fields to include.
|
|
103
159
|
"""
|
|
104
160
|
_ = checkdir(file, is_file=True)
|
|
105
161
|
opt = {
|
|
@@ -117,37 +173,39 @@ class AnnotationsExporter(BaseExporter):
|
|
|
117
173
|
def to_csv(
|
|
118
174
|
self, anno: Annotations, file: FilePath, metadata: str | None = None, **kwargs
|
|
119
175
|
):
|
|
120
|
-
"""
|
|
121
|
-
|
|
176
|
+
"""Save annotations to csv.
|
|
177
|
+
|
|
178
|
+
Arguments:
|
|
179
|
+
anno (Annotations):
|
|
180
|
+
A populated Annotations object.
|
|
122
181
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
outfile: FilePath
|
|
126
|
-
Path to outfile.csv.
|
|
182
|
+
file (FilePath):
|
|
183
|
+
Path to outfile.csv.
|
|
127
184
|
|
|
128
|
-
|
|
129
|
-
|
|
185
|
+
metadata (str):
|
|
186
|
+
Metadata fields to include.
|
|
130
187
|
|
|
131
188
|
"""
|
|
132
189
|
self._save_tabular("csv", anno, file, metadata, **kwargs)
|
|
133
190
|
|
|
134
191
|
def to_json(self, anno: Annotations, file: FilePath, metadata: str | None = None):
|
|
135
|
-
"""
|
|
136
|
-
Save annotations curation to json. Keys are terms and values are
|
|
192
|
+
"""Save annotations curation to json. Keys are terms and values are
|
|
137
193
|
positively annotated indices.
|
|
138
194
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
195
|
+
Arguments:
|
|
196
|
+
anno (Annotations):
|
|
197
|
+
A populated Annotations object.
|
|
198
|
+
|
|
199
|
+
file (FilePath):
|
|
200
|
+
Path to outfile.json.
|
|
143
201
|
|
|
144
|
-
|
|
145
|
-
|
|
202
|
+
metadata (str):
|
|
203
|
+
Metadata fields to include.
|
|
146
204
|
|
|
147
205
|
"""
|
|
148
206
|
|
|
149
207
|
if self._only_index(metadata, anno.index_col):
|
|
150
|
-
self.
|
|
208
|
+
self._save_json_with_metadata(anno, file, anno.index_col)
|
|
151
209
|
|
|
152
210
|
elif isinstance(metadata, str):
|
|
153
211
|
self._save_json_with_metadata(anno, file, metadata)
|
|
@@ -169,19 +227,17 @@ class AnnotationsExporter(BaseExporter):
|
|
|
169
227
|
metadata: str | None = None,
|
|
170
228
|
**kwargs,
|
|
171
229
|
):
|
|
172
|
-
"""
|
|
173
|
-
Save annotations to parquet.
|
|
230
|
+
"""Save annotations to parquet.
|
|
174
231
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
Annotations curation object to save.
|
|
232
|
+
Arguments:
|
|
233
|
+
anno (Annotations):
|
|
234
|
+
Annotations curation object to save.
|
|
179
235
|
|
|
180
|
-
|
|
181
|
-
|
|
236
|
+
file (FilePath):
|
|
237
|
+
Path to outfile.parquet.
|
|
182
238
|
|
|
183
|
-
|
|
184
|
-
|
|
239
|
+
metadata (str | None):
|
|
240
|
+
Metadata fields to include.
|
|
185
241
|
|
|
186
242
|
"""
|
|
187
243
|
self._save_tabular("parquet", anno, file, metadata, **kwargs)
|
|
@@ -189,15 +245,17 @@ class AnnotationsExporter(BaseExporter):
|
|
|
189
245
|
def to_tsv(
|
|
190
246
|
self, anno: Annotations, file: FilePath, metadata: str | None = None, **kwargs
|
|
191
247
|
):
|
|
192
|
-
"""
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
248
|
+
"""Save annotations to tsv.
|
|
249
|
+
|
|
250
|
+
Arguments:
|
|
251
|
+
anno (Annotations):
|
|
252
|
+
A populated Annotations object.
|
|
253
|
+
|
|
254
|
+
file (FilePath):
|
|
255
|
+
Path to outfile.tsv.
|
|
198
256
|
|
|
199
|
-
|
|
200
|
-
|
|
257
|
+
metadata (str):
|
|
258
|
+
Metadata fields to include.
|
|
201
259
|
|
|
202
260
|
"""
|
|
203
261
|
self._save_tabular("tsv", anno, file, metadata, **kwargs)
|
|
@@ -277,8 +335,7 @@ class AnnotationsExporter(BaseExporter):
|
|
|
277
335
|
def _save_table_with_description(
|
|
278
336
|
self, file: FilePath, anno: Annotations, metadata: list[str], fmt: str, **kwargs
|
|
279
337
|
):
|
|
280
|
-
"""
|
|
281
|
-
Fetches corresponding sample/study descriptions and saves the annotations
|
|
338
|
+
"""Fetches corresponding sample/study descriptions and saves the annotations
|
|
282
339
|
curation in tabular format (parquet, csv, tsv).
|
|
283
340
|
"""
|
|
284
341
|
|
|
@@ -317,6 +374,10 @@ class AnnotationsExporter(BaseExporter):
|
|
|
317
374
|
anno, [field for field in _metadata if field in database_ids("sra")]
|
|
318
375
|
)
|
|
319
376
|
|
|
377
|
+
# add sources
|
|
378
|
+
anno = self.add_sources(anno)
|
|
379
|
+
_metadata.extend(["sources"])
|
|
380
|
+
|
|
320
381
|
if "description" in _metadata:
|
|
321
382
|
self._save_table_with_description(file, anno, _metadata, fmt=fmt, **kwargs)
|
|
322
383
|
|
|
@@ -356,10 +417,15 @@ class AnnotationsExporter(BaseExporter):
|
|
|
356
417
|
self, anno: Annotations, file: FilePath, metadata: str
|
|
357
418
|
):
|
|
358
419
|
"""Save annotations as JSON with requested metadata."""
|
|
420
|
+
|
|
421
|
+
# add sources
|
|
422
|
+
anno = self.add_sources(anno)
|
|
423
|
+
|
|
359
424
|
_anno: dict[str, dict[str, dict[str, str]]] = {
|
|
360
425
|
term: {} for term in anno.entities
|
|
361
426
|
}
|
|
362
427
|
_metadata = self._parse_metafields(anno.index_col, metadata)
|
|
428
|
+
_metadata.extend(["sources"])
|
|
363
429
|
|
|
364
430
|
if self._sra_in_metadata(_metadata):
|
|
365
431
|
anno = self.get_sra(
|