metahq-core 0.1.2__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metahq_core/__init__.py +1 -1
- metahq_core/curations/annotation_converter.py +5 -5
- metahq_core/curations/annotations.py +361 -151
- metahq_core/curations/index.py +104 -43
- metahq_core/curations/labels.py +259 -128
- metahq_core/curations/propagator.py +62 -85
- metahq_core/export/__init__.py +0 -0
- metahq_core/export/annotations.py +125 -59
- metahq_core/export/labels.py +128 -70
- metahq_core/logger.py +11 -18
- metahq_core/query.py +346 -241
- metahq_core/{ontology/loader.py → relations_loader.py} +2 -1
- metahq_core/search.py +37 -14
- metahq_core/util/io.py +109 -46
- metahq_core/util/supported.py +16 -5
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/METADATA +13 -6
- metahq_core-1.0.0rc1.dist-info/RECORD +30 -0
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/WHEEL +1 -1
- metahq_core-1.0.0rc1.dist-info/licenses/LICENSE +28 -0
- metahq_core/ontology/base.py +0 -376
- metahq_core/ontology/graph.py +0 -252
- metahq_core-0.1.2.dist-info/RECORD +0 -30
- /metahq_core/{ontology → curations}/__init__.py +0 -0
metahq_core/export/labels.py
CHANGED
|
@@ -4,7 +4,7 @@ Class for Labels export io classes.
|
|
|
4
4
|
Author: Parker Hicks
|
|
5
5
|
Date: 2025-09-08
|
|
6
6
|
|
|
7
|
-
Last updated:
|
|
7
|
+
Last updated: 2026-02-05 by Parker Hicks
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
@@ -22,6 +22,7 @@ from metahq_core.util.supported import (
|
|
|
22
22
|
disease_ontologies,
|
|
23
23
|
geo_metadata,
|
|
24
24
|
get_annotations,
|
|
25
|
+
get_default_log_dir,
|
|
25
26
|
metadata_fields,
|
|
26
27
|
supported,
|
|
27
28
|
)
|
|
@@ -37,33 +38,84 @@ LABEL_KEY = {"1": "positive", "-1": "negative", "2": "control"}
|
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
class LabelsExporter(BaseExporter):
|
|
40
|
-
"""
|
|
41
|
+
"""Exporter for Labels curations.
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
Attributes:
|
|
44
|
+
attribute (Literal["tissue", "disease", "sex", "age"]):
|
|
45
|
+
Attribute of the annotations to save.
|
|
46
|
+
|
|
47
|
+
level (Literal["sample", "series"]):
|
|
48
|
+
Level of the annotations.
|
|
49
|
+
|
|
50
|
+
logger (logging.Logger):
|
|
51
|
+
Python builtin Logger.
|
|
52
|
+
|
|
53
|
+
loglevel (int):
|
|
54
|
+
Logging level.
|
|
55
|
+
|
|
56
|
+
logdir (str | Path):
|
|
57
|
+
Path to directory storing logs.
|
|
58
|
+
|
|
59
|
+
verbose (bool):
|
|
60
|
+
Controls logging outputs.
|
|
61
|
+
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(
|
|
65
|
+
self,
|
|
66
|
+
attribute: Literal["tissue", "disease", "sex", "age"],
|
|
67
|
+
level: Literal["sample", "series"],
|
|
68
|
+
logger=None,
|
|
69
|
+
loglevel=20,
|
|
70
|
+
logdir=get_default_log_dir(),
|
|
71
|
+
verbose=True,
|
|
72
|
+
):
|
|
73
|
+
self.attribute = attribute
|
|
74
|
+
self._database = self._load_annotations(level)
|
|
43
75
|
|
|
44
76
|
if logger is None:
|
|
45
77
|
logger = setup_logger(__name__, level=loglevel, log_dir=logdir)
|
|
46
78
|
self.log: logging.Logger = logger
|
|
47
79
|
self.verbose: bool = verbose
|
|
48
80
|
|
|
49
|
-
def
|
|
81
|
+
def add_sources(self, labels: Labels) -> Labels:
|
|
82
|
+
"""Add the sources that contributed to the labels of each sample or dataset.
|
|
83
|
+
|
|
84
|
+
Arguments:
|
|
85
|
+
labels (Labels):
|
|
86
|
+
A populated Labels curation object.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
The Labels object with additional source IDs for each index.
|
|
90
|
+
|
|
50
91
|
"""
|
|
51
|
-
|
|
92
|
+
sources = {labels.index_col: [], "sources": []}
|
|
93
|
+
for idx in labels.index:
|
|
94
|
+
sources[labels.index_col].append(idx)
|
|
95
|
+
|
|
96
|
+
# get sources for a particular index for the specified attribute
|
|
97
|
+
sources["sources"].append(
|
|
98
|
+
"|".join(list(self._database[idx][self.attribute].keys()))
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
return labels.add_ids(pl.DataFrame(sources))
|
|
102
|
+
|
|
103
|
+
def get_sra(self, labels: Labels, fields: list[str]) -> Labels:
|
|
104
|
+
"""Retrieve SRA IDs from the annotations if they exist.
|
|
52
105
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
filters.
|
|
106
|
+
Arguments:
|
|
107
|
+
labels (Labels):
|
|
108
|
+
A Labels curation containing samples and terms matching user-specified
|
|
109
|
+
filters.
|
|
58
110
|
|
|
59
|
-
|
|
60
|
-
|
|
111
|
+
fields (list[str]):
|
|
112
|
+
SRA ID levels (i.e., srr, srx, srs, or srp)
|
|
61
113
|
|
|
62
|
-
Returns
|
|
63
|
-
|
|
64
|
-
A new Annotations curation with merged SRA IDs.
|
|
114
|
+
Returns:
|
|
115
|
+
A new Labels curation with merged SRA IDs.
|
|
65
116
|
|
|
66
117
|
"""
|
|
118
|
+
|
|
67
119
|
_labels = self._load_annotations(
|
|
68
120
|
level=labels.index_col
|
|
69
121
|
) # all MetaHQ annotations
|
|
@@ -91,18 +143,22 @@ class LabelsExporter(BaseExporter):
|
|
|
91
143
|
metadata: str | None = None,
|
|
92
144
|
**kwargs,
|
|
93
145
|
):
|
|
94
|
-
"""
|
|
146
|
+
"""Save labels curation to json. Keys are terms and values are
|
|
147
|
+
positive, negative, neutral, and control labeled entries.
|
|
95
148
|
|
|
96
|
-
|
|
97
|
-
|
|
149
|
+
Arguments:
|
|
150
|
+
labels (Labels):
|
|
151
|
+
A populated Labels curation object.
|
|
152
|
+
|
|
153
|
+
fmt (Literal["json", "parquet", "csv", "tsv"]):
|
|
154
|
+
File format to save to.
|
|
155
|
+
|
|
156
|
+
file (FilePath):
|
|
157
|
+
Path to outfile.json.
|
|
98
158
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
outfile: FilePath
|
|
102
|
-
Path to outfile.json.
|
|
159
|
+
metadata (str):
|
|
160
|
+
Metadata fields to include.
|
|
103
161
|
|
|
104
|
-
metadata: str
|
|
105
|
-
Metadata fields to include.
|
|
106
162
|
"""
|
|
107
163
|
_ = checkdir(file, is_file=True)
|
|
108
164
|
opt = {
|
|
@@ -119,34 +175,37 @@ class LabelsExporter(BaseExporter):
|
|
|
119
175
|
def to_csv(
|
|
120
176
|
self, curation: Labels, file: FilePath, metadata: str | None = None, **kwargs
|
|
121
177
|
):
|
|
122
|
-
"""
|
|
123
|
-
|
|
178
|
+
"""Save labels to csv.
|
|
179
|
+
|
|
180
|
+
Arguments:
|
|
181
|
+
curation (Labels):
|
|
182
|
+
A populated Labels curation object.
|
|
124
183
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
outfile: FilePath
|
|
128
|
-
Path to outfile.csv.
|
|
184
|
+
file (FilePath):
|
|
185
|
+
Path to outfile.csv.
|
|
129
186
|
|
|
130
|
-
|
|
131
|
-
|
|
187
|
+
metadata (str):
|
|
188
|
+
Metadata fields to include.
|
|
132
189
|
|
|
133
190
|
"""
|
|
134
191
|
self._save_tabular("csv", curation, file, metadata, **kwargs)
|
|
135
192
|
|
|
136
193
|
def to_json(self, curation: Labels, file: FilePath, metadata: str | None = None):
|
|
137
|
-
"""
|
|
138
|
-
Save labels curation to json. Keys are terms and values are
|
|
194
|
+
"""Save labels curation to json. Keys are terms and values are
|
|
139
195
|
positively labeled indices.
|
|
140
196
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
Path to outfile.json.
|
|
197
|
+
Arguments:
|
|
198
|
+
curation (Labels):
|
|
199
|
+
A populated Labels curation object.
|
|
145
200
|
|
|
146
|
-
|
|
147
|
-
|
|
201
|
+
file (FilePath):
|
|
202
|
+
Path to outfile.json.
|
|
203
|
+
|
|
204
|
+
metadata (str):
|
|
205
|
+
Metadata fields to include.
|
|
148
206
|
|
|
149
207
|
"""
|
|
208
|
+
|
|
150
209
|
has_controls = any(
|
|
151
210
|
term.startswith(disease_ontologies()) for term in curation.entities
|
|
152
211
|
)
|
|
@@ -164,13 +223,14 @@ class LabelsExporter(BaseExporter):
|
|
|
164
223
|
isinstance(metadata, str)
|
|
165
224
|
& (metadata.strip().replace(",", "") == curation.index_col)
|
|
166
225
|
):
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
226
|
+
metadata = curation.index_col
|
|
227
|
+
|
|
228
|
+
if isinstance(metadata, str):
|
|
229
|
+
# add sources
|
|
230
|
+
curation = self.add_sources(curation)
|
|
171
231
|
|
|
172
|
-
elif isinstance(metadata, str):
|
|
173
232
|
_metadata = self._parse_metafields(curation.index_col, metadata)
|
|
233
|
+
_metadata.extend(["sources"])
|
|
174
234
|
|
|
175
235
|
if self._sra_in_metadata(_metadata):
|
|
176
236
|
curation = self.get_sra(
|
|
@@ -209,19 +269,17 @@ class LabelsExporter(BaseExporter):
|
|
|
209
269
|
metadata: str | None = None,
|
|
210
270
|
**kwargs,
|
|
211
271
|
):
|
|
212
|
-
"""
|
|
213
|
-
Save labels to parquet.
|
|
272
|
+
"""Save labels to parquet.
|
|
214
273
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
Labels curation object to save.
|
|
274
|
+
Arguments:
|
|
275
|
+
curation (Labels):
|
|
276
|
+
Labels curation object to save.
|
|
219
277
|
|
|
220
|
-
|
|
221
|
-
|
|
278
|
+
file (FilePath):
|
|
279
|
+
Path to outfile.parquet.
|
|
222
280
|
|
|
223
|
-
|
|
224
|
-
|
|
281
|
+
metadata (str | None):
|
|
282
|
+
Metadata fields to include.
|
|
225
283
|
|
|
226
284
|
"""
|
|
227
285
|
self._save_tabular("parquet", curation, file, metadata, **kwargs)
|
|
@@ -229,16 +287,17 @@ class LabelsExporter(BaseExporter):
|
|
|
229
287
|
def to_tsv(
|
|
230
288
|
self, curation: Labels, file: FilePath, metadata: str | None = None, **kwargs
|
|
231
289
|
):
|
|
232
|
-
"""
|
|
233
|
-
|
|
290
|
+
"""Save labels to tsv.
|
|
291
|
+
|
|
292
|
+
Arguments:
|
|
293
|
+
curation (Labels):
|
|
294
|
+
A populated Labels curation object.
|
|
234
295
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
outfile: FilePath
|
|
238
|
-
Path to outfile.tsv.
|
|
296
|
+
file (FilePath):
|
|
297
|
+
Path to outfile.tsv.
|
|
239
298
|
|
|
240
|
-
|
|
241
|
-
|
|
299
|
+
metadata (str):
|
|
300
|
+
Metadata fields to include.
|
|
242
301
|
|
|
243
302
|
"""
|
|
244
303
|
self._save_tabular("tsv", curation, file, metadata, **kwargs)
|
|
@@ -358,6 +417,11 @@ class LabelsExporter(BaseExporter):
|
|
|
358
417
|
curation = self.get_sra(
|
|
359
418
|
curation, [field for field in _metadata if field in database_ids("sra")]
|
|
360
419
|
)
|
|
420
|
+
|
|
421
|
+
# add sources
|
|
422
|
+
curation = self.add_sources(curation)
|
|
423
|
+
_metadata = _metadata + ["sources"]
|
|
424
|
+
|
|
361
425
|
if "description" in _metadata:
|
|
362
426
|
self._save_table_with_description(
|
|
363
427
|
file, curation, _metadata, fmt=fmt, **kwargs
|
|
@@ -394,13 +458,7 @@ class LabelsExporter(BaseExporter):
|
|
|
394
458
|
for entity in labels:
|
|
395
459
|
label = str(row[entity])
|
|
396
460
|
if label in LABEL_KEY:
|
|
397
|
-
|
|
398
|
-
labels[entity][LABEL_KEY[label]].append(idx)
|
|
399
|
-
except KeyError:
|
|
400
|
-
print(labels[entity])
|
|
401
|
-
print(LABEL_KEY[label])
|
|
402
|
-
print(idx)
|
|
403
|
-
exit()
|
|
461
|
+
labels[entity][LABEL_KEY[label]].append(idx)
|
|
404
462
|
|
|
405
463
|
def _write_row_with_metadata(
|
|
406
464
|
self,
|
metahq_core/logger.py
CHANGED
|
@@ -4,11 +4,10 @@ Logger setup.
|
|
|
4
4
|
Author: Parker Hicks
|
|
5
5
|
Date: 2025-10-16
|
|
6
6
|
|
|
7
|
-
Last updated:
|
|
7
|
+
Last updated: 2026-02-04 by Parker Hicks
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
|
-
from logging.handlers import TimedRotatingFileHandler
|
|
12
11
|
from pathlib import Path
|
|
13
12
|
|
|
14
13
|
|
|
@@ -17,22 +16,19 @@ def setup_logger(
|
|
|
17
16
|
log_dir: str | Path,
|
|
18
17
|
level: int = logging.INFO,
|
|
19
18
|
) -> logging.Logger:
|
|
20
|
-
"""
|
|
21
|
-
Sets up a logger.
|
|
19
|
+
"""Sets up a logger.
|
|
22
20
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
Logger name.
|
|
21
|
+
Arguments:
|
|
22
|
+
name (str):
|
|
23
|
+
Logger name.
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
log_dir (str | Path):
|
|
26
|
+
Path to logging directory.
|
|
30
27
|
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
level (int):
|
|
29
|
+
Logging level.
|
|
33
30
|
|
|
34
|
-
Returns
|
|
35
|
-
-------
|
|
31
|
+
Returns:
|
|
36
32
|
Configured logger.
|
|
37
33
|
|
|
38
34
|
"""
|
|
@@ -56,11 +52,8 @@ def setup_logger(
|
|
|
56
52
|
logger.addHandler(console_handler)
|
|
57
53
|
|
|
58
54
|
# file handler
|
|
59
|
-
file_handler =
|
|
55
|
+
file_handler = logging.FileHandler(
|
|
60
56
|
Path(log_dir) / "log.log",
|
|
61
|
-
when="midnight",
|
|
62
|
-
interval=1,
|
|
63
|
-
backupCount=30,
|
|
64
57
|
encoding="utf-8",
|
|
65
58
|
)
|
|
66
59
|
file_handler.setFormatter(formatter)
|