metahq-core 0.1.2__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metahq_core/__init__.py +1 -1
- metahq_core/curations/annotation_converter.py +5 -5
- metahq_core/curations/annotations.py +361 -151
- metahq_core/curations/index.py +104 -43
- metahq_core/curations/labels.py +259 -128
- metahq_core/curations/propagator.py +62 -85
- metahq_core/export/__init__.py +0 -0
- metahq_core/export/annotations.py +125 -59
- metahq_core/export/labels.py +128 -70
- metahq_core/logger.py +11 -18
- metahq_core/query.py +346 -241
- metahq_core/{ontology/loader.py → relations_loader.py} +2 -1
- metahq_core/search.py +37 -14
- metahq_core/util/io.py +109 -46
- metahq_core/util/supported.py +16 -5
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/METADATA +13 -6
- metahq_core-1.0.0rc1.dist-info/RECORD +30 -0
- {metahq_core-0.1.2.dist-info → metahq_core-1.0.0rc1.dist-info}/WHEEL +1 -1
- metahq_core-1.0.0rc1.dist-info/licenses/LICENSE +28 -0
- metahq_core/ontology/base.py +0 -376
- metahq_core/ontology/graph.py +0 -252
- metahq_core-0.1.2.dist-info/RECORD +0 -30
- /metahq_core/{ontology → curations}/__init__.py +0 -0
metahq_core/curations/labels.py
CHANGED
|
@@ -4,7 +4,7 @@ Class for mutating and operating on sets of labels.
|
|
|
4
4
|
Author: Parker Hicks
|
|
5
5
|
Date: 2025-08-13
|
|
6
6
|
|
|
7
|
-
Last updated:
|
|
7
|
+
Last updated: 2026-02-02 by Parker Hicks
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
@@ -12,14 +12,14 @@ from __future__ import annotations
|
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from typing import TYPE_CHECKING, Literal
|
|
14
14
|
|
|
15
|
-
import numpy as np
|
|
16
15
|
import polars as pl
|
|
17
16
|
|
|
18
17
|
from metahq_core.curations.base import BaseCuration
|
|
19
18
|
from metahq_core.curations.index import Ids
|
|
20
19
|
from metahq_core.export.labels import LabelsExporter
|
|
21
20
|
from metahq_core.logger import setup_logger
|
|
22
|
-
from metahq_core.util.alltypes import
|
|
21
|
+
from metahq_core.util.alltypes import NpIntMatrix
|
|
22
|
+
from metahq_core.util.supported import get_default_log_dir
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
import logging
|
|
@@ -27,72 +27,24 @@ if TYPE_CHECKING:
|
|
|
27
27
|
|
|
28
28
|
# TODO: Add method to remove redundant terms
|
|
29
29
|
class Labels(BaseCuration):
|
|
30
|
-
"""
|
|
31
|
-
Class for storing and mutating labels.
|
|
30
|
+
"""Class for storing and mutating labels.
|
|
32
31
|
|
|
33
32
|
Currently supports -1, 0, +1 labels.
|
|
34
33
|
|
|
35
|
-
Attributes
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
attribute entity for each index (e.g. male or female, tissues, diseases, etc).
|
|
40
|
-
|
|
41
|
-
disease: bool
|
|
42
|
-
Indicates if the annotations are disease based. Used to account for control samples
|
|
43
|
-
when converting annotations to labels.
|
|
44
|
-
|
|
45
|
-
index_col: IdArray
|
|
46
|
-
Name of the column of data that contains the index IDs.
|
|
47
|
-
|
|
48
|
-
group_cols: tuple
|
|
49
|
-
Names of columns of data that contain an ID for each index indicating if it belongs
|
|
50
|
-
to a particular group (e.g. dataset, sex, platform, etc.).
|
|
51
|
-
|
|
52
|
-
collapsed: bool
|
|
53
|
-
Indicates if the annotations have already been collapsed.
|
|
54
|
-
|
|
55
|
-
Methods
|
|
56
|
-
-------
|
|
57
|
-
drop()
|
|
58
|
-
Wrapper for polars `drop`.
|
|
59
|
-
|
|
60
|
-
filter()
|
|
61
|
-
Wrapper for polars `filter`.
|
|
62
|
-
|
|
63
|
-
head()
|
|
64
|
-
Wrapper for polars `head`.
|
|
65
|
-
|
|
66
|
-
select()
|
|
67
|
-
Wrapper for polars `select`.
|
|
68
|
-
|
|
69
|
-
slice()
|
|
70
|
-
Wrapper for polars `slice`.
|
|
71
|
-
|
|
72
|
-
Properties
|
|
73
|
-
---------
|
|
74
|
-
entities: list[str]
|
|
75
|
-
columns of the annotations frame of ontology terms.
|
|
76
|
-
|
|
77
|
-
groups: list[str]
|
|
78
|
-
Groups associated with each index of the annotations curation.
|
|
79
|
-
Note that groups are not unique.
|
|
80
|
-
|
|
81
|
-
ids: pl.DataFrame
|
|
82
|
-
The frame of all IDs within the annotations curation.
|
|
83
|
-
|
|
84
|
-
index
|
|
85
|
-
The index IDs of the annotations frame.
|
|
34
|
+
Attributes:
|
|
35
|
+
data (pl.DataFrame):
|
|
36
|
+
Polars DataFrame with columns `index`, `groups` and columns for each
|
|
37
|
+
attribute entity for each index (e.g. male or female, tissues, diseases, etc).
|
|
86
38
|
|
|
87
|
-
|
|
88
|
-
|
|
39
|
+
index_col (str):
|
|
40
|
+
Name of the column of data that contains the index IDs.
|
|
89
41
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
unique_groups: list[str]
|
|
94
|
-
Unique groups in the annotations curation.
|
|
42
|
+
group_cols (tuple[str, ...]):
|
|
43
|
+
Names of columns of data that contain an ID for each index indicating if it belongs
|
|
44
|
+
to a particular group (e.g. dataset, sex, platform, etc.).
|
|
95
45
|
|
|
46
|
+
collapsed (bool):
|
|
47
|
+
Indicates if the annotations have already been collapsed.
|
|
96
48
|
"""
|
|
97
49
|
|
|
98
50
|
def __init__(
|
|
@@ -104,7 +56,7 @@ class Labels(BaseCuration):
|
|
|
104
56
|
collapsed: bool = False,
|
|
105
57
|
logger=None,
|
|
106
58
|
loglevel=20,
|
|
107
|
-
logdir=
|
|
59
|
+
logdir=get_default_log_dir(),
|
|
108
60
|
verbose=True,
|
|
109
61
|
):
|
|
110
62
|
self.data = data
|
|
@@ -120,14 +72,21 @@ class Labels(BaseCuration):
|
|
|
120
72
|
self.verbose: bool = verbose
|
|
121
73
|
|
|
122
74
|
def add_ids(self, new: pl.DataFrame) -> Labels:
|
|
123
|
-
"""
|
|
124
|
-
Append new group ID columns to the IDs of a Labels object. The new
|
|
75
|
+
"""Append new group ID columns to the IDs of a Labels object. The new
|
|
125
76
|
IDs must have a matching index.
|
|
77
|
+
|
|
78
|
+
Arguments:
|
|
79
|
+
new (pl.DataFrame):
|
|
80
|
+
A DataFrame of additional IDs to join with the current index column of `data`.
|
|
81
|
+
Must have the same index column as the original `data`.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
A new Labels object including the new ID columns.
|
|
126
85
|
"""
|
|
127
86
|
new_ids = new.join(
|
|
128
87
|
self.ids, on=self.index_col, how="inner", maintain_order="right"
|
|
129
88
|
)
|
|
130
|
-
new_groups = tuple(
|
|
89
|
+
new_groups = tuple(col for col in new_ids.columns if col != self.index_col)
|
|
131
90
|
assert new_ids.height == self.ids.height, "SRA IDs height mismatch."
|
|
132
91
|
assert (
|
|
133
92
|
new_ids[self.index_col].to_list() == self.index
|
|
@@ -138,11 +97,37 @@ class Labels(BaseCuration):
|
|
|
138
97
|
)
|
|
139
98
|
|
|
140
99
|
def drop(self, *args, **kwargs):
|
|
141
|
-
"""Wrapper for polars drop.
|
|
100
|
+
"""Wrapper for polars drop. Drops any of the term columns.
|
|
101
|
+
ID columns are not dropped through this method.
|
|
102
|
+
"""
|
|
142
103
|
self.data = self.data.drop(*args, **kwargs)
|
|
143
104
|
|
|
144
105
|
def filter(self, condition: pl.Expr) -> Labels:
|
|
145
|
-
"""Filter both data and ids simultaneously using a mask.
|
|
106
|
+
"""Filter both data and ids simultaneously using a mask.
|
|
107
|
+
|
|
108
|
+
Arguments:
|
|
109
|
+
condition (pl.Expr):
|
|
110
|
+
Polars expression for filtering columns.
|
|
111
|
+
|
|
112
|
+
Examples:
|
|
113
|
+
>>> from metahq_core.curations.labels import Labels
|
|
114
|
+
>>> labels = {
|
|
115
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
116
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
117
|
+
'UBERON:0000948': [1, -1, -1],
|
|
118
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
119
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
120
|
+
}
|
|
121
|
+
>>> labels = Labels.from_df(anno, index_col="sample", group_cols=["series"])
|
|
122
|
+
>>> labels.filter(pl.col("UBERON:0000948") == 1)
|
|
123
|
+
┌────────┬────────┬────────────────┬────────────────┬────────────────┐
|
|
124
|
+
│ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002113 ┆ UBERON:0000955 │
|
|
125
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
126
|
+
│ str ┆ str ┆ i32 ┆ i32 ┆ i32 │
|
|
127
|
+
╞════════╪════════╪════════════════╪════════════════╪════════════════╡
|
|
128
|
+
│ GSM1 ┆ GSE1 ┆ 1 ┆ -1 ┆ -1 │
|
|
129
|
+
└────────┴────────┴────────────────┴────────────────┴────────────────┘
|
|
130
|
+
"""
|
|
146
131
|
mask = self.data.select(condition.arg_true()).to_numpy().reshape(-1)
|
|
147
132
|
|
|
148
133
|
filtered_data = (
|
|
@@ -166,29 +151,56 @@ class Labels(BaseCuration):
|
|
|
166
151
|
|
|
167
152
|
def save(
|
|
168
153
|
self,
|
|
169
|
-
outfile:
|
|
154
|
+
outfile: str | Path,
|
|
170
155
|
fmt: Literal["json", "parquet", "csv", "tsv"],
|
|
156
|
+
attribute: str,
|
|
157
|
+
level: str,
|
|
171
158
|
metadata: str | None = None,
|
|
172
159
|
):
|
|
173
|
-
"""
|
|
174
|
-
Save labels curation to json. Keys are terms and values are
|
|
175
|
-
positively annotated indices.
|
|
160
|
+
"""Save the labels curation.
|
|
176
161
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
Path to outfile.json.
|
|
162
|
+
Arguments:
|
|
163
|
+
outfile (str | Path):
|
|
164
|
+
Path to the output file (its format is selected by `fmt`).
|
|
181
165
|
|
|
182
|
-
|
|
183
|
-
|
|
166
|
+
fmt (Literal["json", "parquet", "csv", "tsv"]):
|
|
167
|
+
File format to save to.
|
|
168
|
+
|
|
169
|
+
attribute (str):
|
|
170
|
+
A supported MetaHQ annotated attribute.
|
|
171
|
+
|
|
172
|
+
level (str):
|
|
173
|
+
An index level supported by MetaHQ.
|
|
174
|
+
|
|
175
|
+
metadata (str | None):
|
|
176
|
+
Metadata fields to include formatted as a comma
|
|
177
|
+
delimited string.
|
|
178
|
+
|
|
179
|
+
Examples:
|
|
180
|
+
|
|
181
|
+
If `metadata` is None, will only save the index column
|
|
182
|
+
with the remaining labels.
|
|
183
|
+
|
|
184
|
+
>>> from metahq_core.curations.labels import Labels
|
|
185
|
+
>>> labels = {
|
|
186
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
187
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
188
|
+
'UBERON:0000948': [1, -1, -1],
|
|
189
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
190
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
191
|
+
}
|
|
192
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
193
|
+
>>> labels.save(
|
|
194
|
+
'/path/to/out.parquet', fmt="parquet", attribute="tissue", level="sample"
|
|
195
|
+
)
|
|
184
196
|
|
|
185
197
|
"""
|
|
186
|
-
LabelsExporter(logger=self.log, verbose=self.verbose).save(
|
|
198
|
+
LabelsExporter(attribute, level, logger=self.log, verbose=self.verbose).save(
|
|
187
199
|
self, fmt, outfile, metadata
|
|
188
200
|
)
|
|
189
201
|
|
|
190
202
|
def select(self, *args, **kwargs) -> Labels:
|
|
191
|
-
"""Select
|
|
203
|
+
"""Select label entity columns while maintaining ids."""
|
|
192
204
|
selected_data = self.data.select(*args, **kwargs)
|
|
193
205
|
|
|
194
206
|
return self.__class__(
|
|
@@ -202,48 +214,24 @@ class Labels(BaseCuration):
|
|
|
202
214
|
)
|
|
203
215
|
|
|
204
216
|
def slice(self, offset: int, length: int | None = None) -> Labels:
|
|
205
|
-
"""Slice both data and ids simultaneously using polars slice.
|
|
206
|
-
sliced_data = self.data.slice(offset, length)
|
|
207
|
-
sliced_ids_data = self._ids.data.slice(offset, length)
|
|
208
|
-
|
|
209
|
-
return self.__class__(
|
|
210
|
-
data=sliced_data,
|
|
211
|
-
ids=sliced_ids_data,
|
|
212
|
-
index_col=self.index_col,
|
|
213
|
-
group_cols=self.group_cols,
|
|
214
|
-
collapsed=self.collapsed,
|
|
215
|
-
logger=self.log,
|
|
216
|
-
verbose=self.verbose,
|
|
217
|
-
)
|
|
217
|
+
"""Slice both data and ids simultaneously using `polars` slice.
|
|
218
218
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
subset. Note the returned order may not match.
|
|
223
|
-
|
|
224
|
-
Parameters
|
|
225
|
-
----------
|
|
226
|
-
subset: list[str] | np.ndarray
|
|
227
|
-
Array-like of index IDs to select from the expression frame.
|
|
219
|
+
Arguments:
|
|
220
|
+
offset (int):
|
|
221
|
+
Index position to begin the slice.
|
|
228
222
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
A new LazyExp object with the subset of index IDs in the frame.
|
|
223
|
+
length (int | None):
|
|
224
|
+
Number of indices past `offset` to slice out.
|
|
232
225
|
|
|
226
|
+
Returns:
|
|
227
|
+
Sliced Labels object as a subset of the original Labels.
|
|
233
228
|
"""
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
)
|
|
237
|
-
|
|
238
|
-
diff = abs(len(mask) != len(subset))
|
|
239
|
-
if (diff != 0) and self.verbose:
|
|
240
|
-
self.log.warning("%s indices not found in the frame.", diff)
|
|
229
|
+
sliced_data = self.data.slice(offset, length)
|
|
230
|
+
sliced_ids_data = self._ids.data.slice(offset, length)
|
|
241
231
|
|
|
242
232
|
return self.__class__(
|
|
243
|
-
data=
|
|
244
|
-
|
|
245
|
-
.drop("index"),
|
|
246
|
-
ids=self._ids.filter_by_mask(mask).data,
|
|
233
|
+
data=sliced_data,
|
|
234
|
+
ids=sliced_ids_data,
|
|
247
235
|
index_col=self.index_col,
|
|
248
236
|
group_cols=self.group_cols,
|
|
249
237
|
collapsed=self.collapsed,
|
|
@@ -253,17 +241,53 @@ class Labels(BaseCuration):
|
|
|
253
241
|
|
|
254
242
|
def to_numpy(self) -> NpIntMatrix:
|
|
255
243
|
"""Wrapper for polars `to_numpy`."""
|
|
256
|
-
return
|
|
244
|
+
return self.data.to_numpy()
|
|
257
245
|
|
|
258
246
|
@classmethod
|
|
259
247
|
def from_df(
|
|
260
248
|
cls,
|
|
261
249
|
df: pl.DataFrame,
|
|
262
250
|
index_col: str,
|
|
263
|
-
group_cols: tuple[str, ...] | list[str]
|
|
251
|
+
group_cols: tuple[str, ...] | list[str],
|
|
264
252
|
**kwargs,
|
|
265
253
|
) -> Labels:
|
|
266
|
-
"""Creates a Labels object from a combined DataFrame.
|
|
254
|
+
"""Creates a Labels object from a combined DataFrame.
|
|
255
|
+
|
|
256
|
+
Arguments:
|
|
257
|
+
df (pl.DataFrame):
|
|
258
|
+
Polars DataFrame with index and group ID columns and columns for each
|
|
259
|
+
attribute entity for each index (e.g. male or female, tissues, diseases, etc).
|
|
260
|
+
|
|
261
|
+
index_col (str):
|
|
262
|
+
Name of the column of data that contains the index IDs.
|
|
263
|
+
|
|
264
|
+
group_cols (tuple[str, ...]):
|
|
265
|
+
Names of columns of data that contain an ID for each index indicating if it belongs
|
|
266
|
+
to a particular group (e.g. dataset, sex, platform, etc.).
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
A Labels object constructed from `df`.
|
|
270
|
+
|
|
271
|
+
Examples:
|
|
272
|
+
>>> from metahq_core.curations.labels import Labels
|
|
273
|
+
>>> labels = {
|
|
274
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
275
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
276
|
+
'UBERON:0000948': [1, -1, -1],
|
|
277
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
278
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
279
|
+
}
|
|
280
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
281
|
+
┌────────┬────────┬────────────────┬────────────────┬────────────────┐
|
|
282
|
+
│ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002113 ┆ UBERON:0000955 │
|
|
283
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
284
|
+
│ str ┆ str ┆ i64 ┆ i64 ┆ i64 │
|
|
285
|
+
╞════════╪════════╪════════════════╪════════════════╪════════════════╡
|
|
286
|
+
│ GSM1 ┆ GSE1 ┆ 1 ┆ -1 ┆ -1 │
|
|
287
|
+
│ GSM2 ┆ GSE1 ┆ -1 ┆ 1 ┆ -1 │
|
|
288
|
+
│ GSM3 ┆ GSE2 ┆ -1 ┆ -1 ┆ 1 │
|
|
289
|
+
└────────┴────────┴────────────────┴────────────────┴────────────────┘
|
|
290
|
+
"""
|
|
267
291
|
id_columns = [index_col] + list(group_cols)
|
|
268
292
|
ids_data = df.select(id_columns)
|
|
269
293
|
annotation_data = df.drop(id_columns)
|
|
@@ -278,37 +302,144 @@ class Labels(BaseCuration):
|
|
|
278
302
|
|
|
279
303
|
@property
|
|
280
304
|
def entities(self) -> list[str]:
|
|
281
|
-
"""Returns column names of the
|
|
305
|
+
"""Returns column names of the Labels frame.
|
|
306
|
+
|
|
307
|
+
Examples:
|
|
308
|
+
>>> from metahq_core.curations.labels import Labels
|
|
309
|
+
>>> labels = {
|
|
310
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
311
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
312
|
+
'UBERON:0000948': [1, -1, -1],
|
|
313
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
314
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
315
|
+
}
|
|
316
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
317
|
+
>>> labels.entities
|
|
318
|
+
['UBERON:0000948', 'UBERON:0002113', 'UBERON:0000955']
|
|
319
|
+
"""
|
|
282
320
|
return self.data.columns
|
|
283
321
|
|
|
284
322
|
@property
|
|
285
323
|
def groups(self) -> list[str]:
|
|
286
|
-
"""Returns the groups column of the
|
|
324
|
+
"""Returns the groups column of the Labels curation.
|
|
325
|
+
|
|
326
|
+
Examples:
|
|
327
|
+
>>> from metahq_core.curations.labels import Labels
|
|
328
|
+
>>> labels = {
|
|
329
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
330
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
331
|
+
'UBERON:0000948': [1, -1, -1],
|
|
332
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
333
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
334
|
+
}
|
|
335
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
336
|
+
>>> labels.groups
|
|
337
|
+
['GSE1', 'GSE1', 'GSE2']
|
|
338
|
+
"""
|
|
287
339
|
return self.ids["group"].to_list()
|
|
288
340
|
|
|
289
341
|
@property
|
|
290
342
|
def ids(self) -> pl.DataFrame:
|
|
291
|
-
"""Return the IDs dataframe.
|
|
343
|
+
"""Return the IDs dataframe.
|
|
344
|
+
|
|
345
|
+
Examples:
|
|
346
|
+
>>> from metahq_core.curations.labels import Labels
|
|
347
|
+
>>> labels = {
|
|
348
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
349
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
350
|
+
'UBERON:0000948': [1, -1, -1],
|
|
351
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
352
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
353
|
+
}
|
|
354
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
355
|
+
>>> labels.ids
|
|
356
|
+
┌────────┬────────┐
|
|
357
|
+
│ sample ┆ series │
|
|
358
|
+
│ --- ┆ --- │
|
|
359
|
+
│ str ┆ str │
|
|
360
|
+
╞════════╪════════╡
|
|
361
|
+
│ GSM1 ┆ GSE1 │
|
|
362
|
+
│ GSM2 ┆ GSE1 │
|
|
363
|
+
│ GSM3 ┆ GSE2 │
|
|
364
|
+
└────────┴────────┘
|
|
365
|
+
"""
|
|
292
366
|
return self._ids.data
|
|
293
367
|
|
|
294
368
|
@property
|
|
295
|
-
def index(self) -> list:
|
|
296
|
-
"""Return the index column as a list.
|
|
369
|
+
def index(self) -> list[str]:
|
|
370
|
+
"""Return the index column as a list.
|
|
371
|
+
|
|
372
|
+
Examples:
|
|
373
|
+
>>> from metahq_core.curations.labels import Labels
|
|
374
|
+
>>> labels = {
|
|
375
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
376
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
377
|
+
'UBERON:0000948': [1, -1, -1],
|
|
378
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
379
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
380
|
+
}
|
|
381
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
382
|
+
>>> labels.index
|
|
383
|
+
['GSM1', 'GSM2', 'GSM3']
|
|
384
|
+
"""
|
|
297
385
|
return self._ids.index.to_list()
|
|
298
386
|
|
|
299
387
|
@property
|
|
300
388
|
def n_indices(self) -> int:
|
|
301
|
-
"""Returns number of indices.
|
|
389
|
+
"""Returns number of indices.
|
|
390
|
+
|
|
391
|
+
Examples:
|
|
392
|
+
>>> from metahq_core.curations.labels import Labels
|
|
393
|
+
>>> labels = {
|
|
394
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
395
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
396
|
+
'UBERON:0000948': [1, -1, -1],
|
|
397
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
398
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
399
|
+
}
|
|
400
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
401
|
+
>>> labels.n_indices
|
|
402
|
+
3
|
|
403
|
+
"""
|
|
302
404
|
return self.data.height
|
|
303
405
|
|
|
304
406
|
@property
|
|
305
407
|
def n_entities(self) -> int:
|
|
306
|
-
"""Returns number of entities.
|
|
408
|
+
"""Returns number of entities.
|
|
409
|
+
|
|
410
|
+
Examples:
|
|
411
|
+
>>> from metahq_core.curations.labels import Labels
|
|
412
|
+
>>> labels = {
|
|
413
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
414
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
415
|
+
'UBERON:0000948': [1, -1, -1],
|
|
416
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
417
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
418
|
+
'UBERON:0002107': [-1, -1, -1],
|
|
419
|
+
}
|
|
420
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
421
|
+
>>> labels.n_entities
|
|
422
|
+
4
|
|
423
|
+
"""
|
|
307
424
|
return len(self.entities)
|
|
308
425
|
|
|
309
426
|
@property
|
|
310
427
|
def unique_groups(self) -> list[str]:
|
|
311
|
-
"""Returns unique groups.
|
|
428
|
+
"""Returns unique groups.
|
|
429
|
+
|
|
430
|
+
Examples:
|
|
431
|
+
>>> from metahq_core.curations.labels import Labels
|
|
432
|
+
>>> labels = {
|
|
433
|
+
'sample': ['GSM1', 'GSM2', 'GSM3'],
|
|
434
|
+
'series': ['GSE1', 'GSE1', 'GSE2'],
|
|
435
|
+
'UBERON:0000948': [1, -1, -1],
|
|
436
|
+
'UBERON:0002113': [-1, 1, -1],
|
|
437
|
+
'UBERON:0000955': [-1, -1, 1],
|
|
438
|
+
}
|
|
439
|
+
>>> labels = Labels.from_df(anno, index_col='sample', group_cols=['series'])
|
|
440
|
+
>>> labels.unique_groups
|
|
441
|
+
['GSE1', 'GSE2']
|
|
442
|
+
"""
|
|
312
443
|
return list(set(self.groups))
|
|
313
444
|
|
|
314
445
|
def __repr__(self):
|