metahq-core 0.1.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metahq_core/__init__.py +1 -1
- metahq_core/curations/annotation_converter.py +5 -5
- metahq_core/curations/annotations.py +361 -151
- metahq_core/curations/index.py +104 -43
- metahq_core/curations/labels.py +259 -128
- metahq_core/curations/propagator.py +62 -85
- metahq_core/export/__init__.py +0 -0
- metahq_core/export/annotations.py +125 -59
- metahq_core/export/labels.py +128 -70
- metahq_core/logger.py +11 -18
- metahq_core/query.py +346 -241
- metahq_core/{ontology/loader.py → relations_loader.py} +2 -1
- metahq_core/search.py +37 -14
- metahq_core/util/io.py +109 -46
- metahq_core/util/supported.py +16 -5
- {metahq_core-0.1.1.dist-info → metahq_core-1.0.0rc1.dist-info}/METADATA +29 -5
- metahq_core-1.0.0rc1.dist-info/RECORD +30 -0
- {metahq_core-0.1.1.dist-info → metahq_core-1.0.0rc1.dist-info}/WHEEL +1 -1
- metahq_core-1.0.0rc1.dist-info/licenses/LICENSE +28 -0
- metahq_core/ontology/base.py +0 -376
- metahq_core/ontology/graph.py +0 -252
- metahq_core-0.1.1.dist-info/RECORD +0 -30
- /metahq_core/{ontology → curations}/__init__.py +0 -0
metahq_core/curations/index.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Class to store and operate on indices for tabular data.
|
|
3
3
|
|
|
4
4
|
Author: Parker Hicks
|
|
5
5
|
Date: 2025-08-13
|
|
6
6
|
|
|
7
|
-
Last updated: 2025-
|
|
7
|
+
Last updated: 2025-11-28 by Parker Hicks
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
@@ -14,36 +14,24 @@ import polars as pl
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class Ids:
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
Attributes
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
to_numpy()
|
|
37
|
-
Return IDs as numpy array.
|
|
38
|
-
|
|
39
|
-
from_df()
|
|
40
|
-
Create an Ids object from a polars DataFrame.
|
|
41
|
-
|
|
42
|
-
Properties
|
|
43
|
-
----------
|
|
44
|
-
index: pl.Series
|
|
45
|
-
Returns the index column.
|
|
46
|
-
|
|
17
|
+
"""A class to store and operate on ID columns for tabular data.
|
|
18
|
+
Specifically made as an index for `polars.DataFrame` objects which
|
|
19
|
+
lack index anchoring and tracking.
|
|
20
|
+
|
|
21
|
+
Attributes:
|
|
22
|
+
data (pl.DataFrame):
|
|
23
|
+
DataFrame containing ID columns (index, group, platform, etc.)
|
|
24
|
+
index_col (str):
|
|
25
|
+
Name of the column that contains the primary index IDs.
|
|
26
|
+
|
|
27
|
+
Examples:
|
|
28
|
+
>>> from metahq_core.curations.index import Ids
|
|
29
|
+
>>> ids = pl.DataFrame({
|
|
30
|
+
"sample": ["GSM1", "GSM2", "GSM3"],
|
|
31
|
+
"series": ["GSE1", "GSE1", "GSE2"],
|
|
32
|
+
"platform": ["GPL10", "GPL10", "GPL23"],
|
|
33
|
+
})
|
|
34
|
+
>>> ids = Ids.from_dataframe(ids, index_col="sample")
|
|
47
35
|
"""
|
|
48
36
|
|
|
49
37
|
def __init__(self, data, index_col):
|
|
@@ -51,7 +39,30 @@ class Ids:
|
|
|
51
39
|
self.index_col: str = index_col
|
|
52
40
|
|
|
53
41
|
def filter_by_mask(self, mask: np.ndarray) -> Ids:
|
|
54
|
-
"""Filter the ids DataFrame using a boolean mask.
|
|
42
|
+
"""Filter the ids DataFrame using a boolean mask.
|
|
43
|
+
|
|
44
|
+
Arguments:
|
|
45
|
+
mask (np.ndarray):
|
|
46
|
+
Array of zero-based row positions to keep (integer positions, not a boolean array).
|
|
47
|
+
|
|
48
|
+
Examples:
|
|
49
|
+
>>> from metahq_core.curations.index import Ids
|
|
50
|
+
>>> ids = pl.DataFrame({
|
|
51
|
+
"sample": ["GSM1", "GSM2", "GSM3"],
|
|
52
|
+
"series": ["GSE1", "GSE1", "GSE2"],
|
|
53
|
+
"platform": ["GPL10", "GPL10", "GPL23"],
|
|
54
|
+
})
|
|
55
|
+
>>> ids = Ids.from_dataframe(ids, index_col="sample")
|
|
56
|
+
>>> ids.filter_by_mask(np.array([1, 2])).data
|
|
57
|
+
┌────────┬────────┬──────────┐
|
|
58
|
+
│ sample ┆ series ┆ platform │
|
|
59
|
+
│ --- ┆ --- ┆ --- │
|
|
60
|
+
│ str ┆ str ┆ str │
|
|
61
|
+
╞════════╪════════╪══════════╡
|
|
62
|
+
│ GSM2 ┆ GSE1 ┆ GPL10 │
|
|
63
|
+
│ GSM3 ┆ GSE2 ┆ GPL23 │
|
|
64
|
+
└────────┴────────┴──────────┘
|
|
65
|
+
"""
|
|
55
66
|
filtered_data = (
|
|
56
67
|
self.data.with_row_index(name="tmp_idx")
|
|
57
68
|
.filter(pl.col("tmp_idx").is_in(mask))
|
|
@@ -60,32 +71,82 @@ class Ids:
|
|
|
60
71
|
return Ids(filtered_data, self.index_col)
|
|
61
72
|
|
|
62
73
|
def lazy(self) -> pl.LazyFrame:
|
|
63
|
-
"""
|
|
74
|
+
"""Wrapper for `polars.DataFrame.lazy()`.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
A `polars.LazyFrame` object of the `data` attribute.
|
|
78
|
+
"""
|
|
64
79
|
return self.data.lazy()
|
|
65
80
|
|
|
66
|
-
def to_numpy(self):
|
|
67
|
-
"""
|
|
81
|
+
def to_numpy(self) -> np.ndarray:
|
|
82
|
+
"""Wrapper for `polars.DataFrame.to_numpy()`.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
The `data` attribute as a numpy ndarray.
|
|
86
|
+
"""
|
|
68
87
|
return self.data.to_numpy()
|
|
69
88
|
|
|
70
89
|
@classmethod
|
|
71
|
-
def from_dataframe(cls, df: pl.DataFrame, index_col: str):
|
|
72
|
-
"""Creates an Ids object from a polars DataFrame.
|
|
90
|
+
def from_dataframe(cls, df: pl.DataFrame, index_col: str) -> Ids:
|
|
91
|
+
"""Creates an Ids object from a polars DataFrame.
|
|
92
|
+
|
|
93
|
+
Arguments:
|
|
94
|
+
df (pl.DataFrame):
|
|
95
|
+
A `polars.DataFrame` object with at least one column.
|
|
96
|
+
|
|
97
|
+
index_col (str):
|
|
98
|
+
The name of the column in `df` that should be treated
|
|
99
|
+
as the index of the DataFrame.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
An initialized Ids object.
|
|
103
|
+
|
|
104
|
+
Examples:
|
|
105
|
+
>>> import polars as pl
|
|
106
|
+
>>> from metahq_core.curations.index import Ids
|
|
107
|
+
>>> ids = pl.DataFrame({
|
|
108
|
+
"sample": ["GSM1", "GSM2", "GSM3"],
|
|
109
|
+
"series": ["GSE1", "GSE1", "GSE2"],
|
|
110
|
+
"platform": ["GPL10", "GPL10", "GPL23"],
|
|
111
|
+
})
|
|
112
|
+
>>> Ids.from_dataframe(ids, index_col="sample")
|
|
113
|
+
"""
|
|
73
114
|
return cls(df, index_col)
|
|
74
115
|
|
|
75
|
-
def __getitem__(self, idx):
|
|
116
|
+
def __getitem__(self, idx) -> Ids:
|
|
76
117
|
"""Slice the Ids frame with various indexing methods."""
|
|
77
118
|
return Ids(self.data[idx], self.index_col)
|
|
78
119
|
|
|
79
|
-
def __len__(self):
|
|
120
|
+
def __len__(self) -> int:
|
|
80
121
|
"""Return the number of rows."""
|
|
81
122
|
return len(self.data)
|
|
82
123
|
|
|
83
124
|
@property
|
|
84
125
|
def columns(self) -> list[str]:
|
|
85
|
-
"""Returns columns of self.data.
|
|
126
|
+
"""Returns columns of self.data.
|
|
127
|
+
Wrapper for `polars.DataFrame.columns`.
|
|
128
|
+
"""
|
|
86
129
|
return self.data.columns
|
|
87
130
|
|
|
88
131
|
@property
|
|
89
|
-
def index(self):
|
|
90
|
-
"""Get the index column as a Series.
|
|
132
|
+
def index(self) -> pl.Series:
|
|
133
|
+
"""Get the index column as a Series.
|
|
134
|
+
|
|
135
|
+
Examples:
|
|
136
|
+
>>> import polars as pl
|
|
137
|
+
>>> from metahq_core.curations.index import Ids
|
|
138
|
+
>>> ids = pl.DataFrame({
|
|
139
|
+
"sample": ["GSM1", "GSM2", "GSM3"],
|
|
140
|
+
"series": ["GSE1", "GSE1", "GSE2"],
|
|
141
|
+
"platform": ["GPL10", "GPL10", "GPL23"],
|
|
142
|
+
})
|
|
143
|
+
>>> Ids.from_dataframe(ids, index_col="sample").index
|
|
144
|
+
shape: (3,)
|
|
145
|
+
Series: 'sample' [str]
|
|
146
|
+
[
|
|
147
|
+
"GSM1"
|
|
148
|
+
"GSM2"
|
|
149
|
+
"GSM3"
|
|
150
|
+
]
|
|
151
|
+
"""
|
|
91
152
|
return self.data[self.index_col]
|