metahq-core 0.1.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  """
2
- Dataclass to store and operate on indices for tabular data.
2
+ Class to store and operate on indices for tabular data.
3
3
 
4
4
  Author: Parker Hicks
5
5
  Date: 2025-08-13
6
6
 
7
- Last updated: 2025-09-05 by Parker Hicks
7
+ Last updated: 2025-11-28 by Parker Hicks
8
8
  """
9
9
 
10
10
  from __future__ import annotations
@@ -14,36 +14,24 @@ import polars as pl
14
14
 
15
15
 
16
16
  class Ids:
17
- """
18
- Dataclass to store and operate on ID columns for tabular data.
19
- Specifically made as an index for polars dataframes.
20
-
21
- Attributes
22
- ----------
23
- data: pl.DataFrame
24
- DataFrame containing ID columns (index, group, platform, etc.)
25
- index_col: str
26
- Name of the column that contains the primary index IDs
27
-
28
- Methods
29
- -------
30
- filter_by_mask()
31
- Filter rows of the frame by row indices.
32
-
33
- lazy()
34
- Wrapper for polars `lazy` conversion of a DataFrame to LazyFrame.
35
-
36
- to_numpy()
37
- Return IDs as numpy array.
38
-
39
- from_df()
40
- Create an Ids object from a polars DataFrame.
41
-
42
- Properties
43
- ----------
44
- index: pl.Series
45
- Returns the index column.
46
-
17
+ """A class to store and operate on ID columns for tabular data.
18
+ Specifically made as an index for `polars.DataFrame` objects which
19
+ lack index anchoring and tracking.
20
+
21
+ Attributes:
22
+ data (pl.DataFrame):
23
+ DataFrame containing ID columns (index, group, platform, etc.)
24
+ index_col (str):
25
+ Name of the column that contains the primary index IDs.
26
+
27
+ Examples:
28
+ >>> from metahq_core.curations.index import Ids
29
+ >>> ids = pl.DataFrame({
30
+ "sample": ["GSM1", "GSM2", "GSM3"],
31
+ "series": ["GSE1", "GSE1", "GSE2"],
32
+ "platform": ["GPL10", "GPL10", "GPL23"],
33
+ })
34
+ >>> ids = Ids.from_dataframe(ids, index_col="sample")
47
35
  """
48
36
 
49
37
  def __init__(self, data, index_col):
@@ -51,7 +39,30 @@ class Ids:
51
39
  self.index_col: str = index_col
52
40
 
53
41
  def filter_by_mask(self, mask: np.ndarray) -> Ids:
54
- """Filter the ids DataFrame using a boolean mask."""
42
+ """Filter the ids DataFrame to the rows whose positions are listed in `mask`.
43
+
44
+ Arguments:
45
+ mask (np.ndarray):
46
+ Array of indices to keep.
47
+
48
+ Examples:
49
+ >>> from metahq_core.curations.index import Ids
50
+ >>> ids = pl.DataFrame({
51
+ "sample": ["GSM1", "GSM2", "GSM3"],
52
+ "series": ["GSE1", "GSE1", "GSE2"],
53
+ "platform": ["GPL10", "GPL10", "GPL23"],
54
+ })
55
+ >>> ids = Ids.from_dataframe(ids, index_col="sample")
56
+ >>> ids.filter_by_mask(np.array([1, 2])).data
57
+ ┌────────┬────────┬──────────┐
58
+ │ sample ┆ series ┆ platform │
59
+ │ --- ┆ --- ┆ --- │
60
+ │ str ┆ str ┆ str │
61
+ ╞════════╪════════╪══════════╡
62
+ │ GSM2 ┆ GSE1 ┆ GPL10 │
63
+ │ GSM3 ┆ GSE2 ┆ GPL23 │
64
+ └────────┴────────┴──────────┘
65
+ """
55
66
  filtered_data = (
56
67
  self.data.with_row_index(name="tmp_idx")
57
68
  .filter(pl.col("tmp_idx").is_in(mask))
@@ -60,32 +71,82 @@ class Ids:
60
71
  return Ids(filtered_data, self.index_col)
61
72
 
62
73
  def lazy(self) -> pl.LazyFrame:
63
- """Returns the Ids as a polars LazyFrame."""
74
+ """Wrapper for `polars.DataFrame.lazy()`.
75
+
76
+ Returns:
77
+ A `polars.LazyFrame` object of the `data` attribute.
78
+ """
64
79
  return self.data.lazy()
65
80
 
66
- def to_numpy(self):
67
- """Returns the Ids as a numpy array."""
81
+ def to_numpy(self) -> np.ndarray:
82
+ """Wrapper for `polars.DataFrame.to_numpy()`.
83
+
84
+ Returns:
85
+ The `data` attribute as a numpy ndarray.
86
+ """
68
87
  return self.data.to_numpy()
69
88
 
70
89
  @classmethod
71
- def from_dataframe(cls, df: pl.DataFrame, index_col: str):
72
- """Creates an Ids object from a polars DataFrame."""
90
+ def from_dataframe(cls, df: pl.DataFrame, index_col: str) -> Ids:
91
+ """Creates an Ids object from a polars DataFrame.
92
+
93
+ Arguments:
94
+ df (pl.DataFrame):
95
+ A `polars.DataFrame` object with at least one column.
96
+
97
+ index_col (str):
98
+ The name of the column in `df` that should be treated
99
+ as the index of the DataFrame.
100
+
101
+ Returns:
102
+ An initialized Ids object.
103
+
104
+ Examples:
105
+ >>> import polars as pl
106
+ >>> from metahq_core.curations.index import Ids
107
+ >>> ids = pl.DataFrame({
108
+ "sample": ["GSM1", "GSM2", "GSM3"],
109
+ "series": ["GSE1", "GSE1", "GSE2"],
110
+ "platform": ["GPL10", "GPL10", "GPL23"],
111
+ })
112
+ >>> Ids.from_dataframe(ids, index_col="sample")
113
+ """
73
114
  return cls(df, index_col)
74
115
 
75
- def __getitem__(self, idx):
116
+ def __getitem__(self, idx) -> Ids:
76
117
  """Slice the Ids frame with various indexing methods."""
77
118
  return Ids(self.data[idx], self.index_col)
78
119
 
79
- def __len__(self):
120
+ def __len__(self) -> int:
80
121
  """Return the number of rows."""
81
122
  return len(self.data)
82
123
 
83
124
  @property
84
125
  def columns(self) -> list[str]:
85
- """Returns columns of self.data."""
126
+ """Returns columns of self.data.
127
+ Wrapper for `polars.DataFrame.columns`.
128
+ """
86
129
  return self.data.columns
87
130
 
88
131
  @property
89
- def index(self):
90
- """Get the index column as a Series."""
132
+ def index(self) -> pl.Series:
133
+ """Get the index column as a Series.
134
+
135
+ Examples:
136
+ >>> import polars as pl
137
+ >>> from metahq_core.curations.index import Ids
138
+ >>> ids = pl.DataFrame({
139
+ "sample": ["GSM1", "GSM2", "GSM3"],
140
+ "series": ["GSE1", "GSE1", "GSE2"],
141
+ "platform": ["GPL10", "GPL10", "GPL23"],
142
+ })
143
+ >>> Ids.from_dataframe(ids, index_col="sample").index
144
+ shape: (3,)
145
+ Series: 'sample' [str]
146
+ [
147
+ "GSM1"
148
+ "GSM2"
149
+ "GSM3"
150
+ ]
151
+ """
91
152
  return self.data[self.index_col]