metahq-core 0.1.2__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ Class for storing and mutating annotation collections.
4
4
  Author: Parker Hicks
5
5
  Date: 2025-04-14
6
6
 
7
- Last updated: 2025-11-21 by Parker Hicks
7
+ Last updated: 2026-02-02 by Parker Hicks
8
8
  """
9
9
 
10
10
  from __future__ import annotations
@@ -20,7 +20,7 @@ from metahq_core.curations.index import Ids
20
20
  from metahq_core.curations.labels import Labels
21
21
  from metahq_core.export.annotations import AnnotationsExporter
22
22
  from metahq_core.logger import setup_logger
23
- from metahq_core.util.alltypes import FilePath
23
+ from metahq_core.util.supported import get_default_log_dir
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  import logging
@@ -31,87 +31,24 @@ class Annotations(BaseCuration):
31
31
  Class to store and mutate annotations of samples to various attributes
32
32
  like tissues, diseases, sexes, ages, etc.
33
33
 
34
- Attributes
35
- ----------
36
- data: pl.DataFrame
37
- Polars DataFrame with columns `index`, `groups` and columns for each
38
- attribute entity for each index (e.g. male or female, tissues, diseases, etc).
34
+ Attributes:
35
+ data (pl.DataFrame):
36
+ Polars DataFrame with index and group ID columns and columns for each
37
+ attribute entity for each index (e.g. male or female, tissues, diseases, etc).
39
38
 
40
- disease: bool
41
- Indicates if the annotations are disease based. Used to account for control samples
42
- when converting annotations to labels.
39
+ disease (bool):
40
+ Indicates if the annotations are disease based. Used to account for control samples
41
+ when converting annotations to labels.
43
42
 
44
- index_col: IdArray
45
- Name of the column of data that contains the index IDs.
43
+ index_col (str):
44
+ Name of the column of data that contains the index IDs.
46
45
 
47
- group_cols: tuple
48
- Names of columns of data that contain an ID for each index indicating if it belongs
49
- to a particular group (e.g. dataset, sex, platform, etc.).
50
-
51
- collapsed: bool
52
- Indicates if the annotations have already been collapsed.
53
-
54
- Methods
55
- -------
56
- collapse()
57
- Collapses index annotations to group annotations.
58
-
59
- drop()
60
- Wrapper for polars `drop`.
61
-
62
- filter()
63
- Wrapper for polars `filter`.
64
-
65
- from_df()
66
- Creates an Annotations object from a polars DataFrame or LazyFrame.
67
-
68
- head()
69
- Wrapper for polars `head`.
70
-
71
- propagate_controls()
72
- Propagates control samples to diseases that other samples in the same
73
- dataset are annotated to.
74
-
75
- select()
76
- Wrapper for polars `select`.
77
-
78
- slice()
79
- Wrapper for polars `slice`.
80
-
81
- to_labels()
82
- Propagates annotations to labels for an annotations matrix, given a reference
83
- ontology.
84
-
85
- to_numpy()
86
- Returns the annotations frame as a numpy 2D array.
87
-
88
- to_parquet()
89
- Saves the annotations frame and IDs to a .parquet file.
90
-
91
- Properties
92
- ---------
93
- entities: list[str]
94
- columns of the annotations frame of ontology terms.
95
-
96
- groups: list[str]
97
- Groups associated with each index of the annotations curation.
98
- Note that groups are not unique.
99
-
100
- ids: pl.DataFrame
101
- The frame of all IDs within the annotations curation.
102
-
103
- index
104
- The index IDs of the annotations frame.
105
-
106
- n_entities: int
107
- Number of unique entities.
108
-
109
- n_index: int
110
- Number of indices.
111
-
112
- unique_groups: list[str]
113
- Unique groups in the annotations curation.
46
+ group_cols (tuple[str, ...]):
47
+ Names of columns of data that contain an ID for each index indicating if it belongs
48
+ to a particular group (e.g. dataset, sex, platform, etc.).
114
49
 
50
+ collapsed (bool):
51
+ Indicates if the annotations have already been collapsed.
115
52
  """
116
53
 
117
54
  def __init__(
@@ -123,7 +60,7 @@ class Annotations(BaseCuration):
123
60
  collapsed: bool = False,
124
61
  logger=None,
125
62
  loglevel=20,
126
- logdir=Path("."),
63
+ logdir=get_default_log_dir(),
127
64
  verbose=True,
128
65
  ):
129
66
  self.data = data
@@ -139,14 +76,21 @@ class Annotations(BaseCuration):
139
76
  self.verbose: bool = verbose
140
77
 
141
78
  def add_ids(self, new: pl.DataFrame) -> Annotations:
142
- """
143
- Append new group ID columns to the IDs of an Annotations object. The new
79
+ """Append new group ID columns to the IDs of an Annotations object. The new
144
80
  IDs must have a matching index.
81
+
82
+ Arguments:
83
+ new (pl.DataFrame):
84
+ A DataFrame of additional IDs to join with the current index column of `data`.
85
+ Must have a matching index column as the original `data`.
86
+
87
+ Returns:
88
+ A new Annotations object including the new ID columns.
145
89
  """
146
90
  new_ids = new.join(
147
91
  self.ids, on=self.index_col, how="inner", maintain_order="right"
148
92
  )
149
- new_groups = tuple([col for col in new_ids.columns if col != self.index_col])
93
+ new_groups = tuple(col for col in new_ids.columns if col != self.index_col)
150
94
  assert new_ids.height == self.ids.height, "SRA IDs height mismatch."
151
95
  assert (
152
96
  new_ids[self.index_col].to_list() == self.index
@@ -157,15 +101,13 @@ class Annotations(BaseCuration):
157
101
  )
158
102
 
159
103
  def collapse(self, on: str, inplace: bool = True):
160
- """
161
- Collapses annotations on the specified grouping column.
162
-
163
- Args
164
- ----
165
- on: str
166
- The column to collapse on (should be one of the group_cols)
167
- inplace: bool
168
- If True, updates this object and returns self. If False, returns new object.
104
+ """Collapses annotations on the specified grouping column.
105
+
106
+ Arguments:
107
+ on (str):
108
+ The column to collapse on. This should be one of the columns in `group_cols`.
109
+ inplace (bool):
110
+ If True, updates this object and returns self. Otherwise, returns new object.
169
111
  """
170
112
  params = self._collapse(on)
171
113
 
@@ -180,7 +122,9 @@ class Annotations(BaseCuration):
180
122
  return self.__class__(**params)
181
123
 
182
124
  def drop(self, *args, **kwargs) -> Annotations:
183
- """Wrapper for polars drop. Drops any of the term columns."""
125
+ """Wrapper for polars drop. Drops any of the term columns.
126
+ ID columns are not dropped through this method.
127
+ """
184
128
  return self.__class__(
185
129
  data=self.data.drop(*args, **kwargs),
186
130
  ids=self.ids,
@@ -192,7 +136,31 @@ class Annotations(BaseCuration):
192
136
  )
193
137
 
194
138
  def filter(self, condition: pl.Expr) -> Annotations:
195
- """Filter both data and ids simultaneously using a mask."""
139
+ """Filter both data and ids simultaneously using a mask.
140
+
141
+ Arguments:
142
+ condition (pl.Expr):
143
+ Polars expression for filtering columns.
144
+
145
+ Examples:
146
+ >>> from metahq_core.curations.annotations import Annotations
147
+ >>> anno = pl.DataFrame({
148
+ 'sample': ['GSM1', 'GSM2', 'GSM3'],
149
+ 'series': ['GSE1', 'GSE1', 'GSE2'],
150
+ 'UBERON:0000948': [1, 0, 0],
151
+ 'UBERON:0002113': [0, 1, 0],
152
+ 'UBERON:0000955': [0, 0, 1],
153
+ })
154
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
155
+ >>> anno.filter(pl.col("UBERON:0000948") == 1)
156
+ ┌────────┬────────┬────────────────┬────────────────┬────────────────┐
157
+ │ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002113 ┆ UBERON:0000955 │
158
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
159
+ │ str ┆ str ┆ i32 ┆ i32 ┆ i32 │
160
+ ╞════════╪════════╪════════════════╪════════════════╪════════════════╡
161
+ │ GSM1 ┆ GSE1 ┆ 1 ┆ 0 ┆ 0 │
162
+ └────────┴────────┴────────────────┴────────────────┴────────────────┘
163
+ """
196
164
  mask = self.data.select(condition.arg_true()).to_numpy().reshape(-1)
197
165
 
198
166
  filtered_data = (
@@ -216,29 +184,58 @@ class Annotations(BaseCuration):
216
184
 
217
185
  def save(
218
186
  self,
219
- outfile: FilePath,
187
+ outfile: str | Path,
220
188
  fmt: Literal["json", "parquet", "csv", "tsv"],
189
+ attribute: str,
190
+ level: str,
221
191
  metadata: str | None = None,
222
192
  ):
223
- """
224
- Save annotations curation to json. Keys are terms and values are
225
- positively annotated indices.
193
+ """Save the annotations curation.
226
194
 
227
- Parameters
228
- ----------
229
- outfile: FilePath
230
- Path to outfile.json.
195
+ Arguments:
196
+ outfile (str | Path):
197
+ Path to the output file.
231
198
 
232
- metadata: bool
233
- If True, will add index titles to each entry.
199
+ fmt (Literal["json", "parquet", "csv", "tsv"]):
200
+ File format to save to.
234
201
 
202
+ attribute (str):
203
+ A supported MetaHQ annotated attribute.
204
+
205
+ level (str):
206
+ An index level supported by MetaHQ.
207
+
208
+ metadata (str | None):
209
+ If True, will add index titles to each entry.
235
210
  """
236
- AnnotationsExporter(logger=self.log, verbose=self.verbose).save(
237
- self, fmt, outfile, metadata
238
- )
211
+ AnnotationsExporter(
212
+ attribute, level, logger=self.log, verbose=self.verbose
213
+ ).save(self, fmt, outfile, metadata)
239
214
 
240
215
  def sort_columns(self):
241
- """Sorts term columns."""
216
+ """Sorts term columns.
217
+
218
+ Examples:
219
+ >>> from metahq_core.curations.annotations import Annotations
220
+ >>> anno = pl.DataFrame({
221
+ 'sample': ['GSM1', 'GSM2', 'GSM3'],
222
+ 'series': ['GSE1', 'GSE1', 'GSE2'],
223
+ 'UBERON:0000948': [1, 0, 0],
224
+ 'UBERON:0002113': [0, 1, 0],
225
+ 'UBERON:0000955': [0, 0, 1],
226
+ })
227
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
228
+ >>> anno.sort_columns()
229
+ ┌────────┬────────┬────────────────┬────────────────┬────────────────┐
230
+ │ series ┆ sample ┆ UBERON:0000948 ┆ UBERON:0000955 ┆ UBERON:0002113 │
231
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
232
+ │ str ┆ str ┆ i32 ┆ i32 ┆ i32 │
233
+ ╞════════╪════════╪════════════════╪════════════════╪════════════════╡
234
+ │ GSE1 ┆ GSM1 ┆ 1 ┆ 0 ┆ 0 │
235
+ │ GSE1 ┆ GSM2 ┆ 0 ┆ 0 ┆ 1 │
236
+ │ GSE2 ┆ GSM3 ┆ 0 ┆ 1 ┆ 0 │
237
+ └────────┴────────┴────────────────┴────────────────┴────────────────┘
238
+ """
242
239
  return self.__class__(
243
240
  data=self.data.select(sorted(self.data.columns)),
244
241
  ids=self.ids,
@@ -260,35 +257,72 @@ class Annotations(BaseCuration):
260
257
 
261
258
  Assigns propagated labels to terms given their annotations.
262
259
 
263
- Parameters
264
- ----------
265
- to_terms: list[str]
266
- Array of terms to generate labels for, or "union"/"all".
267
-
268
- ontology: str
269
- The name of an ontology to reference for annotation propagation.
270
-
271
- mode: Literal[0, 1]
272
- Mode of propagation.
273
-
274
- If mode is 0, this will propagate any positive annotations
275
- from any descendants of the to_terms up to the to_terms.
276
-
277
- If mode 1, this will convert annotations to -1, 0, +1 labels
278
- where for a particular term, if an index is annotated to that term or
279
- any of its descendants, it recieves a +1 label. If it is annotated to an
280
- ancestor of that term, it receives a 0 (unsure) label. If it is not annotated
281
- to an ancestor or a descendant of that term, it recieves a -1 label.
282
- Any indices annotated to the control column are assigned a label of 2 for any
283
- terms that other indices within the same group are positively labeled to.
284
-
285
- control_col: str
286
- Column name for control annotations.
287
-
288
- Returns
289
- -------
290
- A Labels curation object with propagated -1, 0, +1 labels (and 2 if controls are present).
291
-
260
+ Arguments:
261
+ to_terms (list[str]):
262
+ Array of terms to generate labels for, or "union"/"all".
263
+
264
+ ontology (str):
265
+ The name of an ontology to reference for annotation propagation.
266
+
267
+ mode (Literal[0, 1]):
268
+ Mode of propagation.
269
+
270
+ If mode is 0, this will propagate any positive annotations
271
+ from any descendants of the to_terms up to the to_terms.
272
+
273
+ If mode is 1, this will convert annotations to -1, 0, +1 labels
274
+ where for a particular term, if an index is annotated to that term or
275
+ any of its descendants, it receives a +1 label. If it is annotated to an
276
+ ancestor of that term, it receives a 0 (unsure) label. If it is not annotated
277
+ to an ancestor or a descendant of that term, it receives a -1 label.
278
+ Any indices annotated to the control column are assigned a label of 2 for any
279
+ terms that other indices within the same group are positively labeled to.
280
+
281
+ control_col (str):
282
+ Column name for control annotations.
283
+
284
+ Returns:
285
+ A Labels curation object with propagated -1, 0, +1 labels (and 2 if controls are
286
+ present). Any entries in `index_col` that have a 0 annotation/label across all
287
+ entity columns are dropped.
288
+
289
+ Examples:
290
+
291
+ With `mode=0`:
292
+
293
+ >>> anno = pl.DataFrame(
294
+ {
295
+ "series": ["GSE1", "GSE1", "GSE2"],
296
+ "sample": ["GSM1", "GSM2", "GSM3"],
297
+ "UBERON:0000948": [1, 0, 0],
298
+ "UBERON:0002349": [1, 1, 0],
299
+ "UBERON:0002113": [0, 0, 0],
300
+ "UBERON:0000955": [0, 0, 1],
301
+ }
302
+ )
303
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
304
+ >>> anno.propagate(to_terms=["UBERON:0000948"], ontology="uberon", mode=0)
305
+ ┌────────┬────────┬────────────────┐
306
+ │ sample ┆ series ┆ UBERON:0000948 │
307
+ │ --- ┆ --- ┆ --- │
308
+ │ str ┆ str ┆ i32 │
309
+ ╞════════╪════════╪════════════════╡
310
+ │ GSM1 ┆ GSE1 ┆ 1 │
311
+ │ GSM2 ┆ GSE1 ┆ 1 │
312
+ └────────┴────────┴────────────────┘
313
+
314
+ With `mode=1`:
315
+
316
+ >>> anno.propagate(to_terms=["UBERON:0000948"], ontology="uberon", mode=1)
317
+ ┌────────┬────────┬────────────────┐
318
+ │ sample ┆ series ┆ UBERON:0000948 │
319
+ │ --- ┆ --- ┆ --- │
320
+ │ str ┆ str ┆ i32 │
321
+ ╞════════╪════════╪════════════════╡
322
+ │ GSM1 ┆ GSE1 ┆ 1 │
323
+ │ GSM2 ┆ GSE1 ┆ 1 │
324
+ │ GSM3 ┆ GSE2 ┆ -1 │
325
+ └────────┴────────┴────────────────┘
292
326
  """
293
327
  converter = AnnotationsConverter(
294
328
  self,
@@ -334,7 +368,18 @@ class Annotations(BaseCuration):
334
368
  )
335
369
 
336
370
  def slice(self, offset: int, length: int | None = None) -> Annotations:
337
- """Slice both data and ids simultaneously using polars slice."""
371
+ """Slice both data and ids simultaneously using `polars` slice.
372
+
373
+ Arguments:
374
+ offset (int):
375
+ Index position to begin the slice.
376
+
377
+ length (int | None):
378
+ Number of indices past `offset` to slice out.
379
+
380
+ Returns:
381
+ Sliced Annotations object as a subset of the original Annotations.
382
+ """
338
383
  sliced_data = self.data.slice(offset, length)
339
384
  sliced_ids_data = self._ids.data.slice(offset, length)
340
385
 
@@ -349,7 +394,9 @@ class Annotations(BaseCuration):
349
394
  )
350
395
 
351
396
  def _collapse(self, on: str):
352
- """Collapses index-level annotations to group-level."""
397
+ """Collapses index-level annotations to group-level. Helper function
398
+ for `collapse`.
399
+ """
353
400
  index_anno = self.data.with_columns(self.ids[on])
354
401
  agg_anno = index_anno.group_by(on).agg(pl.col("*").sum()).sort(on)
355
402
  new_ids = self._collapse_ids(on, keep=agg_anno[on].to_list())
@@ -376,7 +423,9 @@ class Annotations(BaseCuration):
376
423
  return params
377
424
 
378
425
  def _collapse_ids(self, on: str, keep: list[str]):
379
- """Group IDs to keep in the new collapsed frame."""
426
+ """Group IDs to keep in the new collapsed frame. Helper function
427
+ for `collapse`.
428
+ """
380
429
  return (
381
430
  self.ids.drop(self.index_col)
382
431
  .unique()
@@ -389,11 +438,49 @@ class Annotations(BaseCuration):
389
438
  cls,
390
439
  df: pl.DataFrame,
391
440
  index_col: str,
392
- group_cols: tuple[str, ...] | list[str] = ("series", "platform"),
441
+ group_cols: tuple[str, ...] | list[str],
393
442
  **kwargs,
394
443
  ) -> Annotations:
395
- """Creates an Annotations object from a combined DataFrame."""
396
-
444
+ """Creates an Annotations object from a combined DataFrame.
445
+
446
+ Arguments:
447
+ df (pl.DataFrame):
448
+ Polars DataFrame with index and group ID columns and columns for each
449
+ attribute entity for each index (e.g. male or female, tissues, diseases, etc).
450
+
451
+ index_col (str):
452
+ Name of the column of data that contains the index IDs.
453
+
454
+ group_cols (tuple[str, ...]):
455
+ Names of columns of data that contain an ID for each index indicating if it belongs
456
+ to a particular group (e.g. dataset, sex, platform, etc.).
457
+
458
+ Returns:
459
+ An Annotations object constructed from `df`.
460
+
461
+ Examples:
462
+ >>> from metahq_core.curations.annotations import Annotations
463
+ >>> anno = pl.DataFrame(
464
+ {
465
+ "series": ["GSE1", "GSE1", "GSE2"],
466
+ "sample": ["GSM1", "GSM2", "GSM3"],
467
+ "UBERON:0000948": [1, 0, 0],
468
+ "UBERON:0002349": [1, 1, 0],
469
+ "UBERON:0002113": [0, 0, 0],
470
+ "UBERON:0000955": [0, 0, 1],
471
+ }
472
+ )
473
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
474
+ ┌────────┬────────┬────────────────┬────────────────┬────────────────┬────────────────┐
475
+ │ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002349 ┆ UBERON:0002113 ┆ UBERON:0000955 │
476
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
477
+ │ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
478
+ ╞════════╪════════╪════════════════╪════════════════╪════════════════╪════════════════╡
479
+ │ GSM1 ┆ GSE1 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │
480
+ │ GSM2 ┆ GSE1 ┆ 0 ┆ 1 ┆ 0 ┆ 0 │
481
+ │ GSM3 ┆ GSE2 ┆ 0 ┆ 0 ┆ 0 ┆ 1 │
482
+ └────────┴────────┴────────────────┴────────────────┴────────────────┴────────────────┘
483
+ """
397
484
  group_cols = tuple(group_cols)
398
485
  id_columns = [index_col] + list(group_cols)
399
486
  ids_data = df.select(id_columns)
@@ -409,37 +496,160 @@ class Annotations(BaseCuration):
409
496
 
410
497
  @property
411
498
  def entities(self) -> list[str]:
412
- """Returns term names of the Annotations frame."""
499
+ """Returns term names of the Annotations frame.
500
+
501
+ Examples:
502
+ >>> anno = pl.DataFrame(
503
+ {
504
+ "series": ["GSE1", "GSE1", "GSE2"],
505
+ "sample": ["GSM1", "GSM2", "GSM3"],
506
+ "UBERON:0000948": [1, 0, 0],
507
+ "UBERON:0002349": [1, 1, 0],
508
+ "UBERON:0002113": [0, 0, 0],
509
+ "UBERON:0000955": [0, 0, 1],
510
+ }
511
+ )
512
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
513
+ >>> anno.entities
514
+ ['UBERON:0000955', 'UBERON:0002349', 'UBERON:0000948', 'UBERON:0002113']
515
+ """
413
516
  return list(set(self.data.columns) - set(self.ids.columns))
414
517
 
415
518
  @property
416
519
  def groups(self) -> list[str]:
417
- """Returns the groups column of the Annotations curation."""
520
+ """Returns the groups column of the Annotations curation.
521
+
522
+
523
+ Examples:
524
+ >>> anno = pl.DataFrame(
525
+ {
526
+ "series": ["GSE1", "GSE1", "GSE2"],
527
+ "sample": ["GSM1", "GSM2", "GSM3"],
528
+ "UBERON:0000948": [1, 0, 0],
529
+ "UBERON:0002349": [1, 1, 0],
530
+ "UBERON:0002113": [0, 0, 0],
531
+ "UBERON:0000955": [0, 0, 1],
532
+ }
533
+ )
534
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
535
+ >>> anno.groups
536
+ ['GSE1', 'GSE1', 'GSE2']
537
+
538
+ """
418
539
  return self.ids["series"].to_list()
419
540
 
420
541
  @property
421
542
  def ids(self) -> pl.DataFrame:
422
- """Return the IDs dataframe."""
543
+ """Return the IDs dataframe.
544
+
545
+
546
+ Examples:
547
+ >>> anno = pl.DataFrame(
548
+ {
549
+ "series": ["GSE1", "GSE1", "GSE2"],
550
+ "sample": ["GSM1", "GSM2", "GSM3"],
551
+ "UBERON:0000948": [1, 0, 0],
552
+ "UBERON:0002349": [1, 1, 0],
553
+ "UBERON:0002113": [0, 0, 0],
554
+ "UBERON:0000955": [0, 0, 1],
555
+ }
556
+ )
557
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
558
+ >>> anno.ids
559
+ ┌────────┬────────┐
560
+ │ sample ┆ series │
561
+ │ --- ┆ --- │
562
+ │ str ┆ str │
563
+ ╞════════╪════════╡
564
+ │ GSM1 ┆ GSE1 │
565
+ │ GSM2 ┆ GSE1 │
566
+ │ GSM3 ┆ GSE2 │
567
+ └────────┴────────┘
568
+ """
423
569
  return self._ids.data
424
570
 
425
571
  @property
426
- def index(self) -> list:
427
- """Return the index column as a list."""
572
+ def index(self) -> list[str]:
573
+ """Return the index column as a list.
574
+
575
+ Examples:
576
+ >>> anno = pl.DataFrame(
577
+ {
578
+ "series": ["GSE1", "GSE1", "GSE2"],
579
+ "sample": ["GSM1", "GSM2", "GSM3"],
580
+ "UBERON:0000948": [1, 0, 0],
581
+ "UBERON:0002349": [1, 1, 0],
582
+ "UBERON:0002113": [0, 0, 0],
583
+ "UBERON:0000955": [0, 0, 1],
584
+ }
585
+ )
586
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
587
+ >>> anno.index
588
+ ['GSM1', 'GSM2', 'GSM3']
589
+ """
428
590
  return self._ids.index.to_list()
429
591
 
430
592
  @property
431
593
  def n_indices(self) -> int:
432
- """Returns number of indices."""
594
+ """Returns number of indices.
595
+
596
+ Examples:
597
+ >>> anno = pl.DataFrame(
598
+ {
599
+ "series": ["GSE1", "GSE1", "GSE2"],
600
+ "sample": ["GSM1", "GSM2", "GSM3"],
601
+ "UBERON:0000948": [1, 0, 0],
602
+ "UBERON:0002349": [1, 1, 0],
603
+ "UBERON:0002113": [0, 0, 0],
604
+ "UBERON:0000955": [0, 0, 1],
605
+ }
606
+ )
607
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
608
+ >>> anno.n_indices
609
+ 3
610
+ """
433
611
  return self.data.height
434
612
 
435
613
  @property
436
614
  def n_entities(self) -> int:
437
- """Returns number of entities."""
615
+ """Returns number of entities.
616
+
617
+ Examples:
618
+ >>> anno = pl.DataFrame(
619
+ {
620
+ "series": ["GSE1", "GSE1", "GSE2"],
621
+ "sample": ["GSM1", "GSM2", "GSM3"],
622
+ "UBERON:0000948": [1, 0, 0],
623
+ "UBERON:0002349": [1, 1, 0],
624
+ "UBERON:0002113": [0, 0, 0],
625
+ "UBERON:0000955": [0, 0, 1],
626
+ }
627
+ )
628
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
629
+ >>> anno.n_entities
630
+ 4
631
+ """
438
632
  return len(self.entities)
439
633
 
440
634
  @property
441
635
  def unique_groups(self) -> list[str]:
442
- """Returns unique groups."""
636
+ """Returns unique groups.
637
+
638
+ Examples:
639
+ >>> anno = pl.DataFrame(
640
+ {
641
+ "series": ["GSE1", "GSE1", "GSE2"],
642
+ "sample": ["GSM1", "GSM2", "GSM3"],
643
+ "UBERON:0000948": [1, 0, 0],
644
+ "UBERON:0002349": [1, 1, 0],
645
+ "UBERON:0002113": [0, 0, 0],
646
+ "UBERON:0000955": [0, 0, 1],
647
+ }
648
+ )
649
+ >>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
650
+ >>> anno.unique_groups
651
+ ['GSE2', 'GSE1']
652
+ """
443
653
  return list(set(self.groups))
444
654
 
445
655
  def __repr__(self):