metahq-core 0.1.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ Class for Labels export io classes.
4
4
  Author: Parker Hicks
5
5
  Date: 2025-09-08
6
6
 
7
- Last updated: 2025-11-21 by Parker Hicks
7
+ Last updated: 2026-02-05 by Parker Hicks
8
8
  """
9
9
 
10
10
  from __future__ import annotations
@@ -22,6 +22,7 @@ from metahq_core.util.supported import (
22
22
  disease_ontologies,
23
23
  geo_metadata,
24
24
  get_annotations,
25
+ get_default_log_dir,
25
26
  metadata_fields,
26
27
  supported,
27
28
  )
@@ -37,33 +38,84 @@ LABEL_KEY = {"1": "positive", "-1": "negative", "2": "control"}
37
38
 
38
39
 
39
40
  class LabelsExporter(BaseExporter):
40
- """Base abstract class for Exporter children."""
41
+ """Exporter for Labels curations.
41
42
 
42
- def __init__(self, logger=None, loglevel=20, logdir=Path("."), verbose=True):
43
+ Attributes:
44
+ attribute (Literal["tissue", "disease", "sex", "age"]):
45
+ Attribute of the annotations to save.
46
+
47
+ level (Literal["sample", "series"]):
48
+ Level of the annotations.
49
+
50
+ logger (logging.Logger):
51
+ Python builtin Logger.
52
+
53
+ loglevel (int):
54
+ Logging level.
55
+
56
+ logdir (str | Path):
57
+ Path to directory storing logs.
58
+
59
+ verbose (bool):
60
+ Controls logging outputs.
61
+
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ attribute: Literal["tissue", "disease", "sex", "age"],
67
+ level: Literal["sample", "series"],
68
+ logger=None,
69
+ loglevel=20,
70
+ logdir=get_default_log_dir(),
71
+ verbose=True,
72
+ ):
73
+ self.attribute = attribute
74
+ self._database = self._load_annotations(level)
43
75
 
44
76
  if logger is None:
45
77
  logger = setup_logger(__name__, level=loglevel, log_dir=logdir)
46
78
  self.log: logging.Logger = logger
47
79
  self.verbose: bool = verbose
48
80
 
49
- def get_sra(self, labels: Labels, fields: list[str]) -> Labels:
81
+ def add_sources(self, labels: Labels) -> Labels:
82
+ """Add the sources that contributed to the labels of each sample or dataset.
83
+
84
+ Arguments:
85
+ labels (Labels):
86
+ A populated Labels curation object.
87
+
88
+ Returns:
89
+ The Labels object with additional source IDs for each index.
90
+
50
91
  """
51
- Retrieve SRA IDs from the annotations if they exist.
92
+ sources = {labels.index_col: [], "sources": []}
93
+ for idx in labels.index:
94
+ sources[labels.index_col].append(idx)
95
+
96
+ # get sources for a particular index for the specified attribute
97
+ sources["sources"].append(
98
+ "|".join(list(self._database[idx][self.attribute].keys()))
99
+ )
100
+
101
+ return labels.add_ids(pl.DataFrame(sources))
102
+
103
+ def get_sra(self, labels: Labels, fields: list[str]) -> Labels:
104
+ """Retrieve SRA IDs from the annotations if they exist.
52
105
 
53
- Parameters
54
- ----------
55
- labels: Labels
56
- A Labels curation containing samples and terms matching user-specified
57
- filters.
106
+ Arguments:
107
+ labels (Labels):
108
+ A Labels curation containing samples and terms matching user-specified
109
+ filters.
58
110
 
59
- fields: list[str]
60
- SRA ID levels (i.e., srr, srx, srs, or srp)
111
+ fields (list[str]):
112
+ SRA ID levels (i.e., srr, srx, srs, or srp)
61
113
 
62
- Returns
63
- -------
64
- A new Annotations curation with merged SRA IDs.
114
+ Returns:
115
+ A new Annotations curation with merged SRA IDs.
65
116
 
66
117
  """
118
+
67
119
  _labels = self._load_annotations(
68
120
  level=labels.index_col
69
121
  ) # all MetaHQ annotations
@@ -91,18 +143,22 @@ class LabelsExporter(BaseExporter):
91
143
  metadata: str | None = None,
92
144
  **kwargs,
93
145
  ):
94
- """
146
+ """Save labels curation to json. Keys are terms and values are
147
+ positive, negative, neutral, and control labeled entries.
95
148
 
96
- Save labels curation to json. Keys are terms and values are
97
- positively labelstated indices.
149
+ Arguments:
150
+ labels (Labels):
151
+ A populated Labels curation object.
152
+
153
+ fmt (Literal["json", "parquet", "csv", "tsv"]):
154
+ File format to save to.
155
+
156
+ file (FilePath):
157
+ Path to outfile.json.
98
158
 
99
- Parameters
100
- ----------
101
- outfile: FilePath
102
- Path to outfile.json.
159
+ metadata (str):
160
+ Metadata fields to include.
103
161
 
104
- metadata: str
105
- Metadata fields to include.
106
162
  """
107
163
  _ = checkdir(file, is_file=True)
108
164
  opt = {
@@ -119,34 +175,37 @@ class LabelsExporter(BaseExporter):
119
175
  def to_csv(
120
176
  self, curation: Labels, file: FilePath, metadata: str | None = None, **kwargs
121
177
  ):
122
- """
123
- Save labels to csv.
178
+ """Save labels to csv.
179
+
180
+ Arguments:
181
+ curation (Labels):
182
+ A populated Labels curation object.
124
183
 
125
- Parameters
126
- ----------
127
- outfile: FilePath
128
- Path to outfile.csv.
184
+ file (FilePath):
185
+ Path to outfile.csv.
129
186
 
130
- metadata: str
131
- Metadata fields to include.
187
+ metadata (str):
188
+ Metadata fields to include.
132
189
 
133
190
  """
134
191
  self._save_tabular("csv", curation, file, metadata, **kwargs)
135
192
 
136
193
  def to_json(self, curation: Labels, file: FilePath, metadata: str | None = None):
137
- """
138
- Save labels curation to json. Keys are terms and values are
194
+ """Save labels curation to json. Keys are terms and values are
139
195
  positively labeled indices.
140
196
 
141
- Parameters
142
- ----------
143
- file: FilePath
144
- Path to outfile.json.
197
+ Arguments:
198
+ curation (Labels):
199
+ A populated Labels curation object.
145
200
 
146
- metadata: str
147
- Metadata fields to include.
201
+ file (FilePath):
202
+ Path to outfile.json.
203
+
204
+ metadata (str):
205
+ Metadata fields to include.
148
206
 
149
207
  """
208
+
150
209
  has_controls = any(
151
210
  term.startswith(disease_ontologies()) for term in curation.entities
152
211
  )
@@ -164,13 +223,14 @@ class LabelsExporter(BaseExporter):
164
223
  isinstance(metadata, str)
165
224
  & (metadata.strip().replace(",", "") == curation.index_col)
166
225
  ):
167
- # save with just index IDs
168
- stacked = curation.data.hstack(curation.ids)
169
- for row in stacked.iter_rows(named=True):
170
- self._write_row(row, _labels, curation.index_col)
226
+ metadata = curation.index_col
227
+
228
+ if isinstance(metadata, str):
229
+ # add sources
230
+ curation = self.add_sources(curation)
171
231
 
172
- elif isinstance(metadata, str):
173
232
  _metadata = self._parse_metafields(curation.index_col, metadata)
233
+ _metadata.extend(["sources"])
174
234
 
175
235
  if self._sra_in_metadata(_metadata):
176
236
  curation = self.get_sra(
@@ -209,19 +269,17 @@ class LabelsExporter(BaseExporter):
209
269
  metadata: str | None = None,
210
270
  **kwargs,
211
271
  ):
212
- """
213
- Save labels to parquet.
272
+ """Save labels to parquet.
214
273
 
215
- Parameters
216
- ----------
217
- curation: Labels
218
- Labels curation object to save.
274
+ Arguments:
275
+ curation (Labels):
276
+ Labels curation object to save.
219
277
 
220
- file: FilePath
221
- Path to outfile.parquet.
278
+ file (FilePath):
279
+ Path to outfile.parquet.
222
280
 
223
- metadata: str | None
224
- Metadata fields to include.
281
+ metadata (str | None):
282
+ Metadata fields to include.
225
283
 
226
284
  """
227
285
  self._save_tabular("parquet", curation, file, metadata, **kwargs)
@@ -229,16 +287,17 @@ class LabelsExporter(BaseExporter):
229
287
  def to_tsv(
230
288
  self, curation: Labels, file: FilePath, metadata: str | None = None, **kwargs
231
289
  ):
232
- """
233
- Save labels to tsv.
290
+ """Save labels to tsv.
291
+
292
+ Arguments:
293
+ curation (Labels):
294
+ A populated Labels curation object.
234
295
 
235
- Parameters
236
- ----------
237
- outfile: FilePath
238
- Path to outfile.tsv.
296
+ file (FilePath):
297
+ Path to outfile.tsv.
239
298
 
240
- metadata: str
241
- Metadata fields to include.
299
+ metadata (str):
300
+ Metadata fields to include.
242
301
 
243
302
  """
244
303
  self._save_tabular("tsv", curation, file, metadata, **kwargs)
@@ -358,6 +417,11 @@ class LabelsExporter(BaseExporter):
358
417
  curation = self.get_sra(
359
418
  curation, [field for field in _metadata if field in database_ids("sra")]
360
419
  )
420
+
421
+ # add sources
422
+ curation = self.add_sources(curation)
423
+ _metadata = _metadata + ["sources"]
424
+
361
425
  if "description" in _metadata:
362
426
  self._save_table_with_description(
363
427
  file, curation, _metadata, fmt=fmt, **kwargs
@@ -394,13 +458,7 @@ class LabelsExporter(BaseExporter):
394
458
  for entity in labels:
395
459
  label = str(row[entity])
396
460
  if label in LABEL_KEY:
397
- try:
398
- labels[entity][LABEL_KEY[label]].append(idx)
399
- except KeyError:
400
- print(labels[entity])
401
- print(LABEL_KEY[label])
402
- print(idx)
403
- exit()
461
+ labels[entity][LABEL_KEY[label]].append(idx)
404
462
 
405
463
  def _write_row_with_metadata(
406
464
  self,
metahq_core/logger.py CHANGED
@@ -4,11 +4,10 @@ Logger setup.
4
4
  Author: Parker Hicks
5
5
  Date: 2025-10-16
6
6
 
7
- Last updated: 2025-11-21 by Parker Hicks
7
+ Last updated: 2026-02-04 by Parker Hicks
8
8
  """
9
9
 
10
10
  import logging
11
- from logging.handlers import TimedRotatingFileHandler
12
11
  from pathlib import Path
13
12
 
14
13
 
@@ -17,22 +16,19 @@ def setup_logger(
17
16
  log_dir: str | Path,
18
17
  level: int = logging.INFO,
19
18
  ) -> logging.Logger:
20
- """
21
- Sets up a logger.
19
+ """Sets up a logger.
22
20
 
23
- Parameters
24
- ----------
25
- name: str
26
- Logger name.
21
+ Arguments:
22
+ name (str):
23
+ Logger name.
27
24
 
28
- log_dir: str | Path
29
- Path to logging directory. Default is ~/metahq/logs
25
+ log_dir (str | Path):
26
+ Path to logging directory.
30
27
 
31
- level: int
32
- Logging level.
28
+ level (int):
29
+ Logging level.
33
30
 
34
- Returns
35
- -------
31
+ Returns:
36
32
  Configured logger.
37
33
 
38
34
  """
@@ -56,11 +52,8 @@ def setup_logger(
56
52
  logger.addHandler(console_handler)
57
53
 
58
54
  # file handler
59
- file_handler = TimedRotatingFileHandler(
55
+ file_handler = logging.FileHandler(
60
56
  Path(log_dir) / "log.log",
61
- when="midnight",
62
- interval=1,
63
- backupCount=30,
64
57
  encoding="utf-8",
65
58
  )
66
59
  file_handler.setFormatter(formatter)