pyobo 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
pyobo/api/hierarchy.py CHANGED
@@ -168,14 +168,15 @@ def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> b
168
168
  @lru_cache()
169
169
  def get_descendants(
170
170
  prefix: str,
171
- identifier: str,
171
+ identifier: Optional[str] = None,
172
172
  include_part_of: bool = True,
173
173
  include_has_member: bool = False,
174
174
  use_tqdm: bool = False,
175
175
  force: bool = False,
176
176
  **kwargs,
177
177
  ) -> Optional[Set[str]]:
178
- """Get all of the descendants (children) of the term as CURIEs."""
178
+ """Get all the descendants (children) of the term as CURIEs."""
179
+ curie, prefix, identifier = _pic(prefix, identifier)
179
180
  hierarchy = get_hierarchy(
180
181
  prefix=prefix,
181
182
  include_has_member=include_has_member,
@@ -184,23 +185,32 @@ def get_descendants(
184
185
  force=force,
185
186
  **kwargs,
186
187
  )
187
- curie = f"{prefix}:{identifier}"
188
188
  if curie not in hierarchy:
189
189
  return None
190
190
  return nx.ancestors(hierarchy, curie) # note this is backwards
191
191
 
192
192
 
193
+ def _pic(prefix, identifier=None) -> Tuple[str, str, str]:
194
+ if identifier is None:
195
+ curie = prefix
196
+ prefix, identifier = prefix.split(":")
197
+ else:
198
+ curie = f"{prefix}:{identifier}"
199
+ return curie, prefix, identifier
200
+
201
+
193
202
  @lru_cache()
194
203
  def get_children(
195
204
  prefix: str,
196
- identifier: str,
205
+ identifier: Optional[str] = None,
197
206
  include_part_of: bool = True,
198
207
  include_has_member: bool = False,
199
208
  use_tqdm: bool = False,
200
209
  force: bool = False,
201
210
  **kwargs,
202
211
  ) -> Optional[Set[str]]:
203
- """Get all of the descendants (children) of the term as CURIEs."""
212
+ """Get all the descendants (children) of the term as CURIEs."""
213
+ curie, prefix, identifier = _pic(prefix, identifier)
204
214
  hierarchy = get_hierarchy(
205
215
  prefix=prefix,
206
216
  include_has_member=include_has_member,
@@ -209,7 +219,6 @@ def get_children(
209
219
  force=force,
210
220
  **kwargs,
211
221
  )
212
- curie = f"{prefix}:{identifier}"
213
222
  if curie not in hierarchy:
214
223
  return None
215
224
  return set(hierarchy.predecessors(curie))
@@ -228,14 +237,15 @@ def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bo
228
237
  @lru_cache()
229
238
  def get_ancestors(
230
239
  prefix: str,
231
- identifier: str,
240
+ identifier: Optional[str] = None,
232
241
  include_part_of: bool = True,
233
242
  include_has_member: bool = False,
234
243
  use_tqdm: bool = False,
235
244
  force: bool = False,
236
245
  **kwargs,
237
246
  ) -> Optional[Set[str]]:
238
- """Get all of the ancestors (parents) of the term as CURIEs."""
247
+ """Get all the ancestors (parents) of the term as CURIEs."""
248
+ curie, prefix, identifier = _pic(prefix, identifier)
239
249
  hierarchy = get_hierarchy(
240
250
  prefix=prefix,
241
251
  include_has_member=include_has_member,
@@ -244,7 +254,6 @@ def get_ancestors(
244
254
  force=force,
245
255
  **kwargs,
246
256
  )
247
- curie = f"{prefix}:{identifier}"
248
257
  if curie not in hierarchy:
249
258
  return None
250
259
  return nx.descendants(hierarchy, curie) # note this is backwards
@@ -252,7 +261,7 @@ def get_ancestors(
252
261
 
253
262
  def get_subhierarchy(
254
263
  prefix: str,
255
- identifier: str,
264
+ identifier: Optional[str] = None,
256
265
  include_part_of: bool = True,
257
266
  include_has_member: bool = False,
258
267
  use_tqdm: bool = False,
@@ -260,6 +269,7 @@ def get_subhierarchy(
260
269
  **kwargs,
261
270
  ) -> nx.DiGraph:
262
271
  """Get the subhierarchy for a given node."""
272
+ curie, prefix, identifier = _pic(prefix, identifier)
263
273
  hierarchy = get_hierarchy(
264
274
  prefix=prefix,
265
275
  include_has_member=include_has_member,
@@ -271,7 +281,7 @@ def get_subhierarchy(
271
281
  logger.info(
272
282
  "getting descendants of %s:%s ! %s", prefix, identifier, get_name(prefix, identifier)
273
283
  )
274
- curies = nx.ancestors(hierarchy, f"{prefix}:{identifier}") # note this is backwards
284
+ curies = nx.ancestors(hierarchy, curie) # note this is backwards
275
285
  logger.info("inducing subgraph")
276
286
  sg = hierarchy.subgraph(curies).copy()
277
287
  logger.info("subgraph has %d nodes/%d edges", sg.number_of_nodes(), sg.number_of_edges())
pyobo/api/properties.py CHANGED
@@ -28,14 +28,17 @@ logger = logging.getLogger(__name__)
28
28
 
29
29
 
30
30
  @wrap_norm_prefix
31
- def get_properties_df(prefix: str, *, force: bool = False) -> pd.DataFrame:
31
+ def get_properties_df(
32
+ prefix: str, *, force: bool = False, version: Optional[str] = None
33
+ ) -> pd.DataFrame:
32
34
  """Extract properties.
33
35
 
34
36
  :param prefix: the resource to load
35
37
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
36
38
  :returns: A dataframe with the properties
37
39
  """
38
- version = get_version(prefix)
40
+ if version is None:
41
+ version = get_version(prefix)
39
42
  path = prefix_cache_join(prefix, name="properties.tsv", version=version)
40
43
 
41
44
  @cached_df(path=path, dtype=str, force=force)
@@ -59,6 +62,7 @@ def get_filtered_properties_mapping(
59
62
  *,
60
63
  use_tqdm: bool = False,
61
64
  force: bool = False,
65
+ version: Optional[str] = None,
62
66
  ) -> Mapping[str, str]:
63
67
  """Extract a single property for each term as a dictionary.
64
68
 
@@ -68,7 +72,12 @@ def get_filtered_properties_mapping(
68
72
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
69
73
  :returns: A mapping from identifier to property value
70
74
  """
71
- version = get_version(prefix)
75
+ df = get_properties_df(prefix=prefix, force=force, version=version)
76
+ df = df[df["property"] == prop]
77
+ return dict(df[[f"{prefix}_id", "value"]].values)
78
+
79
+ if version is None:
80
+ version = get_version(prefix)
72
81
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
73
82
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
74
83
 
@@ -95,6 +104,7 @@ def get_filtered_properties_multimapping(
95
104
  *,
96
105
  use_tqdm: bool = False,
97
106
  force: bool = False,
107
+ version: Optional[str] = None,
98
108
  ) -> Mapping[str, List[str]]:
99
109
  """Extract multiple properties for each term as a dictionary.
100
110
 
@@ -104,7 +114,8 @@ def get_filtered_properties_multimapping(
104
114
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
105
115
  :returns: A mapping from identifier to property values
106
116
  """
107
- version = get_version(prefix)
117
+ if version is None:
118
+ version = get_version(prefix)
108
119
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
109
120
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
110
121
 
@@ -124,7 +135,7 @@ def get_filtered_properties_multimapping(
124
135
  return _mapping_getter()
125
136
 
126
137
 
127
- def get_property(prefix: str, identifier: str, prop: str) -> Optional[str]:
138
+ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
128
139
  """Extract a single property for the given entity.
129
140
 
130
141
  :param prefix: the resource to load
@@ -136,11 +147,13 @@ def get_property(prefix: str, identifier: str, prop: str) -> Optional[str]:
136
147
  >>> pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles')
137
148
  "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
138
149
  """
139
- filtered_properties_mapping = get_filtered_properties_mapping(prefix=prefix, prop=prop)
150
+ filtered_properties_mapping = get_filtered_properties_mapping(
151
+ prefix=prefix, prop=prop, **kwargs
152
+ )
140
153
  return filtered_properties_mapping.get(identifier)
141
154
 
142
155
 
143
- def get_properties(prefix: str, identifier: str, prop: str) -> Optional[List[str]]:
156
+ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[List[str]]:
144
157
  """Extract a set of properties for the given entity.
145
158
 
146
159
  :param prefix: the resource to load
@@ -149,7 +162,7 @@ def get_properties(prefix: str, identifier: str, prop: str) -> Optional[List[str
149
162
  :returns: Multiple values for the property. If only one is expected, use :func:`get_property`
150
163
  """
151
164
  filtered_properties_multimapping = get_filtered_properties_multimapping(
152
- prefix=prefix, prop=prop
165
+ prefix=prefix, prop=prop, **kwargs
153
166
  )
154
167
  return filtered_properties_multimapping.get(identifier)
155
168
 
@@ -161,6 +174,7 @@ def get_filtered_properties_df(
161
174
  *,
162
175
  use_tqdm: bool = False,
163
176
  force: bool = False,
177
+ version: Optional[str] = None,
164
178
  ) -> pd.DataFrame:
165
179
  """Extract a single property for each term.
166
180
 
@@ -170,7 +184,8 @@ def get_filtered_properties_df(
170
184
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
171
185
  :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
172
186
  """
173
- version = get_version(prefix)
187
+ if version is None:
188
+ version = get_version(prefix)
174
189
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
175
190
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
176
191
 
pyobo/api/xrefs.py CHANGED
@@ -142,7 +142,9 @@ def get_sssom_df(
142
142
  df = get_xrefs_df(prefix=prefix, **kwargs)
143
143
  rows: List[Tuple[str, ...]] = []
144
144
  with logging_redirect_tqdm():
145
- for source_id, target_prefix, target_id in tqdm(df.values, unit="mapping", unit_scale=True):
145
+ for source_id, target_prefix, target_id in tqdm(
146
+ df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
147
+ ):
146
148
  source = Reference(prefix=prefix, identifier=source_id)
147
149
  target = Reference(prefix=target_prefix, identifier=target_id)
148
150
 
pyobo/getters.py CHANGED
@@ -55,7 +55,7 @@ class UnhandledFormat(NoBuild):
55
55
 
56
56
  #: The following prefixes can not be loaded through ROBOT without
57
57
  #: turning off integrity checks
58
- REQUIRES_NO_ROBOT_CHECK = {"clo", "vo"}
58
+ REQUIRES_NO_ROBOT_CHECK = {"clo", "vo", "orphanet.ordo", "orphanet"}
59
59
 
60
60
 
61
61
  @wrap_norm_prefix
@@ -117,7 +117,7 @@ def get_ontology(
117
117
 
118
118
  ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
119
119
  if path is None:
120
- raise NoBuild
120
+ raise NoBuild(prefix)
121
121
  elif ontology_format == "obo":
122
122
  pass # all gucci
123
123
  elif ontology_format == "owl":
pyobo/gilda_utils.py CHANGED
@@ -3,6 +3,7 @@
3
3
  """PyOBO's Gilda utilities."""
4
4
 
5
5
  import logging
6
+ from subprocess import CalledProcessError
6
7
  from typing import Iterable, List, Optional, Tuple, Type, Union
7
8
 
8
9
  import bioregistry
@@ -96,6 +97,7 @@ def get_grounder(
96
97
  versions: Union[None, str, Iterable[Union[str, None]]] = None,
97
98
  strict: bool = True,
98
99
  skip_obsolete: bool = False,
100
+ progress: bool = True,
99
101
  ) -> Grounder:
100
102
  """Get a Gilda grounder for the given prefix(es)."""
101
103
  unnamed = set() if unnamed is None else set(unnamed)
@@ -113,7 +115,7 @@ def get_grounder(
113
115
  raise ValueError
114
116
 
115
117
  terms: List[gilda.term.Term] = []
116
- for prefix, version in zip(prefixes, versions):
118
+ for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
117
119
  try:
118
120
  p_terms = list(
119
121
  get_gilda_terms(
@@ -122,9 +124,10 @@ def get_grounder(
122
124
  version=version,
123
125
  strict=strict,
124
126
  skip_obsolete=skip_obsolete,
127
+ progress=progress,
125
128
  )
126
129
  )
127
- except NoBuild:
130
+ except (NoBuild, CalledProcessError):
128
131
  continue
129
132
  else:
130
133
  terms.extend(p_terms)
@@ -144,17 +147,21 @@ def _fast_term(
144
147
  name: str,
145
148
  status: str,
146
149
  organism: Optional[str] = None,
147
- ) -> gilda.term.Term:
148
- return gilda.term.Term(
149
- norm_text=normalize(text),
150
- text=text,
151
- db=prefix,
152
- id=identifier,
153
- entry_name=name,
154
- status=status,
155
- source=prefix,
156
- organism=organism,
157
- )
150
+ ) -> Optional[gilda.term.Term]:
151
+ try:
152
+ term = gilda.term.Term(
153
+ norm_text=normalize(text),
154
+ text=text,
155
+ db=prefix,
156
+ id=identifier,
157
+ entry_name=name,
158
+ status=status,
159
+ source=prefix,
160
+ organism=organism,
161
+ )
162
+ except ValueError:
163
+ return None
164
+ return term
158
165
 
159
166
 
160
167
  def get_gilda_terms(
@@ -164,17 +171,24 @@ def get_gilda_terms(
164
171
  version: Optional[str] = None,
165
172
  strict: bool = True,
166
173
  skip_obsolete: bool = False,
174
+ progress: bool = True,
167
175
  ) -> Iterable[gilda.term.Term]:
168
176
  """Get gilda terms for the given namespace."""
169
177
  id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
170
178
  id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
171
179
  obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
172
180
 
173
- it = tqdm(id_to_name.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="name")
181
+ it = tqdm(
182
+ id_to_name.items(),
183
+ desc=f"[{prefix}] mapping",
184
+ unit_scale=True,
185
+ unit="name",
186
+ disable=not progress,
187
+ )
174
188
  for identifier, name in it:
175
189
  if identifier in obsoletes:
176
190
  continue
177
- yield _fast_term(
191
+ term = _fast_term(
178
192
  text=name,
179
193
  prefix=prefix,
180
194
  identifier=identifier,
@@ -182,11 +196,17 @@ def get_gilda_terms(
182
196
  status="name",
183
197
  organism=id_to_species.get(identifier),
184
198
  )
199
+ if term is not None:
200
+ yield term
185
201
 
186
202
  id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
187
203
  if id_to_synonyms:
188
204
  it = tqdm(
189
- id_to_synonyms.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="synonym"
205
+ id_to_synonyms.items(),
206
+ desc=f"[{prefix}] mapping",
207
+ unit_scale=True,
208
+ unit="synonym",
209
+ disable=not progress,
190
210
  )
191
211
  for identifier, synonyms in it:
192
212
  if identifier in obsoletes:
@@ -195,7 +215,7 @@ def get_gilda_terms(
195
215
  for synonym in synonyms:
196
216
  if not synonym:
197
217
  continue
198
- yield _fast_term(
218
+ term = _fast_term(
199
219
  text=synonym,
200
220
  prefix=prefix,
201
221
  identifier=identifier,
@@ -203,13 +223,21 @@ def get_gilda_terms(
203
223
  status="synonym",
204
224
  organism=id_to_species.get(identifier),
205
225
  )
226
+ if term is not None:
227
+ yield term
206
228
 
207
229
  if identifiers_are_names:
208
- it = tqdm(get_ids(prefix), desc=f"[{prefix}] mapping", unit_scale=True, unit="id")
230
+ it = tqdm(
231
+ get_ids(prefix),
232
+ desc=f"[{prefix}] mapping",
233
+ unit_scale=True,
234
+ unit="id",
235
+ disable=not progress,
236
+ )
209
237
  for identifier in it:
210
238
  if identifier in obsoletes:
211
239
  continue
212
- yield _fast_term(
240
+ term = _fast_term(
213
241
  text=identifier,
214
242
  prefix=prefix,
215
243
  identifier=identifier,
@@ -217,3 +245,5 @@ def get_gilda_terms(
217
245
  status="name",
218
246
  organism=id_to_species.get(identifier),
219
247
  )
248
+ if term is not None:
249
+ yield term
pyobo/sources/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .antibodyregistry import AntibodyRegistryGetter
8
8
  from .ccle import CCLEGetter
9
9
  from .cgnc import CGNCGetter
10
10
  from .chembl import ChEMBLCompoundGetter
11
+ from .civic_gene import CIVICGeneGetter
11
12
  from .complexportal import ComplexPortalGetter
12
13
  from .conso import CONSOGetter
13
14
  from .cpt import CPTGetter
@@ -38,6 +39,7 @@ from .mirbase_mature import MiRBaseMatureGetter
38
39
  from .msigdb import MSigDBGetter
39
40
  from .ncbigene import NCBIGeneGetter
40
41
  from .npass import NPASSGetter
42
+ from .omim_ps import OMIMPSGetter
41
43
  from .pathbank import PathBankGetter
42
44
  from .pfam import PfamGetter
43
45
  from .pfam_clan import PfamClanGetter
@@ -61,6 +63,7 @@ __all__ = [
61
63
  "AntibodyRegistryGetter",
62
64
  "CCLEGetter",
63
65
  "CGNCGetter",
66
+ "CIVICGeneGetter",
64
67
  "CONSOGetter",
65
68
  "CPTGetter",
66
69
  "CVXGetter",
@@ -94,6 +97,7 @@ __all__ = [
94
97
  "MiRBaseMatureGetter",
95
98
  "NCBIGeneGetter",
96
99
  "NPASSGetter",
100
+ "OMIMPSGetter",
97
101
  "PIDGetter",
98
102
  "PathBankGetter",
99
103
  "PfamClanGetter",
pyobo/sources/cgnc.py CHANGED
@@ -69,7 +69,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
69
69
  term = Term.from_triple(
70
70
  prefix=PREFIX,
71
71
  identifier=cgnc_id,
72
- name=name,
72
+ name=name if pd.notna(name) else None,
73
73
  )
74
74
  term.set_species(identifier="9031", name="Gallus gallus")
75
75
  if entrez_id and pd.notna(entrez_id):
pyobo/sources/chebi.py CHANGED
@@ -15,12 +15,14 @@ __all__ = [
15
15
  ]
16
16
 
17
17
 
18
- def get_chebi_id_smiles_mapping() -> Mapping[str, str]:
18
+ def get_chebi_id_smiles_mapping(**kwargs) -> Mapping[str, str]:
19
19
  """Get a mapping from ChEBI identifiers to SMILES.
20
20
 
21
21
  This is common enough that it gets its own function :)
22
22
  """
23
- return get_filtered_properties_mapping("chebi", "http://purl.obolibrary.org/obo/chebi/smiles")
23
+ return get_filtered_properties_mapping(
24
+ "chebi", "http://purl.obolibrary.org/obo/chebi/smiles", **kwargs
25
+ )
24
26
 
25
27
 
26
28
  def get_chebi_smiles_id_mapping() -> Mapping[str, str]:
@@ -0,0 +1,55 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Converter for CiVIC Genes."""
4
+
5
+ from typing import Iterable, Optional
6
+
7
+ import pandas as pd
8
+
9
+ from pyobo.struct import Obo, Reference, Term
10
+ from pyobo.utils.path import ensure_df
11
+
12
+ __all__ = [
13
+ "CIVICGeneGetter",
14
+ ]
15
+
16
+ PREFIX = "civic.gid"
17
+ URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
18
+
19
+
20
+ def _sort(_o, t):
21
+ return int(t.identifier)
22
+
23
+
24
+ class CIVICGeneGetter(Obo):
25
+ """An ontology representation of CiVIC's gene nomenclature."""
26
+
27
+ bioversions_key = ontology = PREFIX
28
+ term_sort_key = _sort
29
+
30
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
+ """Iterate over gene terms for CiVIC."""
32
+ yield from get_terms(self.data_version, force=force)
33
+
34
+
35
+ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
36
+ """Get CIVIC terms."""
37
+ # if version is not None:
38
+ # version_dt: datetime.date = dateutil.parser.parse(version)
39
+ # else:
40
+ # version_dt: datetime.date = datetime.today()
41
+ # version = version_dt.strftime("01-%b-%Y")
42
+ # version is like 01-Feb-2024
43
+ url = f"https://civicdb.org/downloads/{version}/{version}-GeneSummaries.tsv"
44
+ df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
45
+ for identifier, _, name, entrez_id, description, _last_review, _flag in df.values:
46
+ term = Term(
47
+ reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
48
+ definition=description if pd.notna(description) else None,
49
+ )
50
+ term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
51
+ yield term
52
+
53
+
54
+ if __name__ == "__main__":
55
+ CIVICGeneGetter.cli()
pyobo/sources/cvx.py CHANGED
@@ -7,7 +7,7 @@ from typing import Iterable
7
7
 
8
8
  import pandas as pd
9
9
 
10
- from pyobo import Obo, Term
10
+ from pyobo import Obo, Reference, Term
11
11
 
12
12
  __all__ = [
13
13
  "CVXGetter",
@@ -28,6 +28,11 @@ class CVXGetter(Obo):
28
28
  return iter_terms()
29
29
 
30
30
 
31
+ # This got split, which it's not obvious how to deal with this
32
+ MANUAL_OBSOLETE = {"15"}
33
+ REPLACEMENTS = {"31": "85", "154": "86", "180": "13"}
34
+
35
+
31
36
  def iter_terms() -> Iterable[Term]:
32
37
  """Iterate over terms in CVX."""
33
38
  dd = defaultdict(set)
@@ -60,11 +65,22 @@ def iter_terms() -> Iterable[Term]:
60
65
  cvx_df[col] = cvx_df[col].map(lambda s: s.strip() if pd.notna(s) else s)
61
66
  terms = {}
62
67
  for cvx, short_name, full_name, notes, status, nonvaccine, _updated in cvx_df.values:
63
- term = Term.from_triple(PREFIX, cvx, full_name)
68
+ if cvx == "99":
69
+ continue # this is a placeholder
70
+
71
+ is_obsolete = cvx in MANUAL_OBSOLETE or (pd.notna(notes) and "do not use" in notes.lower())
72
+ term = Term(
73
+ reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
74
+ is_obsolete=is_obsolete,
75
+ )
64
76
  if short_name != full_name:
65
77
  term.append_synonym(short_name)
66
78
  if pd.notna(notes):
67
79
  term.append_comment(notes)
80
+ if is_obsolete:
81
+ replacement_identifier = REPLACEMENTS.get(cvx)
82
+ if replacement_identifier:
83
+ term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
68
84
  if pd.notna(status):
69
85
  term.append_property("status", status)
70
86
  if pd.notna(nonvaccine):
pyobo/sources/famplex.py CHANGED
@@ -151,9 +151,11 @@ def _get_xref_df(version: str) -> Mapping[str, List[Reference]]:
151
151
  }
152
152
  xrefs_df[0] = xrefs_df[0].map(lambda s: ns_remapping.get(s, s))
153
153
  xrefs_df[1] = [
154
- bioregistry.standardize_identifier(xref_prefix, xref_identifier)
155
- if xref_prefix != "nextprot.family"
156
- else xref_identifier[len("FA:") :]
154
+ (
155
+ bioregistry.standardize_identifier(xref_prefix, xref_identifier)
156
+ if xref_prefix != "nextprot.family"
157
+ else xref_identifier[len("FA:") :]
158
+ )
157
159
  for xref_prefix, xref_identifier in xrefs_df[[0, 1]].values
158
160
  ]
159
161
 
pyobo/sources/mesh.py CHANGED
@@ -6,7 +6,7 @@ import datetime
6
6
  import itertools as itt
7
7
  import logging
8
8
  import re
9
- from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple
9
+ from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Tuple
10
10
  from xml.etree.ElementTree import Element
11
11
 
12
12
  from tqdm.auto import tqdm
@@ -19,6 +19,7 @@ from pyobo.utils.path import ensure_path, prefix_directory_join
19
19
 
20
20
  __all__ = [
21
21
  "MeSHGetter",
22
+ "get_mesh_category_curies",
22
23
  ]
23
24
 
24
25
  logger = logging.getLogger(__name__)
@@ -317,5 +318,32 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
317
318
  ]
318
319
 
319
320
 
321
+ def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]:
322
+ """Get the MeSH LUIDs for a category, by letter (e.g., "A").
323
+
324
+ :param letter: The MeSH tree, A for anatomy, C for disease, etc.
325
+ :param skip: An optional collection of MeSH tree codes to skip, such as "A03"
326
+ :returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
327
+
328
+ .. seealso:: https://meshb.nlm.nih.gov/treeView
329
+ """
330
+ import bioversions
331
+
332
+ mesh_version = bioversions.get_version("mesh")
333
+ if mesh_version is None:
334
+ raise ValueError
335
+ tree_to_mesh = get_tree_to_mesh_id(mesh_version)
336
+ rv = []
337
+ for i in range(1, 100):
338
+ key = f"{letter}{i:02}"
339
+ if skip and key in skip:
340
+ continue
341
+ mesh_id = tree_to_mesh.get(key)
342
+ if mesh_id is None:
343
+ break
344
+ rv.append(f"mesh:{mesh_id}")
345
+ return rv
346
+
347
+
320
348
  if __name__ == "__main__":
321
349
  get_obo(force=True).write_default(force=True, write_obo=True)
pyobo/sources/ncbigene.py CHANGED
@@ -171,15 +171,17 @@ def get_terms(force: bool = False) -> Iterable[Term]:
171
171
  continue
172
172
  term = Term(
173
173
  reference=Reference(prefix=PREFIX, identifier=gene_id, name=symbol),
174
- definition=description,
174
+ definition=description if pd.notna(description) else None,
175
175
  )
176
176
  term.set_species(identifier=tax_id)
177
177
  if pd.notna(xref_curies):
178
178
  for xref_curie in xref_curies.split("|"):
179
179
  if xref_curie.startswith("EnsemblRapid"):
180
180
  continue
181
- if xref_curie.startswith("AllianceGenome"):
181
+ elif xref_curie.startswith("AllianceGenome"):
182
182
  xref_curie = xref_curie[len("xref_curie") :]
183
+ elif xref_curie.startswith("nome:WB:"):
184
+ xref_curie = xref_curie[len("nome:") :]
183
185
  xref_prefix, xref_id = bioregistry.parse_curie(xref_curie)
184
186
  if xref_prefix and xref_id:
185
187
  term.append_xref(Reference(prefix=xref_prefix, identifier=xref_id))
@@ -187,7 +189,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
187
189
  p = xref_curie.split(":")[0]
188
190
  if p not in warning_prefixes:
189
191
  warning_prefixes.add(p)
190
- tqdm.write(f"[{PREFIX}] unhandled xref prefix: {p}")
192
+ tqdm.write(f"[{PREFIX}] unhandled prefix in xref: {xref_curie}")
191
193
  yield term
192
194
 
193
195