pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. pyobo/__init__.py +0 -2
  2. pyobo/__main__.py +0 -2
  3. pyobo/api/__init__.py +0 -2
  4. pyobo/api/alts.py +6 -7
  5. pyobo/api/hierarchy.py +14 -15
  6. pyobo/api/metadata.py +3 -4
  7. pyobo/api/names.py +31 -32
  8. pyobo/api/properties.py +6 -7
  9. pyobo/api/relations.py +12 -11
  10. pyobo/api/species.py +5 -6
  11. pyobo/api/typedefs.py +1 -3
  12. pyobo/api/utils.py +61 -5
  13. pyobo/api/xrefs.py +4 -5
  14. pyobo/aws.py +3 -5
  15. pyobo/cli/__init__.py +0 -2
  16. pyobo/cli/aws.py +0 -2
  17. pyobo/cli/cli.py +0 -4
  18. pyobo/cli/database.py +1 -3
  19. pyobo/cli/lookup.py +0 -2
  20. pyobo/cli/utils.py +0 -2
  21. pyobo/constants.py +1 -33
  22. pyobo/getters.py +19 -26
  23. pyobo/gilda_utils.py +19 -17
  24. pyobo/identifier_utils.py +10 -10
  25. pyobo/mocks.py +5 -6
  26. pyobo/normalizer.py +24 -24
  27. pyobo/obographs.py +8 -5
  28. pyobo/plugins.py +3 -4
  29. pyobo/py.typed +0 -0
  30. pyobo/reader.py +19 -21
  31. pyobo/registries/__init__.py +0 -2
  32. pyobo/registries/metaregistry.py +6 -8
  33. pyobo/resource_utils.py +1 -3
  34. pyobo/resources/__init__.py +0 -2
  35. pyobo/resources/ncbitaxon.py +2 -3
  36. pyobo/resources/ro.py +2 -4
  37. pyobo/resources/so.py +55 -0
  38. pyobo/resources/so.tsv +2604 -0
  39. pyobo/sources/README.md +15 -0
  40. pyobo/sources/__init__.py +0 -2
  41. pyobo/sources/agrovoc.py +3 -3
  42. pyobo/sources/antibodyregistry.py +2 -3
  43. pyobo/sources/biogrid.py +4 -4
  44. pyobo/sources/ccle.py +3 -4
  45. pyobo/sources/cgnc.py +1 -3
  46. pyobo/sources/chebi.py +2 -4
  47. pyobo/sources/chembl.py +1 -3
  48. pyobo/sources/civic_gene.py +2 -3
  49. pyobo/sources/complexportal.py +57 -20
  50. pyobo/sources/conso.py +2 -4
  51. pyobo/sources/cpt.py +1 -3
  52. pyobo/sources/credit.py +1 -1
  53. pyobo/sources/cvx.py +1 -3
  54. pyobo/sources/depmap.py +3 -4
  55. pyobo/sources/dictybase_gene.py +15 -12
  56. pyobo/sources/drugbank.py +6 -7
  57. pyobo/sources/drugbank_salt.py +3 -4
  58. pyobo/sources/drugcentral.py +9 -8
  59. pyobo/sources/expasy.py +33 -16
  60. pyobo/sources/famplex.py +3 -5
  61. pyobo/sources/flybase.py +5 -6
  62. pyobo/sources/geonames.py +1 -1
  63. pyobo/sources/gmt_utils.py +5 -6
  64. pyobo/sources/go.py +4 -6
  65. pyobo/sources/gwascentral_phenotype.py +1 -3
  66. pyobo/sources/gwascentral_study.py +2 -3
  67. pyobo/sources/hgnc.py +30 -26
  68. pyobo/sources/hgncgenefamily.py +9 -11
  69. pyobo/sources/icd10.py +3 -4
  70. pyobo/sources/icd11.py +3 -4
  71. pyobo/sources/icd_utils.py +6 -7
  72. pyobo/sources/interpro.py +3 -5
  73. pyobo/sources/itis.py +1 -3
  74. pyobo/sources/kegg/__init__.py +0 -2
  75. pyobo/sources/kegg/api.py +3 -4
  76. pyobo/sources/kegg/genes.py +3 -4
  77. pyobo/sources/kegg/genome.py +19 -9
  78. pyobo/sources/kegg/pathway.py +5 -6
  79. pyobo/sources/mesh.py +19 -21
  80. pyobo/sources/mgi.py +1 -3
  81. pyobo/sources/mirbase.py +13 -9
  82. pyobo/sources/mirbase_constants.py +0 -2
  83. pyobo/sources/mirbase_family.py +1 -3
  84. pyobo/sources/mirbase_mature.py +1 -3
  85. pyobo/sources/msigdb.py +4 -5
  86. pyobo/sources/ncbigene.py +3 -5
  87. pyobo/sources/npass.py +2 -4
  88. pyobo/sources/omim_ps.py +1 -3
  89. pyobo/sources/pathbank.py +35 -28
  90. pyobo/sources/pfam.py +1 -3
  91. pyobo/sources/pfam_clan.py +1 -3
  92. pyobo/sources/pid.py +3 -5
  93. pyobo/sources/pombase.py +7 -6
  94. pyobo/sources/pubchem.py +2 -3
  95. pyobo/sources/reactome.py +30 -11
  96. pyobo/sources/rgd.py +3 -4
  97. pyobo/sources/rhea.py +7 -8
  98. pyobo/sources/ror.py +3 -2
  99. pyobo/sources/selventa/__init__.py +0 -2
  100. pyobo/sources/selventa/schem.py +1 -3
  101. pyobo/sources/selventa/scomp.py +1 -3
  102. pyobo/sources/selventa/sdis.py +1 -3
  103. pyobo/sources/selventa/sfam.py +1 -3
  104. pyobo/sources/sgd.py +1 -3
  105. pyobo/sources/slm.py +29 -17
  106. pyobo/sources/umls/__init__.py +0 -2
  107. pyobo/sources/umls/__main__.py +0 -2
  108. pyobo/sources/umls/get_synonym_types.py +1 -1
  109. pyobo/sources/umls/umls.py +2 -4
  110. pyobo/sources/uniprot/__init__.py +0 -2
  111. pyobo/sources/uniprot/uniprot.py +11 -10
  112. pyobo/sources/uniprot/uniprot_ptm.py +6 -5
  113. pyobo/sources/utils.py +3 -5
  114. pyobo/sources/wikipathways.py +1 -3
  115. pyobo/sources/zfin.py +20 -9
  116. pyobo/ssg/__init__.py +3 -2
  117. pyobo/struct/__init__.py +0 -2
  118. pyobo/struct/reference.py +22 -23
  119. pyobo/struct/struct.py +132 -116
  120. pyobo/struct/typedef.py +14 -10
  121. pyobo/struct/utils.py +0 -2
  122. pyobo/utils/__init__.py +0 -2
  123. pyobo/utils/cache.py +14 -6
  124. pyobo/utils/io.py +9 -10
  125. pyobo/utils/iter.py +5 -6
  126. pyobo/utils/misc.py +1 -3
  127. pyobo/utils/ndex_utils.py +6 -7
  128. pyobo/utils/path.py +4 -5
  129. pyobo/version.py +3 -5
  130. pyobo/xrefdb/__init__.py +0 -2
  131. pyobo/xrefdb/canonicalizer.py +27 -18
  132. pyobo/xrefdb/priority.py +0 -2
  133. pyobo/xrefdb/sources/__init__.py +3 -4
  134. pyobo/xrefdb/sources/biomappings.py +0 -2
  135. pyobo/xrefdb/sources/cbms2019.py +0 -2
  136. pyobo/xrefdb/sources/chembl.py +0 -2
  137. pyobo/xrefdb/sources/compath.py +1 -3
  138. pyobo/xrefdb/sources/famplex.py +3 -5
  139. pyobo/xrefdb/sources/gilda.py +0 -2
  140. pyobo/xrefdb/sources/intact.py +5 -5
  141. pyobo/xrefdb/sources/ncit.py +1 -3
  142. pyobo/xrefdb/sources/pubchem.py +2 -5
  143. pyobo/xrefdb/sources/wikidata.py +2 -4
  144. pyobo/xrefdb/xrefs_pipeline.py +15 -16
  145. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
  146. pyobo-0.11.1.dist-info/METADATA +711 -0
  147. pyobo-0.11.1.dist-info/RECORD +173 -0
  148. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
  149. pyobo-0.11.1.dist-info/entry_points.txt +2 -0
  150. pyobo-0.10.12.dist-info/METADATA +0 -499
  151. pyobo-0.10.12.dist-info/RECORD +0 -169
  152. pyobo-0.10.12.dist-info/entry_points.txt +0 -15
  153. {pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0
pyobo/sources/rhea.py CHANGED
@@ -1,9 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for Rhea."""
4
2
 
5
3
  import logging
6
- from typing import TYPE_CHECKING, Dict, Iterable, Optional
4
+ from collections.abc import Iterable
5
+ from typing import TYPE_CHECKING, Optional
7
6
 
8
7
  import pystow
9
8
 
@@ -71,7 +70,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
71
70
  version,
72
71
  url=RHEA_RDF_GZ_URL,
73
72
  force=force,
74
- parse_kwargs=dict(format="xml"),
73
+ parse_kwargs={"format": "xml"},
75
74
  )
76
75
 
77
76
 
@@ -103,10 +102,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
103
102
  )
104
103
  names = {str(identifier): str(name) for _, identifier, name in result}
105
104
 
106
- terms: Dict[str, Term] = {}
107
- master_to_left: Dict[str, str] = {}
108
- master_to_right: Dict[str, str] = {}
109
- master_to_bi: Dict[str, str] = {}
105
+ terms: dict[str, Term] = {}
106
+ master_to_left: dict[str, str] = {}
107
+ master_to_right: dict[str, str] = {}
108
+ master_to_bi: dict[str, str] = {}
110
109
 
111
110
  directions = ensure_df(
112
111
  PREFIX,
pyobo/sources/ror.py CHANGED
@@ -4,7 +4,8 @@ from __future__ import annotations
4
4
 
5
5
  import json
6
6
  import zipfile
7
- from typing import Any, Iterable
7
+ from collections.abc import Iterable
8
+ from typing import Any
8
9
 
9
10
  import bioregistry
10
11
  import zenodo_client
@@ -62,7 +63,7 @@ class RORGetter(Obo):
62
63
  "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
63
64
  }
64
65
 
65
- def __post_init__(self): # noqa: D105
66
+ def __post_init__(self):
66
67
  self.data_version, _url, _path = _get_info()
67
68
  super().__post_init__()
68
69
 
pyobo/sources/selventa/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Importers for selventa terminologies."""
4
2
 
5
3
  from .schem import SCHEMGetter
pyobo/sources/selventa/schem.py CHANGED
@@ -1,11 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Selventa chemicals.
4
2
 
5
3
  .. seealso:: https://github.com/pyobo/pyobo/issues/27
6
4
  """
7
5
 
8
- from typing import Iterable
6
+ from collections.abc import Iterable
9
7
 
10
8
  import pandas as pd
11
9
 
pyobo/sources/selventa/scomp.py CHANGED
@@ -1,8 +1,6 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Selventa complexes."""
4
2
 
5
- from typing import Iterable
3
+ from collections.abc import Iterable
6
4
 
7
5
  import pandas as pd
8
6
 
pyobo/sources/selventa/sdis.py CHANGED
@@ -1,11 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Selventa diseases.
4
2
 
5
3
  .. seealso:: https://github.com/pyobo/pyobo/issues/26
6
4
  """
7
5
 
8
- from typing import Iterable
6
+ from collections.abc import Iterable
9
7
 
10
8
  import pandas as pd
11
9
 
pyobo/sources/selventa/sfam.py CHANGED
@@ -1,8 +1,6 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Selventa families."""
4
2
 
5
- from typing import Iterable
3
+ from collections.abc import Iterable
6
4
 
7
5
  import pandas as pd
8
6
 
pyobo/sources/sgd.py CHANGED
@@ -1,8 +1,6 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for SGD."""
4
2
 
5
- from typing import Iterable
3
+ from collections.abc import Iterable
6
4
  from urllib.parse import unquote_plus
7
5
 
8
6
  from ..struct import Obo, Reference, Synonym, Term, from_species
pyobo/sources/slm.py CHANGED
@@ -1,8 +1,6 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Swisslipids."""
4
2
 
5
- from typing import Iterable
3
+ from collections.abc import Iterable
6
4
 
7
5
  import pandas as pd
8
6
  from tqdm.auto import tqdm
@@ -79,10 +77,10 @@ def iter_terms(version: str, force: bool = False):
79
77
  smiles,
80
78
  inchi,
81
79
  inchikey,
82
- chebi_id,
83
- lipidmaps_id,
84
- hmdb_id,
85
- pmids,
80
+ chebi_ids,
81
+ lipidmaps_ids,
82
+ hmdb_ids,
83
+ pubmed_ids,
86
84
  ) in tqdm(
87
85
  df[COLUMNS].values, desc=f"[{PREFIX}] generating terms", unit_scale=True, unit="lipid"
88
86
  ):
@@ -105,21 +103,35 @@ def iter_terms(version: str, force: bool = False):
105
103
  inchi = inchi[len("InChI=") :]
106
104
  term.append_property(has_inchi, inchi)
107
105
  if pd.notna(inchikey):
108
- if inchikey.startswith("InChIKey="):
109
- inchikey = inchikey[len("InChIKey=") :]
110
- term.append_exact_match(Reference(prefix="inchikey", identifier=inchikey))
111
- if pd.notna(chebi_id):
112
- term.append_exact_match(("chebi", chebi_id))
113
- if pd.notna(lipidmaps_id):
106
+ inchikey = inchikey.removeprefix("InChIKey=").strip()
107
+ if inchikey and inchikey != "none":
108
+ try:
109
+ inchi_ref = Reference(prefix="inchikey", identifier=inchikey)
110
+ except ValueError:
111
+ tqdm.write(
112
+ f"[slm:{identifier}] had invalid inchikey reference: ({type(inchikey)}) {inchikey}"
113
+ )
114
+ else:
115
+ term.append_exact_match(inchi_ref)
116
+ for chebi_id in _split(chebi_ids):
117
+ term.append_xref(("chebi", chebi_id))
118
+ for lipidmaps_id in _split(lipidmaps_ids):
114
119
  term.append_exact_match(("lipidmaps", lipidmaps_id))
115
- if pd.notna(hmdb_id):
120
+ for hmdb_id in _split(hmdb_ids):
116
121
  term.append_exact_match(("hmdb", hmdb_id))
117
- if pd.notna(pmids):
118
- for pmid in pmids.split("|"):
119
- term.append_provenance(("pubmed", pmid))
122
+ for pubmed_id in _split(pubmed_ids):
123
+ term.append_provenance(("pubmed", pubmed_id))
120
124
  # TODO how to handle class, parents, and components?
121
125
  yield term
122
126
 
123
127
 
128
+ def _split(s: str) -> Iterable[str]:
129
+ if pd.notna(s):
130
+ for x in s.split("|"):
131
+ x = x.strip()
132
+ if x:
133
+ yield x
134
+
135
+
124
136
  if __name__ == "__main__":
125
137
  get_obo().write_default(write_obo=True, use_tqdm=True)
pyobo/sources/umls/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for UMLS."""
4
2
 
5
3
  from .umls import UMLSGetter, get_obo # noqa: F401
pyobo/sources/umls/__main__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """CLI for UMLS exporter."""
4
2
 
5
3
  from .umls import UMLSGetter
pyobo/sources/umls/get_synonym_types.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Utilities for UMLS synonyms."""
2
2
 
3
+ from collections.abc import Mapping
3
4
  from pathlib import Path
4
- from typing import Mapping
5
5
 
6
6
  import requests
7
7
  from bs4 import BeautifulSoup
pyobo/sources/umls/umls.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for UMLS.
4
2
 
5
3
  Run with ``python -m pyobo.sources.umls``
@@ -8,7 +6,7 @@ Run with ``python -m pyobo.sources.umls``
8
6
  import itertools as itt
9
7
  import operator
10
8
  from collections import defaultdict
11
- from typing import Iterable, Mapping, Set
9
+ from collections.abc import Iterable, Mapping
12
10
 
13
11
  import bioregistry
14
12
  import pandas as pd
@@ -67,7 +65,7 @@ def get_obo() -> Obo:
67
65
  return UMLSGetter()
68
66
 
69
67
 
70
- def get_semantic_types() -> Mapping[str, Set[str]]:
68
+ def get_semantic_types() -> Mapping[str, set[str]]:
71
69
  """Get UMLS semantic types for each term."""
72
70
  dd = defaultdict(set)
73
71
  with open_umls_semantic_types() as file:
pyobo/sources/uniprot/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converters for UniProt resources."""
4
2
 
5
3
  from .uniprot import PREFIX, UniProtGetter
pyobo/sources/uniprot/uniprot.py CHANGED
@@ -1,10 +1,9 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for UniProt."""
4
2
 
3
+ from collections.abc import Iterable
5
4
  from operator import attrgetter
6
5
  from pathlib import Path
7
- from typing import Iterable, List, Optional, cast
6
+ from typing import Optional, cast
8
7
 
9
8
  from tqdm.auto import tqdm
10
9
 
@@ -57,6 +56,7 @@ class UniProtGetter(Obo):
57
56
  gene_product_of,
58
57
  molecularly_interacts_with,
59
58
  derives_from,
59
+ located_in,
60
60
  ]
61
61
 
62
62
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -82,7 +82,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
82
82
  pubmeds,
83
83
  pdbs,
84
84
  proteome,
85
- gene_id,
85
+ gene_ids,
86
86
  rhea_curies,
87
87
  go_components,
88
88
  go_functions,
@@ -94,13 +94,14 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
94
94
  description = description.removeprefix("FUNCTION: ")
95
95
  term = Term(
96
96
  reference=Reference(prefix=PREFIX, identifier=uniprot_id, name=accession),
97
- definition=description or None,
97
+ # definition=description or None,
98
98
  )
99
99
  term.set_species(taxonomy_id)
100
- if gene_id:
101
- term.append_relationship(
102
- gene_product_of, Reference(prefix="ncbigene", identifier=gene_id)
103
- )
100
+ if gene_ids:
101
+ for gene_id in gene_ids.split(";"):
102
+ term.append_relationship(
103
+ gene_product_of, Reference(prefix="ncbigene", identifier=gene_id.strip())
104
+ )
104
105
 
105
106
  # TODO add type=Reference(prefix="xsd", identifier="boolean")
106
107
  term.append_property("reviewed", "true")
@@ -154,7 +155,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
154
155
  yield term
155
156
 
156
157
 
157
- def _parse_go(go_terms) -> List[Reference]:
158
+ def _parse_go(go_terms) -> list[Reference]:
158
159
  rv = []
159
160
  if go_terms:
160
161
  for go_term in go_terms.split(";"):
pyobo/sources/uniprot/uniprot_ptm.py CHANGED
@@ -27,7 +27,8 @@ DR Cross-reference to external Optional; once or more
27
27
 
28
28
  import itertools as itt
29
29
  from collections import defaultdict
30
- from typing import DefaultDict, Iterable, List, Mapping, Optional, Tuple
30
+ from collections.abc import Iterable, Mapping
31
+ from typing import Optional
31
32
 
32
33
  from tqdm.auto import tqdm
33
34
 
@@ -63,18 +64,18 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
63
64
  path = ensure_path(PREFIX, url=URL, force=force)
64
65
  with open(path) as file:
65
66
  lines = list(file)
66
- it: Iterable[Tuple[str, str]] = ((line[:2], line[2:].strip()) for line in lines[47:-5])
67
+ it: Iterable[tuple[str, str]] = ((line[:2], line[2:].strip()) for line in lines[47:-5])
67
68
  for i, (_, term_lines) in enumerate(itt.groupby(it, key=lambda p: p[0] == "//")):
68
69
  term = _parse(i, term_lines)
69
70
  if term:
70
71
  yield term
71
72
 
72
73
 
73
- def _parse(i, lines: Iterable[Tuple[str, str]]) -> Optional[Term]:
74
- dd_: DefaultDict[str, List[str]] = defaultdict(list)
74
+ def _parse(i, lines: Iterable[tuple[str, str]]) -> Optional[Term]:
75
+ dd_: defaultdict[str, list[str]] = defaultdict(list)
75
76
  for key, value in lines:
76
77
  dd_[key].append(value)
77
- dd: Mapping[str, List[str]] = dict(dd_)
78
+ dd: Mapping[str, list[str]] = dict(dd_)
78
79
 
79
80
  if "//" in dd:
80
81
  return None
pyobo/sources/utils.py CHANGED
@@ -1,9 +1,7 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Utilities for converters."""
4
2
 
5
3
  import logging
6
- from typing import Mapping, Set, Tuple
4
+ from collections.abc import Mapping
7
5
 
8
6
  from ..utils.io import multisetdict
9
7
 
@@ -15,7 +13,7 @@ __all__ = [
15
13
  logger = logging.getLogger(__name__)
16
14
 
17
15
 
18
- def get_go_mapping(path: str, prefix: str) -> Mapping[str, Set[Tuple[str, str]]]:
16
+ def get_go_mapping(path: str, prefix: str) -> Mapping[str, set[tuple[str, str]]]:
19
17
  """Get a GO mapping file."""
20
18
  with open(path) as file:
21
19
  return multisetdict(
@@ -23,7 +21,7 @@ def get_go_mapping(path: str, prefix: str) -> Mapping[str, Set[Tuple[str, str]]]
23
21
  )
24
22
 
25
23
 
26
- def process_go_mapping_line(line: str, prefix: str) -> Tuple[str, Tuple[str, str]]:
24
+ def process_go_mapping_line(line: str, prefix: str) -> tuple[str, tuple[str, str]]:
27
25
  """Process a GO mapping line."""
28
26
  line1 = line[len(f"{prefix}:") :]
29
27
  line2, go_id = line1.rsplit(";", 1)
pyobo/sources/wikipathways.py CHANGED
@@ -1,10 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for WikiPathways."""
4
2
 
5
3
  import logging
6
4
  import urllib.error
7
- from typing import Iterable
5
+ from collections.abc import Iterable
8
6
 
9
7
  from .gmt_utils import parse_wikipathways_gmt
10
8
  from ..constants import SPECIES_REMAPPING
pyobo/sources/zfin.py CHANGED
@@ -1,13 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Converter for ZFIN."""
4
2
 
5
3
  import logging
6
4
  from collections import defaultdict
7
- from typing import Iterable, Optional
5
+ from collections.abc import Iterable
6
+ from typing import Optional
8
7
 
9
8
  from tqdm.auto import tqdm
10
9
 
10
+ from pyobo.resources.so import get_so_name
11
11
  from pyobo.struct import (
12
12
  Obo,
13
13
  Reference,
@@ -114,7 +114,9 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
114
114
  )
115
115
  df["sequence_ontology_id"] = df["sequence_ontology_id"].map(lambda x: x[len("SO:") :])
116
116
  so = {
117
- sequence_ontology_id: Reference.auto(prefix="SO", identifier=sequence_ontology_id)
117
+ sequence_ontology_id: Reference(
118
+ prefix="SO", identifier=sequence_ontology_id, name=get_so_name(sequence_ontology_id)
119
+ )
118
120
  for sequence_ontology_id in df["sequence_ontology_id"].unique()
119
121
  }
120
122
  for _, reference in sorted(so.items()):
@@ -136,17 +138,26 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
136
138
  term.append_alt(alt_id)
137
139
  entrez_id = entrez_mappings.get(identifier)
138
140
  if entrez_id:
139
- term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
141
+ try:
142
+ ncbigene_ref = Reference(prefix="ncbigene", identifier=entrez_id)
143
+ except ValueError:
144
+ tqdm.write(f"[zfin] invalid NCBI gene: {entrez_id}")
145
+ else:
146
+ term.append_exact_match(ncbigene_ref)
140
147
  for uniprot_id in uniprot_mappings.get(identifier, []):
141
- term.append_relationship(has_gene_product, Reference.auto("uniprot", uniprot_id))
148
+ term.append_relationship(
149
+ has_gene_product, Reference(prefix="uniprot", identifier=uniprot_id)
150
+ )
142
151
  for hgnc_id in human_orthologs.get(identifier, []):
143
- term.append_relationship(orthologous, Reference.auto("hgnc", hgnc_id))
152
+ term.append_relationship(orthologous, Reference(prefix="hgnc", identifier=hgnc_id))
144
153
  for mgi_curie in mouse_orthologs.get(identifier, []):
145
- mouse_ortholog = Reference.from_curie(mgi_curie, auto=True)
154
+ mouse_ortholog = Reference.from_curie(mgi_curie)
146
155
  if mouse_ortholog:
147
156
  term.append_relationship(orthologous, mouse_ortholog)
148
157
  for flybase_id in fly_orthologs.get(identifier, []):
149
- term.append_relationship(orthologous, Reference.auto("flybase", flybase_id))
158
+ term.append_relationship(
159
+ orthologous, Reference(prefix="flybase", identifier=flybase_id)
160
+ )
150
161
 
151
162
  yield term
152
163
 
pyobo/ssg/__init__.py CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
  import itertools as itt
4
4
  from collections import defaultdict
5
+ from collections.abc import Sequence
5
6
  from operator import attrgetter
6
7
  from pathlib import Path
7
- from typing import Optional, Sequence, Tuple, Union
8
+ from typing import Optional, Union
8
9
 
9
10
  import bioregistry
10
11
  from bioregistry.constants import BIOREGISTRY_DEFAULT_BASE_URL
@@ -37,7 +38,7 @@ def make_site(
37
38
  metaregistry_metaprefix: Optional[str] = None,
38
39
  metaregistry_name: Optional[str] = None,
39
40
  metaregistry_base_url: Optional[str] = None,
40
- show_properties_in_manifest: Optional[Sequence[Tuple[str, str]]] = None,
41
+ show_properties_in_manifest: Optional[Sequence[tuple[str, str]]] = None,
41
42
  ) -> None:
42
43
  """Make a website in the given directory.
43
44
 
pyobo/struct/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Data structures for OBO."""
4
2
 
5
3
  from .reference import Reference # noqa: F401
pyobo/struct/reference.py CHANGED
@@ -1,15 +1,14 @@
1
- # -*- coding: utf-8 -*-
2
-
3
1
  """Data structures for OBO."""
4
2
 
5
- from typing import Optional, Tuple
3
+ from typing import Optional
6
4
 
7
5
  import bioregistry
8
6
  import curies
9
7
  from curies.api import ExpansionError
10
- from pydantic import Field, root_validator, validator
8
+ from pydantic import Field, field_validator, model_validator
11
9
 
12
10
  from .utils import obo_escape
11
+ from ..constants import GLOBAL_CHECK_IDS
13
12
  from ..identifier_utils import normalize_curie
14
13
 
15
14
  __all__ = [
@@ -23,7 +22,7 @@ class Reference(curies.Reference):
23
22
 
24
23
  name: Optional[str] = Field(default=None, description="the name of the reference")
25
24
 
26
- @validator("prefix")
25
+ @field_validator("prefix")
27
26
  def validate_prefix(cls, v): # noqa
28
27
  """Validate the prefix for this reference."""
29
28
  norm_prefix = bioregistry.normalize_prefix(v)
@@ -41,19 +40,19 @@ class Reference(curies.Reference):
41
40
  """Get the preferred curie for this reference."""
42
41
  return f"{self.preferred_prefix}:{self.identifier}"
43
42
 
44
- @root_validator(pre=True)
43
+ @model_validator(mode="before")
45
44
  def validate_identifier(cls, values): # noqa
46
45
  """Validate the identifier."""
47
46
  prefix, identifier = values.get("prefix"), values.get("identifier")
48
47
  if not prefix or not identifier:
49
48
  return values
50
- norm_prefix = bioregistry.normalize_prefix(prefix)
51
- if norm_prefix is None:
49
+ resource = bioregistry.get_resource(prefix)
50
+ if resource is None:
52
51
  raise ExpansionError(f"Unknown prefix: {prefix}")
53
- values["prefix"] = norm_prefix
54
- values["identifier"] = bioregistry.standardize_identifier(norm_prefix, identifier).strip()
55
- # if not bioregistry.is_valid_identifier(norm_prefix, values["identifier"]):
56
- # raise ValueError(f"non-standard identifier: {norm_prefix}:{norm_identifier}")
52
+ values["prefix"] = resource.prefix
53
+ values["identifier"] = resource.standardize_identifier(identifier)
54
+ if GLOBAL_CHECK_IDS and not resource.is_valid_identifier(values["identifier"]):
55
+ raise ValueError(f"non-standard identifier: {resource.prefix}:{values['identifier']}")
57
56
  return values
58
57
 
59
58
  @classmethod
@@ -62,7 +61,7 @@ class Reference(curies.Reference):
62
61
  from ..api import get_name
63
62
 
64
63
  name = get_name(prefix, identifier)
65
- return cls(prefix=prefix, identifier=identifier, name=name)
64
+ return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name})
66
65
 
67
66
  @property
68
67
  def bioregistry_link(self) -> str:
@@ -118,13 +117,13 @@ class Reference(curies.Reference):
118
117
  return None
119
118
  if name is None and auto:
120
119
  return cls.auto(prefix=prefix, identifier=identifier)
121
- return cls(prefix=prefix, identifier=identifier, name=name)
120
+ return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name})
122
121
 
123
122
  @property
124
123
  def _escaped_identifier(self):
125
124
  return obo_escape(self.identifier)
126
125
 
127
- def __str__(self): # noqa: D105
126
+ def __str__(self):
128
127
  identifier_lower = self.identifier.lower()
129
128
  if identifier_lower.startswith(f"{self.prefix.lower()}:"):
130
129
  rv = identifier_lower
@@ -134,7 +133,7 @@ class Reference(curies.Reference):
134
133
  rv = f"{rv} ! {self.name}"
135
134
  return rv
136
135
 
137
- def __hash__(self): # noqa: D105
136
+ def __hash__(self):
138
137
  return hash((self.__class__, self.prefix, self.identifier))
139
138
 
140
139
 
@@ -145,32 +144,32 @@ class Referenced:
145
144
 
146
145
  @property
147
146
  def prefix(self):
148
- """The prefix of the typedef.""" # noqa: D401
147
+ """The prefix of the typedef."""
149
148
  return self.reference.prefix
150
149
 
151
150
  @property
152
151
  def name(self):
153
- """The name of the typedef.""" # noqa: D401
152
+ """The name of the typedef."""
154
153
  return self.reference.name
155
154
 
156
155
  @property
157
156
  def identifier(self) -> str:
158
- """The local unique identifier for this typedef.""" # noqa: D401
157
+ """The local unique identifier for this typedef."""
159
158
  return self.reference.identifier
160
159
 
161
160
  @property
162
161
  def curie(self) -> str:
163
- """The CURIE for this typedef.""" # noqa: D401
162
+ """The CURIE for this typedef."""
164
163
  return self.reference.curie
165
164
 
166
165
  @property
167
166
  def preferred_curie(self) -> str:
168
- """The preferred CURIE for this typedef.""" # noqa: D401
167
+ """The preferred CURIE for this typedef."""
169
168
  return self.reference.preferred_curie
170
169
 
171
170
  @property
172
- def pair(self) -> Tuple[str, str]:
173
- """The pair of namespace/identifier.""" # noqa: D401
171
+ def pair(self) -> tuple[str, str]:
172
+ """The pair of namespace/identifier."""
174
173
  return self.reference.pair
175
174
 
176
175
  @property