pyobo 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Converter for OMIM Phenotypic Series."""
4
+
5
+ import logging
6
+ from typing import Iterable
7
+
8
+ from bioversions.utils import get_soup
9
+
10
+ from pyobo.struct import Obo, Term
11
+
12
+ __all__ = [
13
+ "OMIMPSGetter",
14
+ ]
15
+
16
+
17
+ logger = logging.getLogger(__name__)
18
+ PREFIX = "omim.ps"
19
+ URL = "https://omim.org/phenotypicSeriesTitles/all"
20
+
21
+
22
class OMIMPSGetter(Obo):
    """An ontology representation of OMIM Phenotypic Series."""

    ontology = bioversions_key = PREFIX

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over terms in the ontology.

        The listing page at ``URL`` is fetched and the first table inside the
        element with id ``mimContent`` is scanned. Each row whose first cell
        holds a link to ``/phenotypicSeries/<identifier>`` yields one term
        named by the link text.

        :param force: Accepted for interface compatibility; not used here.
        :yields: One term per phenotypic series link found in the table.
        :raises ValueError: If the expected table is not present in the page.
        """
        href_prefix = "/phenotypicSeries/"
        soup = get_soup(URL, user_agent="Mozilla/5.0")

        # ``find`` returns None when nothing matches, so walk down step by step
        # and fail with a clear message instead of an AttributeError on None.
        content = soup.find(id="mimContent")
        table = content.find("table") if content is not None else None
        body = table.find("tbody") if table is not None else None
        if body is None:
            raise ValueError(f"[{PREFIX}] could not find the phenotypic series table at {URL}")

        for row in body.find_all("tr"):
            cell = row.find("td")
            anchor = cell.find("a") if cell is not None else None
            if anchor is None:
                # e.g. header or spacer rows that carry no link
                logger.debug("[%s] skipping row without a link", PREFIX)
                continue
            href = anchor.attrs.get("href", "")
            if not href.startswith(href_prefix):
                # Slicing a non-matching href would produce a garbage identifier.
                logger.debug("[%s] skipping unexpected link target: %s", PREFIX, href)
                continue
            identifier = href[len(href_prefix) :]
            yield Term.from_triple(PREFIX, identifier, anchor.text.strip())
36
+
37
+
38
+ if __name__ == "__main__":
39
+ OMIMPSGetter.cli()
pyobo/sources/rhea.py CHANGED
@@ -3,31 +3,51 @@
3
3
  """Converter for Rhea."""
4
4
 
5
5
  import logging
6
- from typing import Iterable
6
+ from typing import TYPE_CHECKING, Dict, Iterable, Optional
7
7
 
8
+ import bioversions
8
9
  import pystow
9
10
 
10
11
  from pyobo.struct import Obo, Reference, Term
11
12
  from pyobo.struct.typedef import (
13
+ TypeDef,
14
+ enabled_by,
12
15
  has_bidirectional_reaction,
16
+ has_input,
13
17
  has_left_to_right_reaction,
18
+ has_output,
19
+ has_participant,
14
20
  has_right_to_left_reaction,
21
+ reaction_enabled_by_molecular_function,
15
22
  )
16
23
  from pyobo.utils.path import ensure_df
17
24
 
25
+ if TYPE_CHECKING:
26
+ import rdflib
27
+
18
28
  __all__ = [
19
29
  "RheaGetter",
20
30
  ]
21
31
 
22
32
  logger = logging.getLogger(__name__)
23
33
  PREFIX = "rhea"
34
+ RHEA_RDF_GZ_URL = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"
24
35
 
25
36
 
26
37
  class RheaGetter(Obo):
27
38
  """An ontology representation of Rhea's chemical reaction database."""
28
39
 
29
40
  ontology = bioversions_key = PREFIX
30
- typedefs = [has_left_to_right_reaction, has_bidirectional_reaction, has_right_to_left_reaction]
41
+ typedefs = [
42
+ has_left_to_right_reaction,
43
+ has_bidirectional_reaction,
44
+ has_right_to_left_reaction,
45
+ enabled_by,
46
+ has_input,
47
+ has_output,
48
+ has_participant,
49
+ reaction_enabled_by_molecular_function,
50
+ ]
31
51
 
32
52
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
33
53
  """Iterate over terms in the ontology."""
@@ -39,25 +59,54 @@ def get_obo(force: bool = False) -> Obo:
39
59
  return RheaGetter(force=force)
40
60
 
41
61
 
62
def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdflib.Graph":
    """Get the Rhea RDF graph.

    :param version: Version used as the last path part for the stored file.
        When ``None``, it is looked up with :func:`bioversions.get_version`.
    :param force: Passed through to :func:`pystow.ensure_rdf`.
    :returns: The result of :func:`pystow.ensure_rdf` for the gzipped
        RDF/XML file at ``RHEA_RDF_GZ_URL``.
    """
    # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
    resolved_version = bioversions.get_version(PREFIX) if version is None else version
    path_parts = ("pyobo", "raw", PREFIX, resolved_version)
    return pystow.ensure_rdf(
        *path_parts,
        url=RHEA_RDF_GZ_URL,
        force=force,
        parse_kwargs={"format": "xml"},
    )
76
+
77
+
78
+ def _get_lr_name(name: str) -> str:
79
+ return name.replace(" = ", " => ")
80
+
81
+
82
+ def _get_rl_name(name: str) -> str:
83
+ left, right = name.split(" = ", 1)
84
+ return f"{right} => {left}"
85
+
86
+
87
+ def _get_bi_name(name: str) -> str:
88
+ return name.replace(" = ", " <=> ")
89
+
90
+
42
91
  def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
43
92
  """Iterate over terms in Rhea."""
44
- url = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"
45
- graph = pystow.ensure_rdf(
46
- "pyobo", "raw", PREFIX, version, url=url, force=force, parse_kwargs=dict(format="xml")
47
- )
93
+ graph = ensure_rhea_rdf(version=version, force=force)
48
94
  result = graph.query(
49
- """
50
- PREFIX rh:<http://rdf.rhea-db.org/>
51
- SELECT ?reaction ?reactionId ?reactionLabel WHERE {
52
- ?reaction rdfs:subClassOf rh:Reaction .
53
- ?reaction rh:id ?reactionId .
54
- ?reaction rdfs:label ?reactionLabel .
55
- }
95
+ """\
96
+ PREFIX rh:<http://rdf.rhea-db.org/>
97
+ SELECT ?reaction ?reactionId ?reactionLabel WHERE {
98
+ ?reaction rdfs:subClassOf rh:Reaction ;
99
+ rh:id ?reactionId ;
100
+ rdfs:label ?reactionLabel .
101
+ }
56
102
  """
57
103
  )
58
- names = {str(identifier): name for _, identifier, name in result}
104
+ names = {str(identifier): str(name) for _, identifier, name in result}
59
105
 
60
- terms = {}
106
+ terms: Dict[str, Term] = {}
107
+ master_to_left: Dict[str, str] = {}
108
+ master_to_right: Dict[str, str] = {}
109
+ master_to_bi: Dict[str, str] = {}
61
110
 
62
111
  directions = ensure_df(
63
112
  PREFIX,
@@ -66,12 +115,16 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
66
115
  force=force,
67
116
  )
68
117
  for master, lr, rl, bi in directions.values:
69
- terms[master] = Term(
70
- reference=Reference(prefix=PREFIX, identifier=master, name=names.get(master))
71
- )
72
- terms[lr] = Term(reference=Reference(prefix=PREFIX, identifier=lr, name=names.get(lr)))
73
- terms[rl] = Term(reference=Reference(prefix=PREFIX, identifier=rl, name=names.get(rl)))
74
- terms[bi] = Term(reference=Reference(prefix=PREFIX, identifier=bi, name=names.get(bi)))
118
+ master_to_left[master] = lr
119
+ master_to_right[master] = rl
120
+ master_to_bi[master] = bi
121
+
122
+ name = names[master]
123
+
124
+ terms[master] = Term(reference=Reference(prefix=PREFIX, identifier=master, name=name))
125
+ terms[lr] = Term(reference=Reference(prefix=PREFIX, identifier=lr, name=_get_lr_name(name)))
126
+ terms[rl] = Term(reference=Reference(prefix=PREFIX, identifier=rl, name=_get_rl_name(name)))
127
+ terms[bi] = Term(reference=Reference(prefix=PREFIX, identifier=bi, name=_get_bi_name(name)))
75
128
 
76
129
  terms[master].append_relationship(has_left_to_right_reaction, terms[lr])
77
130
  terms[master].append_relationship(has_right_to_left_reaction, terms[rl])
@@ -80,6 +133,38 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
80
133
  terms[rl].append_parent(terms[master])
81
134
  terms[bi].append_parent(terms[master])
82
135
 
136
+ # inspired by https://github.com/geneontology/go-ontology/blob/master/src/sparql/construct-rhea-reactions.sparql
137
+ sparql = """\
138
+ PREFIX rh:<http://rdf.rhea-db.org/>
139
+ SELECT ?reactionId ?side ?chebi WHERE {
140
+ ?reaction rdfs:subClassOf rh:Reaction ;
141
+ rh:id ?reactionId .
142
+
143
+ ?reaction rh:side ?side .
144
+ ?side rh:contains ?participant .
145
+ ?participant rh:compound ?compound .
146
+ ?compound rh:chebi|rh:underlyingChebi|(rh:reactivePart/rh:chebi) ?chebi .
147
+ }
148
+ """
149
+ for master_rhea_id, side_uri, chebi_uri in graph.query(sparql):
150
+ master_rhea_id = str(master_rhea_id)
151
+ chebi_reference = Reference(
152
+ prefix="chebi", identifier=chebi_uri[len("http://purl.obolibrary.org/obo/CHEBI_") :]
153
+ )
154
+ side = side_uri.split("_")[-1] # L or R
155
+ if side == "L":
156
+ left_rhea_id = master_to_left[master_rhea_id]
157
+ right_rhea_id = master_to_right[master_rhea_id]
158
+ elif side == "R":
159
+ left_rhea_id = master_to_right[master_rhea_id]
160
+ right_rhea_id = master_to_left[master_rhea_id]
161
+ else:
162
+ raise ValueError(f"Invalid side: {side_uri}")
163
+ terms[master_rhea_id].append_relationship(has_participant, chebi_reference)
164
+ terms[master_to_bi[master_rhea_id]].append_relationship(has_participant, chebi_reference)
165
+ terms[left_rhea_id].append_relationship(has_input, chebi_reference)
166
+ terms[right_rhea_id].append_relationship(has_output, chebi_reference)
167
+
83
168
  hierarchy = ensure_df(
84
169
  PREFIX,
85
170
  url="ftp://ftp.expasy.org/databases/rhea/tsv/rhea-relationships.tsv",
@@ -91,12 +176,14 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
91
176
  raise ValueError(f"RHEA unrecognized relation: {relation}")
92
177
  terms[source].append_parent(terms[target])
93
178
 
94
- for xref_prefix, url in [
95
- ("ecocyc", "rhea2ecocyc"),
96
- ("kegg.reaction", "rhea2kegg_reaction"),
97
- ("reactome", "rhea2reactome"),
98
- ("macie", "rhea2macie"),
99
- ("metacyc", "rhea2metacyc"),
179
+ for xref_prefix, url, relation in [
180
+ ("ecocyc", "rhea2ecocyc", None),
181
+ ("kegg.reaction", "rhea2kegg_reaction", None),
182
+ ("reactome", "rhea2reactome", None),
183
+ ("macie", "rhea2macie", None),
184
+ ("metacyc", "rhea2metacyc", None),
185
+ ("go", "rhea2go", reaction_enabled_by_molecular_function),
186
+ ("uniprot", "rhea2uniprot", enabled_by),
100
187
  ]:
101
188
  xref_df = ensure_df(
102
189
  PREFIX,
@@ -104,26 +191,44 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
104
191
  version=version,
105
192
  force=force,
106
193
  )
107
- for rhea_id, _, _, xref_id in xref_df.values:
108
- if rhea_id not in terms:
194
+ for directional_rhea_id, _direction, _master_rhea_id, xref_id in xref_df.values:
195
+ if directional_rhea_id not in terms:
109
196
  logger.debug(
110
197
  "[%s] could not find %s:%s for xref %s:%s",
111
198
  PREFIX,
112
199
  PREFIX,
113
- rhea_id,
200
+ directional_rhea_id,
114
201
  xref_prefix,
115
202
  xref_id,
116
203
  )
117
204
  continue
118
- terms[rhea_id].append_xref(Reference(prefix=xref_prefix, identifier=xref_id))
205
+ target_reference = Reference(prefix=xref_prefix, identifier=xref_id)
206
+ if isinstance(relation, TypeDef):
207
+ terms[directional_rhea_id].append_relationship(relation, target_reference)
208
+ else:
209
+ terms[directional_rhea_id].append_xref(target_reference)
119
210
 
120
- # TODO are EC codes equivalent?
121
- # TODO uniprot enabled by (RO:0002333)
122
- # TODO names?
123
- # TODO participants?
211
+ ec_df = ensure_df(
212
+ PREFIX,
213
+ url="ftp://ftp.expasy.org/databases/rhea/tsv/rhea-ec-iubmb.tsv",
214
+ version=version,
215
+ force=force,
216
+ )
217
+ for (
218
+ directional_rhea_id,
219
+ _status,
220
+ _direction,
221
+ _master_id,
222
+ ec,
223
+ _enzyme_status,
224
+ _iubmb,
225
+ ) in ec_df.values:
226
+ terms[directional_rhea_id].append_relationship(
227
+ enabled_by, Reference(prefix="eccode", identifier=ec)
228
+ )
124
229
 
125
230
  yield from terms.values()
126
231
 
127
232
 
128
233
  if __name__ == "__main__":
129
- RheaGetter.cli()
234
+ RheaGetter().write_default(write_obo=True, force=True)
@@ -7,12 +7,13 @@ Run with ``python -m pyobo.sources.umls``
7
7
 
8
8
  import itertools as itt
9
9
  import operator
10
- from typing import Iterable
10
+ from collections import defaultdict
11
+ from typing import Iterable, Mapping, Set
11
12
 
12
13
  import bioregistry
13
14
  import pandas as pd
14
15
  from tqdm.auto import tqdm
15
- from umls_downloader import open_umls
16
+ from umls_downloader import open_umls, open_umls_semantic_types
16
17
 
17
18
  from pyobo import Obo, Reference, Synonym, SynonymTypeDef, Term
18
19
 
@@ -66,8 +67,20 @@ def get_obo() -> Obo:
66
67
  return UMLSGetter()
67
68
 
68
69
 
70
def get_semantic_types() -> Mapping[str, Set[str]]:
    """Get UMLS semantic types for each term.

    :returns: A plain dictionary keyed by the first pipe-delimited field of
        each line, mapping to the set of second fields seen for that key.
    """
    types_by_cui = defaultdict(set)
    with open_umls_semantic_types() as file:
        for raw_line in tqdm(file, unit_scale=True):
            # Only the first two fields are used; the remainder stays unsplit.
            concept_id, semantic_type, _rest = raw_line.decode("utf8").split("|", 2)
            types_by_cui[concept_id].add(semantic_type)
    # Hand back a regular dict rather than the defaultdict used while grouping.
    return dict(types_by_cui)
78
+
79
+
69
80
  def iter_terms(version: str) -> Iterable[Term]:
70
81
  """Iterate over UMLS terms."""
82
+ semantic_types = get_semantic_types()
83
+
71
84
  with open_umls(version=version) as file:
72
85
  it = tqdm(file, unit_scale=True, desc="[umls] parsing")
73
86
  lines = (line.decode("utf-8").strip().split("|") for line in it)
@@ -118,6 +131,8 @@ def iter_terms(version: str) -> Iterable[Term]:
118
131
  synonyms=synonyms,
119
132
  xrefs=xrefs,
120
133
  )
134
+ for sty_id in semantic_types.get(cui, set()):
135
+ term.append_parent(Reference(prefix="sty", identifier=sty_id))
121
136
  yield term
122
137
 
123
138
 
@@ -2,8 +2,9 @@
2
2
 
3
3
  """Converter for UniProt."""
4
4
 
5
+ from operator import attrgetter
5
6
  from pathlib import Path
6
- from typing import Iterable, Optional
7
+ from typing import Iterable, List, Optional, cast
7
8
 
8
9
  import bioversions
9
10
  from tqdm.auto import tqdm
@@ -11,22 +12,52 @@ from tqdm.auto import tqdm
11
12
  from pyobo import Obo, Reference
12
13
  from pyobo.constants import RAW_MODULE
13
14
  from pyobo.identifier_utils import standardize_ec
14
- from pyobo.struct import Term, enables, from_species
15
+ from pyobo.struct import Term, derives_from, enables, from_species, participates_in
16
+ from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
15
17
  from pyobo.utils.io import open_reader
16
18
 
17
19
  PREFIX = "uniprot"
18
- REVIEWED_URL = (
19
- "https://rest.uniprot.org/uniprotkb/stream?compressed=true"
20
- "&fields=accession%2Cid%2Corganism_id%2Cprotein_name%2Cec%2Clit_pubmed_id%2Cxref_pdb"
21
- "&format=tsv&query=%28%2A%29%20AND%20%28reviewed%3Atrue%29"
22
- )
20
+ BASE_URL = "https://rest.uniprot.org/uniprotkb/stream"
21
+ SEARCH_URL = "https://rest.uniprot.org/uniprotkb/search"
22
+ QUERY = "(*) AND (reviewed:true)"
23
+ FIELDS = [
24
+ "accession",
25
+ "id",
26
+ "organism_id",
27
+ "protein_name",
28
+ "ec",
29
+ "lit_pubmed_id",
30
+ "xref_pdb",
31
+ "xref_proteomes",
32
+ "xref_geneid",
33
+ "rhea",
34
+ "go_c",
35
+ "go_f",
36
+ "go_p",
37
+ "ft_binding",
38
+ "cc_function",
39
+ ]
40
+ PARAMS = {
41
+ "compressed": "true",
42
+ "format": "tsv",
43
+ # "size": 10, # only used with search
44
+ "query": QUERY,
45
+ "fields": FIELDS,
46
+ }
23
47
 
24
48
 
25
49
  class UniProtGetter(Obo):
26
50
  """An ontology representation of the UniProt database."""
27
51
 
28
52
  bioversions_key = ontology = PREFIX
29
- typedefs = [from_species, enables]
53
+ typedefs = [
54
+ from_species,
55
+ enables,
56
+ participates_in,
57
+ gene_product_of,
58
+ molecularly_interacts_with,
59
+ derives_from,
60
+ ]
30
61
 
31
62
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
32
63
  """Iterate over terms in the ontology."""
@@ -42,13 +73,73 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
42
73
  """Iterate over UniProt Terms."""
43
74
  with open_reader(ensure(version=version)) as reader:
44
75
  _ = next(reader) # header
45
- for uniprot_id, name, taxonomy_id, _synonyms, ecs, pubmeds, pdbs in tqdm(
46
- reader, desc="Mapping UniProt", unit_scale=True
47
- ):
48
- term = Term.from_triple(prefix=PREFIX, identifier=uniprot_id, name=name)
49
- # TODO add gene encodes from relationship
50
- # TODO add description
76
+ for (
77
+ uniprot_id,
78
+ accession,
79
+ taxonomy_id,
80
+ _name, # this field should have the name, but it's a mismatch of random name annotations
81
+ ecs,
82
+ pubmeds,
83
+ pdbs,
84
+ proteome,
85
+ gene_id,
86
+ rhea_curies,
87
+ go_components,
88
+ go_functions,
89
+ go_processes,
90
+ bindings,
91
+ description,
92
+ ) in tqdm(reader, desc="Mapping UniProt", unit_scale=True):
93
+ if description:
94
+ description = description.removeprefix("FUNCTION: ")
95
+ term = Term(
96
+ reference=Reference(prefix=PREFIX, identifier=uniprot_id, name=accession),
97
+ definition=description or None,
98
+ )
51
99
  term.set_species(taxonomy_id)
100
+ if gene_id:
101
+ term.append_relationship(
102
+ gene_product_of, Reference(prefix="ncbigene", identifier=gene_id)
103
+ )
104
+
105
+ # TODO add type=Reference(prefix="xsd", identifier="boolean")
106
+ term.append_property("reviewed", "true")
107
+
108
+ for go_process_ref in _parse_go(go_processes):
109
+ term.append_relationship(participates_in, go_process_ref)
110
+ for go_function_ref in _parse_go(go_functions):
111
+ term.append_relationship(enables, go_function_ref)
112
+ for go_component_ref in _parse_go(go_components):
113
+ term.append_relationship(located_in, go_component_ref)
114
+
115
+ if proteome:
116
+ uniprot_proteome_id = proteome.split(":")[0]
117
+ term.append_relationship(
118
+ derives_from,
119
+ Reference(prefix="uniprot.proteome", identifier=uniprot_proteome_id),
120
+ )
121
+
122
+ if rhea_curies:
123
+ for rhea_curie in rhea_curies.split(" "):
124
+ term.append_relationship(
125
+ # FIXME this needs a different relation than enables
126
+ # see https://github.com/biopragmatics/pyobo/pull/168#issuecomment-1918680152
127
+ enables,
128
+ cast(Reference, Reference.from_curie(rhea_curie, strict=True)),
129
+ )
130
+
131
+ if bindings:
132
+ binding_references = set()
133
+ for part in bindings.split(";"):
134
+ part = part.strip()
135
+ if part.startswith("/ligand_id"):
136
+ curie = part.removeprefix('/ligand_id="').rstrip('"')
137
+ binding_references.add(
138
+ cast(Reference, Reference.from_curie(curie, strict=True))
139
+ )
140
+ for binding_reference in sorted(binding_references, key=attrgetter("curie")):
141
+ term.append_relationship(molecularly_interacts_with, binding_reference)
142
+
52
143
  if ecs:
53
144
  for ec in ecs.split(";"):
54
145
  term.append_relationship(
@@ -63,11 +154,27 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
63
154
  yield term
64
155
 
65
156
 
66
- def ensure(version: Optional[str] = None) -> Path:
157
def _parse_go(go_terms) -> List[Reference]:
    """Extract GO references from a semicolon-delimited annotation string.

    Each non-blank entry must end with a bracketed identifier, for example
    ``some label [GO:0005737]``. (Presumably this is how the UniProt TSV
    export renders its GO columns; confirm against the API output.)

    :param go_terms: The raw cell value; falsy values give an empty list.
    :returns: One ``go`` reference per entry, in input order.
    :raises ValueError: If a non-blank entry has no ``[GO:`` marker or an
        empty identifier.
    """
    rv: List[Reference] = []
    if not go_terms:
        return rv
    for go_term in go_terms.split(";"):
        if not go_term.strip():
            # A trailing or doubled ";" leaves an empty segment; nothing to parse.
            continue
        # Split on the LAST marker so a label that itself contains "[GO:"
        # cannot shadow the real identifier at the end of the entry.
        _label, marker, tail = go_term.rpartition("[GO:")
        go_id = tail.strip().rstrip("]")
        if not marker or not go_id:
            raise ValueError(f"could not parse GO annotation: {go_term!r}")
        rv.append(Reference(prefix="go", identifier=go_id))
    return rv
164
+
165
+
166
def ensure(version: Optional[str] = None, force: bool = False) -> Path:
    """Ensure the reviewed uniprot names are available.

    :param version: Version used as a path part for the stored file. When
        ``None``, it is looked up with :func:`bioversions.get_version`.
    :param force: Passed through to ``RAW_MODULE.ensure``.
    :returns: The result of ``RAW_MODULE.ensure`` for ``reviewed.tsv.gz``.
    """
    resolved_version = version if version is not None else bioversions.get_version("uniprot")
    # The query, format, and field list travel as request parameters.
    download_kwargs = {"backend": "requests", "params": PARAMS}
    return RAW_MODULE.ensure(
        PREFIX,
        resolved_version,
        force=force,
        name="reviewed.tsv.gz",
        url=BASE_URL,  # switch to SEARCH_URL for debugging
        download_kwargs=download_kwargs,
    )
71
178
 
72
179
 
73
180
  if __name__ == "__main__":
pyobo/struct/__init__.py CHANGED
@@ -15,6 +15,7 @@ from .struct import ( # noqa: F401
15
15
  from .typedef import ( # noqa: F401
16
16
  RelationHint,
17
17
  TypeDef,
18
+ derives_from,
18
19
  enables,
19
20
  from_species,
20
21
  gene_product_member_of,
pyobo/struct/struct.py CHANGED
@@ -53,6 +53,7 @@ from .typedef import (
53
53
  orthologous,
54
54
  part_of,
55
55
  see_also,
56
+ term_replaced_by,
56
57
  )
57
58
  from .utils import comma_separate, obo_escape_slim
58
59
  from ..constants import (
@@ -299,6 +300,11 @@ class Term(Referenced):
299
300
  self.append_property(comment.curie, value)
300
301
  return self
301
302
 
303
def append_replaced_by(self, reference: ReferenceHint) -> "Term":
    """Add a replaced by relationship.

    :param reference: The target of the ``term_replaced_by`` relationship.
    :returns: This term, so calls can be chained.
    """
    replacement = reference
    self.append_relationship(term_replaced_by, replacement)
    return self
307
+
302
308
  def append_parent(self, reference: ReferenceHint) -> "Term":
303
309
  """Add a parent to this entity."""
304
310
  reference = _ensure_ref(reference)
@@ -395,14 +401,14 @@ class Term(Referenced):
395
401
 
396
402
def iterate_relations(self) -> Iterable[Tuple[TypeDef, Reference]]:
    """Iterate over pairs of typedefs and targets.

    Typedefs are visited in the order given by ``_sort_relations``; within
    each typedef, targets are ordered by their ``preferred_curie``.
    """

    def _target_key(ref):
        return ref.preferred_curie

    ordered_relations = sorted(self.relationships.items(), key=_sort_relations)
    for relation, references in ordered_relations:
        yield from ((relation, ref) for ref in sorted(references, key=_target_key))
401
407
 
402
408
def iterate_properties(self) -> Iterable[Tuple[str, str]]:
    """Iterate over pairs of property and values.

    Properties are visited in sorted order, and the values of each property
    are sorted as well.
    """
    ordered_properties = sorted(self.properties.items())
    yield from (
        (key, entry)
        for key, entries in ordered_properties
        for entry in sorted(entries)
    )
407
413
 
408
414
  def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
@@ -466,7 +472,7 @@ _TYPEDEF_WARNINGS: Set[Tuple[str, str]] = set()
466
472
 
467
473
  def _sort_relations(r):
468
474
  typedef, _references = r
469
- return typedef.reference.name or typedef.reference.identifier
475
+ return typedef.preferred_curie
470
476
 
471
477
 
472
478
  def _sort_properties(r):
@@ -1017,7 +1023,7 @@ class Obo:
1017
1023
def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]:
    """Iterate identifier name pairs.

    Only terms whose prefix equals this ontology's prefix and which have a
    truthy name are yielded.
    """
    description = f"[{self.ontology}] getting names"
    for term in self._iter_terms(use_tqdm=use_tqdm, desc=description):
        if term.prefix != self.ontology or not term.name:
            continue
        yield term.identifier, term.name
1022
1028
 
1023
1029
  def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
pyobo/struct/typedef.py CHANGED
@@ -48,6 +48,10 @@ __all__ = [
48
48
  ]
49
49
 
50
50
 
51
+ def _bool_to_obo(v: bool) -> str:
52
+ return "true" if v else "false"
53
+
54
+
51
55
  @dataclass
52
56
  class TypeDef(Referenced):
53
57
  """A type definition in OBO.
@@ -88,7 +92,7 @@ class TypeDef(Referenced):
88
92
  yield f'def: "{self.definition}"'
89
93
 
90
94
  if self.is_metadata_tag is not None:
91
- yield f'is_metadata_tag: {"true" if self.is_metadata_tag else "false"}'
95
+ yield f"is_metadata_tag: {_bool_to_obo(self.is_metadata_tag)}"
92
96
 
93
97
  if self.namespace:
94
98
  yield f"namespace: {self.namespace}"
@@ -113,6 +117,10 @@ class TypeDef(Referenced):
113
117
  yield f"holds_over_chain: {_chain} ! {_names}"
114
118
  if self.inverse:
115
119
  yield f"inverse_of: {self.inverse}"
120
+ if self.domain:
121
+ yield f"domain: {self.domain}"
122
+ if self.range:
123
+ yield f"range: {self.range}"
116
124
 
117
125
  @classmethod
118
126
  def from_triple(cls, prefix: str, identifier: str, name: Optional[str] = None) -> "TypeDef":
@@ -161,13 +169,19 @@ species_specific = TypeDef(
161
169
  "species with RO:0002162 (in taxon)",
162
170
  )
163
171
  has_left_to_right_reaction = TypeDef(
164
- Reference(prefix="debio", identifier="0000007", name="has left-to-right reaction")
172
+ Reference(prefix="debio", identifier="0000007", name="has left-to-right reaction"),
173
+ is_metadata_tag=True,
165
174
  )
166
175
  has_right_to_left_reaction = TypeDef(
167
- Reference(prefix="debio", identifier="0000008", name="has right-to-left reaction")
176
+ Reference(prefix="debio", identifier="0000008", name="has right-to-left reaction"),
177
+ is_metadata_tag=True,
168
178
  )
169
179
  has_bidirectional_reaction = TypeDef(
170
- Reference(prefix="debio", identifier="0000009", name="has bi-directional reaction")
180
+ Reference(prefix="debio", identifier="0000009", name="has bi-directional reaction"),
181
+ is_metadata_tag=True,
182
+ )
183
+ reaction_enabled_by_molecular_function = TypeDef(
184
+ Reference(prefix="debio", identifier="0000047", name="reaction enabled by molecular function")
171
185
  )
172
186
 
173
187
 
@@ -191,6 +205,15 @@ has_participant = TypeDef(
191
205
  comment="Inverse of has participant",
192
206
  inverse=Reference(prefix=RO_PREFIX, identifier="0000056", name="participates in"),
193
207
  )
208
+ derives_from = TypeDef(
209
+ reference=Reference(prefix=RO_PREFIX, identifier="0001000", name="derives from"),
210
+ )
211
+ molecularly_interacts_with = TypeDef(
212
+ reference=Reference(prefix=RO_PREFIX, identifier="0002436", name="molecularly interacts with"),
213
+ )
214
+ located_in = TypeDef(
215
+ reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
216
+ )
194
217
  exact_match = TypeDef(
195
218
  reference=Reference(prefix="skos", identifier="exactMatch", name="exact match"),
196
219
  )
@@ -291,7 +314,14 @@ editor_note = TypeDef.from_triple(prefix=IAO_PREFIX, identifier="0000116", name=
291
314
  is_immediately_transformed_from = TypeDef.from_triple(
292
315
  prefix=SIO_PREFIX, identifier="000658", name="is immediately transformed from"
293
316
  )
294
- enables = TypeDef.from_triple(prefix="RO", identifier="0002327", name="enables")
317
+
318
+ _enables_reference = Reference(prefix=RO_PREFIX, identifier="0002327", name="enables")
319
+ _enabled_by_reference = Reference(prefix=RO_PREFIX, identifier="0002333", name="enabled by")
320
+ enables = TypeDef(reference=_enables_reference, inverse=_enabled_by_reference)
321
+ enabled_by = TypeDef(reference=_enabled_by_reference, inverse=_enables_reference)
322
+
323
+ has_input = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002233", name="has input")
324
+ has_output = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002234", name="has output")
295
325
 
296
326
  """ChEBI"""
297
327