pyobo 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. pyobo/__init__.py +6 -0
  2. pyobo/api/__init__.py +11 -1
  3. pyobo/api/alts.py +18 -4
  4. pyobo/api/embedding.py +108 -9
  5. pyobo/api/names.py +28 -6
  6. pyobo/api/xrefs.py +21 -1
  7. pyobo/cli/cli.py +9 -3
  8. pyobo/cli/database.py +63 -22
  9. pyobo/cli/lookup.py +39 -24
  10. pyobo/cli/utils.py +6 -2
  11. pyobo/constants.py +66 -7
  12. pyobo/getters.py +8 -3
  13. pyobo/ner/api.py +17 -10
  14. pyobo/ner/scispacy_utils.py +2 -0
  15. pyobo/plugins.py +3 -1
  16. pyobo/sources/__init__.py +2 -0
  17. pyobo/sources/antibodyregistry.py +3 -3
  18. pyobo/sources/bigg/bigg_compartment.py +1 -1
  19. pyobo/sources/complexportal.py +3 -3
  20. pyobo/sources/conso.py +3 -3
  21. pyobo/sources/famplex.py +3 -3
  22. pyobo/sources/goldbook.py +86 -0
  23. pyobo/sources/hgnc/hgnc.py +157 -96
  24. pyobo/sources/hgnc/hgncgenefamily.py +14 -13
  25. pyobo/sources/msigdb.py +3 -3
  26. pyobo/sources/omim_ps.py +8 -2
  27. pyobo/sources/reactome.py +3 -3
  28. pyobo/sources/rgd.py +7 -11
  29. pyobo/sources/slm.py +3 -3
  30. pyobo/sources/uniprot/uniprot.py +3 -3
  31. pyobo/sources/wikipathways.py +7 -2
  32. pyobo/struct/__init__.py +2 -2
  33. pyobo/struct/functional/macros.py +1 -1
  34. pyobo/struct/functional/obo_to_functional.py +7 -3
  35. pyobo/struct/obo/reader.py +4 -4
  36. pyobo/struct/struct.py +48 -18
  37. pyobo/struct/struct_utils.py +19 -5
  38. pyobo/struct/typedef.py +19 -3
  39. pyobo/struct/vocabulary.py +6 -3
  40. pyobo/utils/path.py +5 -4
  41. pyobo/version.py +1 -1
  42. {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/METADATA +45 -23
  43. {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/RECORD +46 -45
  44. {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/WHEEL +1 -1
  45. {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/entry_points.txt +0 -0
  46. {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/licenses/LICENSE +0 -0
@@ -5,9 +5,10 @@ from collections.abc import Iterable, Mapping
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from ...struct import Obo, Reference, SynonymTypeDef, Term, has_citation
8
+ from ...struct import Obo, Reference, Term, is_mentioned_by
9
+ from ...struct.struct import abbreviation as symbol_type
9
10
  from ...struct.typedef import enables, exact_match, from_species
10
- from ...utils.path import ensure_path
11
+ from ...utils.path import ensure_df
11
12
 
12
13
  __all__ = [
13
14
  "HGNCGroupGetter",
@@ -15,13 +16,9 @@ __all__ = [
15
16
 
16
17
  PREFIX = "hgnc.genegroup"
17
18
  FAMILIES_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family.csv"
18
- # TODO use family_alias.csv
19
+ FAMILIES_ALIAS_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family_alias.csv"
19
20
  HIERARCHY_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/hierarchy.csv"
20
21
 
21
- symbol_type = SynonymTypeDef(
22
- reference=Reference(prefix="OMO", identifier="0004000", name="has symbol")
23
- )
24
-
25
22
 
26
23
  class HGNCGroupGetter(Obo):
27
24
  """An ontology representation of HGNC's gene group nomenclature."""
@@ -29,7 +26,7 @@ class HGNCGroupGetter(Obo):
29
26
  ontology = PREFIX
30
27
  bioversions_key = "hgnc"
31
28
  synonym_typedefs = [symbol_type]
32
- typedefs = [from_species, enables, exact_match, has_citation]
29
+ typedefs = [from_species, enables, exact_match, is_mentioned_by]
33
30
 
34
31
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
35
32
  """Iterate over terms in the ontology."""
@@ -38,8 +35,7 @@ class HGNCGroupGetter(Obo):
38
35
 
39
36
  def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
40
37
  """Get the HGNC Gene Families hierarchy as a dictionary."""
41
- path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
42
- df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
38
+ df = ensure_df(PREFIX, url=HIERARCHY_URL, force=force, sep=",")
43
39
  d = defaultdict(list)
44
40
  for parent_id, child_id in df.values:
45
41
  d[child_id].append(parent_id)
@@ -75,9 +71,12 @@ def get_terms(force: bool = False) -> Iterable[Term]:
75
71
 
76
72
 
77
73
  def _get_terms_helper(force: bool = False) -> Iterable[Term]:
78
- path = ensure_path(PREFIX, url=FAMILIES_URL, force=force)
79
- df = pd.read_csv(path, dtype={"id": str})
74
+ alias_df = ensure_df(PREFIX, url=FAMILIES_ALIAS_URL, force=force, sep=",")
75
+ aliases = defaultdict(set)
76
+ for _id, family_id, alias in alias_df.values:
77
+ aliases[family_id].add(alias)
80
78
 
79
+ df = ensure_df(PREFIX, url=FAMILIES_URL, force=force, sep=",")
81
80
  for gene_group_id, symbol, name, pubmed_ids, definition, desc_go in df[COLUMNS].values:
82
81
  if not definition or pd.isna(definition):
83
82
  definition = None
@@ -89,12 +88,14 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
89
88
  for s in pubmed_ids.replace(" ", ",").split(","):
90
89
  s = s.strip()
91
90
  if s:
92
- term.append_provenance(Reference(prefix="pubmed", identifier=s))
91
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=s))
93
92
  if desc_go and pd.notna(desc_go):
94
93
  go_id = desc_go[len("http://purl.uniprot.org/go/") :]
95
94
  term.append_relationship(enables, Reference(prefix="GO", identifier=go_id))
96
95
  if symbol and pd.notna(symbol):
97
96
  term.append_synonym(symbol, type=symbol_type)
97
+ for alias in aliases[gene_group_id]:
98
+ term.append_synonym(alias)
98
99
  term.set_species(identifier="9606", name="Homo sapiens")
99
100
  yield term
100
101
 
pyobo/sources/msigdb.py CHANGED
@@ -8,7 +8,7 @@ from lxml import etree
8
8
  from pydantic import ValidationError
9
9
  from tqdm.auto import tqdm
10
10
 
11
- from pyobo.struct import Obo, Reference, Term, TypeDef, has_citation, has_participant
11
+ from pyobo.struct import Obo, Reference, Term, TypeDef, has_participant, is_mentioned_by
12
12
  from pyobo.utils.path import ensure_path
13
13
 
14
14
  __all__ = [
@@ -43,7 +43,7 @@ class MSigDBGetter(Obo):
43
43
  """An ontology representation of MMSigDB's gene set nomenclature."""
44
44
 
45
45
  ontology = bioversions_key = PREFIX
46
- typedefs = [has_participant, has_citation, *(p for _, p in PROPERTIES)]
46
+ typedefs = [has_participant, is_mentioned_by, *(p for _, p in PROPERTIES)]
47
47
 
48
48
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
49
49
  """Iterate over terms in the ontology."""
@@ -112,7 +112,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
112
112
  elif reference_id.startswith("GSE"):
113
113
  term.append_see_also(Reference(prefix="gse", identifier=reference_id))
114
114
  else:
115
- term.append_provenance(Reference(prefix="pubmed", identifier=reference_id))
115
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=reference_id))
116
116
 
117
117
  for key, typedef in PROPERTIES:
118
118
  if value := attrib[key].strip():
pyobo/sources/omim_ps.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  import logging
4
4
  from collections.abc import Iterable
5
+ from typing import cast
5
6
 
6
7
  from bioversions.utils import get_soup
7
8
 
@@ -34,9 +35,14 @@ class OMIMPSGetter(Obo):
34
35
  if tbody is None:
35
36
  raise ValueError("omim.ps failed - scraper could not find table body in HTML")
36
37
  for row in tbody.find_all("tr"):
37
- anchor = row.find("td").find("a")
38
+ td = row.find("td")
39
+ if td is None:
40
+ continue
41
+ anchor = td.find("a")
42
+ if anchor is None or anchor.text is None:
43
+ continue
38
44
  name = anchor.text.strip()
39
- identifier = anchor.attrs["href"][len("/phenotypicSeries/") :]
45
+ identifier = cast(str, anchor.attrs["href"])[len("/phenotypicSeries/") :]
40
46
  yield Term.from_triple(PREFIX, identifier, name)
41
47
 
42
48
 
pyobo/sources/reactome.py CHANGED
@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
11
11
  from ..api import get_id_multirelations_mapping
12
12
  from ..constants import SPECIES_REMAPPING
13
13
  from ..resources.ncbitaxon import get_ncbitaxon_id
14
- from ..struct import Obo, Reference, Term, from_species, has_citation, has_participant
14
+ from ..struct import Obo, Reference, Term, from_species, has_participant, is_mentioned_by
15
15
  from ..utils.io import multidict
16
16
  from ..utils.path import ensure_df
17
17
 
@@ -32,7 +32,7 @@ class ReactomeGetter(Obo):
32
32
  """An ontology representation of the Reactome pathway database."""
33
33
 
34
34
  ontology = bioversions_key = PREFIX
35
- typedefs = [from_species, has_participant, has_citation]
35
+ typedefs = [from_species, has_participant, is_mentioned_by]
36
36
  root_terms = [ROOT]
37
37
 
38
38
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -76,7 +76,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
76
76
  reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
77
77
  )
78
78
  for pubmed_id in provenance_d.get(reactome_id, []):
79
- term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
79
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
80
80
 
81
81
  if not taxonomy_id or pd.isna(taxonomy_id):
82
82
  raise ValueError(f"unmapped species: {species_name}")
pyobo/sources/rgd.py CHANGED
@@ -9,22 +9,18 @@ from tqdm.auto import tqdm
9
9
  from pyobo.struct import (
10
10
  Obo,
11
11
  Reference,
12
- SynonymTypeDef,
13
12
  Term,
14
- default_reference,
15
13
  from_species,
16
- has_citation,
17
14
  has_gene_product,
15
+ is_mentioned_by,
18
16
  transcribes_to,
19
17
  )
18
+ from pyobo.struct.struct import previous_gene_symbol, previous_name
20
19
  from pyobo.utils.path import ensure_df
21
20
 
22
21
  logger = logging.getLogger(__name__)
23
22
  PREFIX = "rgd"
24
23
 
25
- old_symbol_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_symbol"))
26
- old_name_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_name"))
27
-
28
24
  # NOTE unigene id was discontinue in January 18th, 2021 dump
29
25
 
30
26
  GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"
@@ -73,8 +69,8 @@ class RGDGetter(Obo):
73
69
  """An ontology representation of RGD's rat gene nomenclature."""
74
70
 
75
71
  bioversions_key = ontology = PREFIX
76
- typedefs = [from_species, transcribes_to, has_gene_product, has_citation]
77
- synonym_typedefs = [old_name_type, old_symbol_type]
72
+ typedefs = [from_species, transcribes_to, has_gene_product, is_mentioned_by]
73
+ synonym_typedefs = [previous_name, previous_gene_symbol]
78
74
 
79
75
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
80
76
  """Iterate over terms in the ontology."""
@@ -119,11 +115,11 @@ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]
119
115
  old_names = row["OLD_NAME"]
120
116
  if old_names and pd.notna(old_names):
121
117
  for old_name in old_names.split(";"):
122
- term.append_synonym(old_name, type=old_name_type)
118
+ term.append_synonym(old_name, type=previous_name)
123
119
  old_symbols = row["OLD_SYMBOL"]
124
120
  if old_symbols and pd.notna(old_symbols):
125
121
  for old_symbol in old_symbols.split(";"):
126
- term.append_synonym(old_symbol, type=old_symbol_type)
122
+ term.append_synonym(old_symbol, type=previous_gene_symbol)
127
123
  for prefix, key in namespace_to_column:
128
124
  xref_ids = str(row[key])
129
125
  if xref_ids and pd.notna(xref_ids):
@@ -154,7 +150,7 @@ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]
154
150
  pubmed_ids = row["CURATED_REF_PUBMED_ID"]
155
151
  if pubmed_ids and pd.notna(pubmed_ids):
156
152
  for pubmed_id in str(pubmed_ids).split(";"):
157
- term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
153
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
158
154
 
159
155
  term.set_species(identifier="10116", name="Rattus norvegicus")
160
156
  yield term
pyobo/sources/slm.py CHANGED
@@ -7,7 +7,7 @@ from tqdm.auto import tqdm
7
7
 
8
8
  from pyobo import Obo, Reference, Term, TypeDef
9
9
  from pyobo.struct.struct import abbreviation as abbreviation_typedef
10
- from pyobo.struct.typedef import exact_match, has_citation, has_inchi, has_smiles
10
+ from pyobo.struct.typedef import exact_match, has_inchi, has_smiles, is_mentioned_by
11
11
  from pyobo.utils.path import ensure_df
12
12
 
13
13
  __all__ = [
@@ -43,7 +43,7 @@ class SLMGetter(Obo):
43
43
  """An ontology representation of SwissLipid's lipid nomenclature."""
44
44
 
45
45
  ontology = bioversions_key = PREFIX
46
- typedefs = [exact_match, LEVEL, has_inchi, has_smiles, has_citation]
46
+ typedefs = [exact_match, LEVEL, has_inchi, has_smiles, is_mentioned_by]
47
47
  synonym_typedefs = [abbreviation_typedef]
48
48
 
49
49
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -117,7 +117,7 @@ def iter_terms(version: str, force: bool = False):
117
117
  for hmdb_id in _split(hmdb_ids):
118
118
  term.append_exact_match(("hmdb", hmdb_id))
119
119
  for pubmed_id in _split(pubmed_ids):
120
- term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
120
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
121
121
  # TODO how to handle class, parents, and components?
122
122
  yield term
123
123
 
pyobo/sources/uniprot/uniprot.py CHANGED
@@ -19,7 +19,7 @@ from pyobo.struct import (
19
19
  derives_from,
20
20
  enables,
21
21
  from_species,
22
- has_citation,
22
+ is_mentioned_by,
23
23
  participates_in,
24
24
  )
25
25
  from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with
@@ -68,7 +68,7 @@ class UniProtGetter(Obo):
68
68
  derives_from,
69
69
  located_in,
70
70
  IS_REVIEWED,
71
- has_citation,
71
+ is_mentioned_by,
72
72
  ]
73
73
 
74
74
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -156,7 +156,7 @@ def iter_terms(version: str | None = None) -> Iterable[Term]:
156
156
  )
157
157
  for pubmed in pubmeds.split(";"):
158
158
  if pubmed := pubmed.strip():
159
- term.append_provenance(Reference(prefix="pubmed", identifier=pubmed))
159
+ term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed))
160
160
  for pdb in pdbs.split(";"):
161
161
  if pdb := pdb.strip():
162
162
  term.append_xref(Reference(prefix="pdb", identifier=pdb))
pyobo/sources/wikipathways.py CHANGED
@@ -80,7 +80,12 @@ def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[
80
80
  taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
81
81
 
82
82
  for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
83
- graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
83
+ try:
84
+ graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
85
+ except KeyError:
86
+ tqdm.write(f"[wikipathways:{identifier}] was not found inside zip file, skipping")
87
+ continue
88
+
84
89
  uri = f"https://identifiers.org/wikipathways/{identifier}"
85
90
 
86
91
  definition: str | None = None
@@ -122,7 +127,7 @@ def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[
122
127
  pw_references.add(ref)
123
128
  term.append_parent(ref)
124
129
  if not parents:
125
- tqdm.write(f"[{term.curie}] could not find parent")
130
+ tqdm.write(f"[{term.curie}] could not find annotation to parent in PW")
126
131
  term.append_parent(ROOT)
127
132
 
128
133
  diseases = graph.query(
pyobo/struct/__init__.py CHANGED
@@ -26,12 +26,12 @@ from .typedef import (
26
26
  from_species,
27
27
  gene_product_member_of,
28
28
  has_category,
29
- has_citation,
30
29
  has_gene_product,
31
30
  has_member,
32
31
  has_part,
33
32
  has_participant,
34
33
  is_a,
34
+ is_mentioned_by,
35
35
  member_of,
36
36
  orthologous,
37
37
  part_of,
@@ -65,12 +65,12 @@ __all__ = [
65
65
  "from_species",
66
66
  "gene_product_member_of",
67
67
  "has_category",
68
- "has_citation",
69
68
  "has_gene_product",
70
69
  "has_member",
71
70
  "has_part",
72
71
  "has_participant",
73
72
  "is_a",
73
+ "is_mentioned_by",
74
74
  "make_ad_hoc_ontology",
75
75
  "member_of",
76
76
  "orthologous",
pyobo/struct/functional/macros.py CHANGED
@@ -72,7 +72,7 @@ class RelationshipMacro(Macro):
72
72
  annotations: f.Annotations | None = None,
73
73
  ) -> None:
74
74
  """Instantiate the object-to-object SubClassOf macro."""
75
- super().__init__(f.SubClassOf(s, f.ObjectSomeValuesFrom(p, o)))
75
+ super().__init__(f.SubClassOf(s, f.ObjectSomeValuesFrom(p, o), annotations=annotations))
76
76
 
77
77
 
78
78
  class StringMacro(Macro):
pyobo/struct/functional/obo_to_functional.py CHANGED
@@ -128,11 +128,15 @@ def get_term_axioms(term: Term) -> Iterable[f.Box]:
128
128
  if term.type == "Term":
129
129
  yield f.Declaration(s, type="Class")
130
130
  for parent in term.parents:
131
- yield f.SubClassOf(s, parent)
132
- else:
131
+ yield f.SubClassOf(s, parent, annotations=_get_annotations(term, pv.is_a, parent))
132
+ elif term.type == "Instance":
133
133
  yield f.Declaration(s, type="NamedIndividual")
134
134
  for parent in term.parents:
135
- yield f.ClassAssertion(parent, s)
135
+ yield f.ClassAssertion(
136
+ parent, s, annotations=_get_annotations(term, pv.rdf_type, parent)
137
+ )
138
+ else:
139
+ raise ValueError(f"invalid term type: {term.type}")
136
140
  # 2
137
141
  if term.is_anonymous is not None:
138
142
  yield m.IsAnonymousMacro(s, term.is_anonymous)
pyobo/struct/obo/reader.py CHANGED
@@ -345,7 +345,7 @@ def _get_terms(
345
345
  _process_equivalent_to(term, data, ontology_prefix=ontology_prefix, strict=strict)
346
346
  _process_disjoint_from(term, data, ontology_prefix=ontology_prefix, strict=strict)
347
347
  _process_consider(term, data, ontology_prefix=ontology_prefix, strict=strict)
348
- _process_comment(term, data, ontology_prefix=ontology_prefix, strict=strict)
348
+ _process_comment(term, data)
349
349
  _process_description(term, data, ontology_prefix=ontology_prefix, strict=strict)
350
350
  _process_creation_date(term, data)
351
351
 
@@ -367,7 +367,7 @@ def _process_description(term: Stanza, data, *, ontology_prefix: str, strict: bo
367
367
  )
368
368
 
369
369
 
370
- def _process_comment(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
370
+ def _process_comment(term: Stanza, data) -> None:
371
371
  if comment := data.get("comment"):
372
372
  term.append_comment(comment)
373
373
 
@@ -668,7 +668,7 @@ def _handle_xref(
668
668
  # TODO this is not what spec calls for, maybe
669
669
  # need a flag in macro config for this
670
670
  if xref.prefix in PROVENANCE_PREFIXES:
671
- return term.append_provenance(xref, annotations=annotations)
671
+ return term.append_mentioned_by(xref, annotations=annotations)
672
672
 
673
673
  return term.append_xref(xref, annotations=annotations)
674
674
 
@@ -902,7 +902,7 @@ def iterate_typedefs(
902
902
  _process_equivalent_to(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
903
903
  _process_disjoint_from(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
904
904
  _process_consider(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
905
- _process_comment(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
905
+ _process_comment(typedef, data)
906
906
  _process_description(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
907
907
  _process_creation_date(typedef, data)
908
908
 
pyobo/struct/struct.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import datetime
6
+ import inspect
6
7
  import itertools as itt
7
8
  import json
8
9
  import logging
@@ -106,6 +107,7 @@ SSSOM_DF_COLUMNS = [
106
107
  "contributor",
107
108
  ]
108
109
  FORMAT_VERSION = "1.4"
110
+ _SOURCES = Path(__file__).parent.parent.joinpath("sources").resolve()
109
111
 
110
112
 
111
113
  @dataclass
@@ -246,12 +248,22 @@ DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
246
248
  reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
247
249
  )
248
250
  abbreviation = SynonymTypeDef(
249
- reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
251
+ reference=Reference(prefix="omo", identifier="0003000", name="abbreviation")
250
252
  )
251
253
  acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
252
254
  uk_spelling = SynonymTypeDef(
253
255
  reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
254
256
  )
257
+ previous_name = SynonymTypeDef(
258
+ reference=Reference(prefix="omo", identifier="0003008", name="previous name")
259
+ )
260
+ previous_gene_symbol = SynonymTypeDef(
261
+ reference=Reference(prefix="omo", identifier="0003015", name="previous gene symbol")
262
+ )
263
+ gene_symbol_synonym = SynonymTypeDef(
264
+ reference=Reference(prefix="omo", identifier="0003016", name="gene symbol synonym")
265
+ )
266
+
255
267
  default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
256
268
  abbreviation.pair: abbreviation,
257
269
  acronym.pair: acronym,
@@ -621,8 +633,15 @@ class Obo:
621
633
  raise ValueError(f"{self.ontology} is missing data_version")
622
634
  elif "/" in self.data_version:
623
635
  raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
636
+
637
+ file_path = Path(inspect.getfile(self.__class__)).resolve()
638
+ script_url = f"https://github.com/biopragmatics/pyobo/blob/main/src/pyobo/sources/{file_path.relative_to(_SOURCES)}"
639
+
624
640
  if self.auto_generated_by is None:
625
- self.auto_generated_by = f"PyOBO v{get_pyobo_version(with_git_hash=True)} on {datetime.datetime.now().isoformat()}" # type:ignore
641
+ self.auto_generated_by = (
642
+ f"PyOBO v{get_pyobo_version(with_git_hash=True)} on "
643
+ f"{datetime.datetime.now().isoformat()} by {script_url}"
644
+ ) # type:ignore
626
645
 
627
646
  def _get_clean_idspaces(self) -> dict[str, str]:
628
647
  """Get normalized idspace dictionary."""
@@ -927,7 +946,9 @@ class Obo:
927
946
  case OBOLiteral():
928
947
  end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
929
948
  case Reference():
930
- end = reference_escape(value, ontology_prefix=self.ontology)
949
+ end = reference_escape(
950
+ value, ontology_prefix=self.ontology, add_name_comment=True
951
+ )
931
952
  case _:
932
953
  raise TypeError(f"Invalid property value: {value}")
933
954
  yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"
@@ -946,21 +967,30 @@ class Obo:
946
967
  license_literal = OBOLiteral.string(license_spdx_id)
947
968
  yield Annotation(v.has_license, license_literal)
948
969
 
949
- if description := bioregistry.get_description(self.ontology):
950
- yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
951
- if homepage := bioregistry.get_homepage(self.ontology):
952
- yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
953
- if repository := bioregistry.get_repository(self.ontology):
954
- yield Annotation(v.has_repository, OBOLiteral.uri(repository))
955
- if logo := bioregistry.get_logo(self.ontology):
956
- yield Annotation(v.has_logo, OBOLiteral.uri(logo))
957
- if mailing_list := bioregistry.get_mailing_list(self.ontology):
958
- yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
959
- if (maintainer := bioregistry.get_contact(self.ontology)) and maintainer.orcid:
960
- yield Annotation(
961
- v.has_maintainer,
962
- Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
963
- )
970
+ if resource := bioregistry.get_resource(self.ontology):
971
+ if description := resource.get_description():
972
+ yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
973
+ if homepage := resource.get_homepage():
974
+ yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
975
+ if repository := resource.get_repository():
976
+ yield Annotation(v.has_repository, OBOLiteral.uri(repository))
977
+ if logo := resource.get_logo():
978
+ yield Annotation(v.has_logo, OBOLiteral.uri(logo))
979
+ if mailing_list := resource.get_mailing_list():
980
+ yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
981
+ if (maintainer := resource.get_contact()) and maintainer.orcid:
982
+ yield Annotation(
983
+ v.has_maintainer,
984
+ Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
985
+ )
986
+ for maintainer in resource.contact_extras or []:
987
+ if maintainer.orcid:
988
+ yield Annotation(
989
+ v.has_maintainer,
990
+ Reference(
991
+ prefix="orcid", identifier=maintainer.orcid, name=maintainer.name
992
+ ),
993
+ )
964
994
 
965
995
  # Root terms
966
996
  for root_term in self.root_terms or []:
pyobo/struct/struct_utils.py CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import datetime
6
6
  import itertools as itt
7
7
  import logging
8
+ import warnings
8
9
  from abc import ABC, abstractmethod
9
10
  from collections import defaultdict
10
11
  from collections.abc import Iterable, Mapping, Sequence
@@ -840,7 +841,7 @@ class Stanza(Referenced, HasReferencesMixin):
840
841
  """Get definition provenance."""
841
842
  # return as a tuple to make sure nobody is appending on it
842
843
  return (
843
- *self.get_property_objects(v.has_citation),
844
+ *self.get_property_objects(v.is_mentioned_by),
844
845
  # This gets all of the xrefs on _any_ axiom,
845
846
  # which includes the definition provenance
846
847
  *(
@@ -867,8 +868,18 @@ class Stanza(Referenced, HasReferencesMixin):
867
868
  *,
868
869
  annotations: Iterable[Annotation] | None = None,
869
870
  ) -> Self:
870
- """Append a citation."""
871
- return self.annotate_object(v.has_citation, reference, annotations=annotations)
871
+ """Append a creative work that mentions this term."""
872
+ warnings.warn("use append_mentioned_by instead", DeprecationWarning, stacklevel=2)
873
+ return self.append_mentioned_by(reference, annotations=annotations)
874
+
875
+ def append_mentioned_by(
876
+ self,
877
+ reference: Reference,
878
+ *,
879
+ annotations: Iterable[Annotation] | None = None,
880
+ ) -> Self:
881
+ """Append a creative work that mentions this term."""
882
+ return self.annotate_object(v.is_mentioned_by, reference, annotations=annotations)
872
883
 
873
884
 
874
885
  ReferenceHint: TypeAlias = (
@@ -1004,8 +1015,11 @@ def _format_obo_trailing_modifiers(
1004
1015
  match prop.value:
1005
1016
  case Reference():
1006
1017
  right = reference_escape(prop.value, ontology_prefix=ontology_prefix)
1007
- case OBOLiteral(value, _datatype, _language):
1008
- right = value
1018
+ case OBOLiteral(value, datatype, _language):
1019
+ if datatype == v.xsd_string:
1020
+ right = f'"{obo_escape_slim(value)}"'
1021
+ else:
1022
+ right = value
1009
1023
  modifiers.append((left, right))
1010
1024
  inner = ", ".join(f"{key}={value}" for key, value in modifiers)
1011
1025
  return " {" + inner + "}"
pyobo/struct/typedef.py CHANGED
@@ -20,11 +20,13 @@ __all__ = [
20
20
  "derives_from_organism",
21
21
  "editor_note",
22
22
  "enables",
23
+ "ends",
23
24
  "exact_match",
24
25
  "example_of_usage",
25
26
  "from_species",
26
27
  "gene_product_member_of",
27
28
  "has_contributor",
29
+ "has_creator",
28
30
  "has_dbxref",
29
31
  "has_depiction",
30
32
  "has_end_date",
@@ -57,6 +59,7 @@ __all__ = [
57
59
  "role_of",
58
60
  "see_also",
59
61
  "species_specific",
62
+ "starts",
60
63
  "superclass_of",
61
64
  "transcribes_to",
62
65
  "translates_to",
@@ -116,6 +119,12 @@ molecularly_interacts_with = TypeDef(
116
119
  located_in = TypeDef(
117
120
  reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
118
121
  )
122
+ starts = TypeDef(
123
+ reference=Reference(prefix=RO_PREFIX, identifier="0002223", name="starts"),
124
+ )
125
+ ends = TypeDef(
126
+ reference=Reference(prefix=RO_PREFIX, identifier="0002229", name="ends"),
127
+ )
119
128
  contributes_to_condition = TypeDef(
120
129
  reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
121
130
  )
@@ -258,10 +267,15 @@ has_functional_parent = TypeDef(
258
267
  reference=Reference(prefix="ro", identifier="0018038", name="has functional parent"),
259
268
  )
260
269
 
261
- has_citation = TypeDef(
262
- reference=v.has_citation,
270
+ is_mentioned_by = TypeDef(
271
+ reference=v.is_mentioned_by,
263
272
  is_metadata_tag=True,
264
- range=Reference(prefix="IAO", identifier="0000013", name="journal article"),
273
+ inverse=v.mentions,
274
+ )
275
+ mentions = TypeDef(
276
+ reference=v.mentions,
277
+ is_metadata_tag=True,
278
+ inverse=v.is_mentioned_by,
265
279
  )
266
280
 
267
281
  has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v.debio_has_smiles)
@@ -306,6 +320,8 @@ has_end_date = TypeDef(
306
320
 
307
321
  has_title = TypeDef(reference=v.has_title, is_metadata_tag=True)
308
322
  has_license = TypeDef(reference=v.has_license, is_metadata_tag=True)
323
+ has_creator = TypeDef(reference=v.has_creator, is_metadata_tag=True)
324
+
309
325
  has_description = TypeDef(reference=v.has_description, is_metadata_tag=True)
310
326
  obo_autogenerated_by = TypeDef(reference=v.obo_autogenerated_by, is_metadata_tag=True)
311
327
  obo_has_format_version = TypeDef(reference=v.obo_has_format_version, is_metadata_tag=True)
pyobo/struct/vocabulary.py CHANGED
@@ -5,7 +5,7 @@ from collections.abc import Sequence
5
5
  import curies
6
6
  from curies import vocabulary as _v
7
7
 
8
- from .reference import Reference, default_reference
8
+ from .reference import Reference
9
9
 
10
10
  __all__ = [
11
11
  "equivalent_class",
@@ -41,6 +41,7 @@ mapping_has_justification = Reference(
41
41
  )
42
42
  mapping_has_confidence = Reference(prefix="sssom", identifier="confidence", name="has confidence")
43
43
  has_contributor = _c(_v.has_contributor)
44
+ has_creator = Reference(prefix="dcterms", identifier="creator", name="creator")
44
45
  has_source = _c(_v.has_source)
45
46
  has_date = _c(_v.has_date)
46
47
  has_dbxref = _c(_v.has_dbxref)
@@ -84,10 +85,12 @@ has_inchi = Reference(prefix="chemrof", identifier="inchi_string")
84
85
  debio_has_smiles = Reference(prefix="debio", identifier="0000022", name="has SMILES")
85
86
  has_smiles = Reference(prefix="chemrof", identifier="smiles_string")
86
87
 
87
- # TODO update to use debio, or put in RO
88
- has_citation = default_reference(prefix="RO", identifier="hasCitation", name="has citation")
88
+ is_mentioned_by = Reference(prefix="mito", identifier="isMentionedBy", name="is mentioned by")
89
+ mentions = Reference(prefix="mito", identifier="mentions", name="mentions")
90
+
89
91
  has_description = _c(_v.has_description)
90
92
  has_license = _c(_v.has_license)
93
+ has_license = _c(_v.has_license)
91
94
  has_title = _c(_v.has_title)
92
95
 
93
96
  has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")