pyobo 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +11 -1
- pyobo/api/alts.py +18 -4
- pyobo/api/embedding.py +108 -9
- pyobo/api/names.py +28 -6
- pyobo/api/xrefs.py +21 -1
- pyobo/cli/cli.py +9 -3
- pyobo/cli/database.py +63 -22
- pyobo/cli/lookup.py +39 -24
- pyobo/cli/utils.py +6 -2
- pyobo/constants.py +66 -7
- pyobo/getters.py +8 -3
- pyobo/ner/api.py +17 -10
- pyobo/ner/scispacy_utils.py +2 -0
- pyobo/plugins.py +3 -1
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/antibodyregistry.py +3 -3
- pyobo/sources/bigg/bigg_compartment.py +1 -1
- pyobo/sources/complexportal.py +3 -3
- pyobo/sources/conso.py +3 -3
- pyobo/sources/famplex.py +3 -3
- pyobo/sources/goldbook.py +86 -0
- pyobo/sources/hgnc/hgnc.py +157 -96
- pyobo/sources/hgnc/hgncgenefamily.py +14 -13
- pyobo/sources/msigdb.py +3 -3
- pyobo/sources/omim_ps.py +8 -2
- pyobo/sources/reactome.py +3 -3
- pyobo/sources/rgd.py +7 -11
- pyobo/sources/slm.py +3 -3
- pyobo/sources/uniprot/uniprot.py +3 -3
- pyobo/sources/wikipathways.py +7 -2
- pyobo/struct/__init__.py +2 -2
- pyobo/struct/functional/macros.py +1 -1
- pyobo/struct/functional/obo_to_functional.py +7 -3
- pyobo/struct/obo/reader.py +4 -4
- pyobo/struct/struct.py +48 -18
- pyobo/struct/struct_utils.py +19 -5
- pyobo/struct/typedef.py +19 -3
- pyobo/struct/vocabulary.py +6 -3
- pyobo/utils/path.py +5 -4
- pyobo/version.py +1 -1
- {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/METADATA +45 -23
- {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/RECORD +46 -45
- {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/WHEEL +1 -1
- {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/hgnc/hgncgenefamily.py
CHANGED

@@ -5,9 +5,10 @@ from collections.abc import Iterable, Mapping
 
 import pandas as pd
 
-from ...struct import Obo, Reference,
+from ...struct import Obo, Reference, Term, is_mentioned_by
+from ...struct.struct import abbreviation as symbol_type
 from ...struct.typedef import enables, exact_match, from_species
-from ...utils.path import
+from ...utils.path import ensure_df
 
 __all__ = [
     "HGNCGroupGetter",

@@ -15,13 +16,9 @@ __all__ = [
 
 PREFIX = "hgnc.genegroup"
 FAMILIES_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family.csv"
-
+FAMILIES_ALIAS_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family_alias.csv"
 HIERARCHY_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/hierarchy.csv"
 
-symbol_type = SynonymTypeDef(
-    reference=Reference(prefix="OMO", identifier="0004000", name="has symbol")
-)
-
 
 class HGNCGroupGetter(Obo):
     """An ontology representation of HGNC's gene group nomenclature."""

@@ -29,7 +26,7 @@ class HGNCGroupGetter(Obo):
     ontology = PREFIX
     bioversions_key = "hgnc"
     synonym_typedefs = [symbol_type]
-    typedefs = [from_species, enables, exact_match,
+    typedefs = [from_species, enables, exact_match, is_mentioned_by]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

@@ -38,8 +35,7 @@ class HGNCGroupGetter(Obo):
 
 def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
     """Get the HGNC Gene Families hierarchy as a dictionary."""
-
-    df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
+    df = ensure_df(PREFIX, url=HIERARCHY_URL, force=force, sep=",")
     d = defaultdict(list)
     for parent_id, child_id in df.values:
         d[child_id].append(parent_id)

@@ -75,9 +71,12 @@ def get_terms(force: bool = False) -> Iterable[Term]:
 
 
 def _get_terms_helper(force: bool = False) -> Iterable[Term]:
-
-
+    alias_df = ensure_df(PREFIX, url=FAMILIES_ALIAS_URL, force=force, sep=",")
+    aliases = defaultdict(set)
+    for _id, family_id, alias in alias_df.values:
+        aliases[family_id].add(alias)
 
+    df = ensure_df(PREFIX, url=FAMILIES_URL, force=force, sep=",")
     for gene_group_id, symbol, name, pubmed_ids, definition, desc_go in df[COLUMNS].values:
         if not definition or pd.isna(definition):
             definition = None

@@ -89,12 +88,14 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
             for s in pubmed_ids.replace(" ", ",").split(","):
                 s = s.strip()
                 if s:
-                    term.
+                    term.append_mentioned_by(Reference(prefix="pubmed", identifier=s))
         if desc_go and pd.notna(desc_go):
             go_id = desc_go[len("http://purl.uniprot.org/go/") :]
             term.append_relationship(enables, Reference(prefix="GO", identifier=go_id))
         if symbol and pd.notna(symbol):
             term.append_synonym(symbol, type=symbol_type)
+        for alias in aliases[gene_group_id]:
+            term.append_synonym(alias)
         term.set_species(identifier="9606", name="Homo sapiens")
         yield term
 
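For readers who want the new alias handling in isolation, here is a small, self-contained sketch (toy data, pandas only; not the pyobo module itself) of the aggregation pattern added in _get_terms_helper: group the family_alias rows by family identifier, then attach each alias as a plain synonym on the matching term.

from collections import defaultdict

import pandas as pd

# Toy stand-in for family_alias.csv; the real file has columns roughly like
# (id, family_id, alias), per the loop shown in the diff above.
alias_df = pd.DataFrame(
    [("1", "588", "Antisense RNAs"), ("2", "588", "asRNA")],
    columns=["id", "family_id", "alias"],
)
aliases: defaultdict[str, set[str]] = defaultdict(set)
for _id, family_id, alias in alias_df.values:
    aliases[family_id].add(alias)
# each term whose gene_group_id is "588" would then get both aliases as synonyms
print(dict(aliases))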
pyobo/sources/msigdb.py
CHANGED
@@ -8,7 +8,7 @@ from lxml import etree
 from pydantic import ValidationError
 from tqdm.auto import tqdm
 
-from pyobo.struct import Obo, Reference, Term, TypeDef,
+from pyobo.struct import Obo, Reference, Term, TypeDef, has_participant, is_mentioned_by
 from pyobo.utils.path import ensure_path
 
 __all__ = [

@@ -43,7 +43,7 @@ class MSigDBGetter(Obo):
     """An ontology representation of MMSigDB's gene set nomenclature."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [has_participant,
+    typedefs = [has_participant, is_mentioned_by, *(p for _, p in PROPERTIES)]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

@@ -112,7 +112,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         elif reference_id.startswith("GSE"):
             term.append_see_also(Reference(prefix="gse", identifier=reference_id))
         else:
-            term.
+            term.append_mentioned_by(Reference(prefix="pubmed", identifier=reference_id))
 
         for key, typedef in PROPERTIES:
             if value := attrib[key].strip():
pyobo/sources/omim_ps.py
CHANGED
@@ -2,6 +2,7 @@
 
 import logging
 from collections.abc import Iterable
+from typing import cast
 
 from bioversions.utils import get_soup
 

@@ -34,9 +35,14 @@ class OMIMPSGetter(Obo):
         if tbody is None:
             raise ValueError("omim.ps failed - scraper could not find table body in HTML")
         for row in tbody.find_all("tr"):
-
+            td = row.find("td")
+            if td is None:
+                continue
+            anchor = td.find("a")
+            if anchor is None or anchor.text is None:
+                continue
             name = anchor.text.strip()
-            identifier = anchor.attrs["href"][len("/phenotypicSeries/") :]
+            identifier = cast(str, anchor.attrs["href"])[len("/phenotypicSeries/") :]
             yield Term.from_triple(PREFIX, identifier, name)
 
 
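The new guards above rely on BeautifulSoup returning None for missing tags. A minimal, standalone illustration of the pattern (the HTML snippet and the PS identifier are made up for illustration, not the real omim.ps page):

from bs4 import BeautifulSoup

html = (
    "<table><tr><th>Phenotypic Series</th></tr>"
    "<tr><td><a href='/phenotypicSeries/PS000000'>Example series</a></td></tr></table>"
)
soup = BeautifulSoup(html, "html.parser")
for row in soup.find_all("tr"):
    td = row.find("td")
    if td is None:  # e.g. a header row that only has <th> cells
        continue
    anchor = td.find("a")
    if anchor is None or anchor.text is None:
        continue
    identifier = anchor.attrs["href"][len("/phenotypicSeries/"):]
    print(identifier, anchor.text.strip())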
pyobo/sources/reactome.py
CHANGED
@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
 from ..api import get_id_multirelations_mapping
 from ..constants import SPECIES_REMAPPING
 from ..resources.ncbitaxon import get_ncbitaxon_id
-from ..struct import Obo, Reference, Term, from_species,
+from ..struct import Obo, Reference, Term, from_species, has_participant, is_mentioned_by
 from ..utils.io import multidict
 from ..utils.path import ensure_df
 

@@ -32,7 +32,7 @@ class ReactomeGetter(Obo):
     """An ontology representation of the Reactome pathway database."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_participant,
+    typedefs = [from_species, has_participant, is_mentioned_by]
     root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:

@@ -76,7 +76,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
         )
         for pubmed_id in provenance_d.get(reactome_id, []):
-            term.
+            term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
 
         if not taxonomy_id or pd.isna(taxonomy_id):
             raise ValueError(f"unmapped species: {species_name}")
pyobo/sources/rgd.py
CHANGED
@@ -9,22 +9,18 @@ from tqdm.auto import tqdm
 from pyobo.struct import (
     Obo,
     Reference,
-    SynonymTypeDef,
     Term,
-    default_reference,
     from_species,
-    has_citation,
     has_gene_product,
+    is_mentioned_by,
     transcribes_to,
 )
+from pyobo.struct.struct import previous_gene_symbol, previous_name
 from pyobo.utils.path import ensure_df
 
 logger = logging.getLogger(__name__)
 PREFIX = "rgd"
 
-old_symbol_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_symbol"))
-old_name_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_name"))
-
 # NOTE unigene id was discontinue in January 18th, 2021 dump
 
 GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"

@@ -73,8 +69,8 @@ class RGDGetter(Obo):
     """An ontology representation of RGD's rat gene nomenclature."""
 
     bioversions_key = ontology = PREFIX
-    typedefs = [from_species, transcribes_to, has_gene_product,
-    synonym_typedefs = [
+    typedefs = [from_species, transcribes_to, has_gene_product, is_mentioned_by]
+    synonym_typedefs = [previous_name, previous_gene_symbol]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

@@ -119,11 +115,11 @@ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]
         old_names = row["OLD_NAME"]
         if old_names and pd.notna(old_names):
             for old_name in old_names.split(";"):
-                term.append_synonym(old_name, type=
+                term.append_synonym(old_name, type=previous_name)
         old_symbols = row["OLD_SYMBOL"]
         if old_symbols and pd.notna(old_symbols):
             for old_symbol in old_symbols.split(";"):
-                term.append_synonym(old_symbol, type=
+                term.append_synonym(old_symbol, type=previous_gene_symbol)
         for prefix, key in namespace_to_column:
             xref_ids = str(row[key])
             if xref_ids and pd.notna(xref_ids):

@@ -154,7 +150,7 @@ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]
         pubmed_ids = row["CURATED_REF_PUBMED_ID"]
         if pubmed_ids and pd.notna(pubmed_ids):
             for pubmed_id in str(pubmed_ids).split(";"):
-                term.
+                term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
 
         term.set_species(identifier="10116", name="Rattus norvegicus")
         yield term
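The same pattern recurs across the sources above: the has_citation relation is renamed to is_mentioned_by, and RGD's module-local synonym types are replaced by the shared OMO-backed ones. A hedged usage sketch, using only calls that appear in this diff (the prefix, identifier, and strings are placeholders, not real records):

from pyobo.struct import Reference, Term
from pyobo.struct.struct import previous_gene_symbol, previous_name

term = Term(reference=Reference(prefix="rgd", identifier="0000000", name="example gene"))  # placeholder
term.append_synonym("old example name", type=previous_name)        # OMO:0003008 "previous name"
term.append_synonym("OLDSYM", type=previous_gene_symbol)           # OMO:0003015 "previous gene symbol"
term.append_mentioned_by(Reference(prefix="pubmed", identifier="12345"))  # placeholder PMID
term.set_species(identifier="10116", name="Rattus norvegicus")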
pyobo/sources/slm.py
CHANGED
@@ -7,7 +7,7 @@ from tqdm.auto import tqdm
 
 from pyobo import Obo, Reference, Term, TypeDef
 from pyobo.struct.struct import abbreviation as abbreviation_typedef
-from pyobo.struct.typedef import exact_match,
+from pyobo.struct.typedef import exact_match, has_inchi, has_smiles, is_mentioned_by
 from pyobo.utils.path import ensure_df
 
 __all__ = [

@@ -43,7 +43,7 @@ class SLMGetter(Obo):
     """An ontology representation of SwissLipid's lipid nomenclature."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [exact_match, LEVEL, has_inchi, has_smiles,
+    typedefs = [exact_match, LEVEL, has_inchi, has_smiles, is_mentioned_by]
     synonym_typedefs = [abbreviation_typedef]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:

@@ -117,7 +117,7 @@ def iter_terms(version: str, force: bool = False):
         for hmdb_id in _split(hmdb_ids):
             term.append_exact_match(("hmdb", hmdb_id))
         for pubmed_id in _split(pubmed_ids):
-            term.
+            term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
         # TODO how to handle class, parents, and components?
         yield term
 
pyobo/sources/uniprot/uniprot.py
CHANGED
@@ -19,7 +19,7 @@ from pyobo.struct import (
     derives_from,
     enables,
     from_species,
-
+    is_mentioned_by,
     participates_in,
 )
 from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with

@@ -68,7 +68,7 @@ class UniProtGetter(Obo):
         derives_from,
         located_in,
         IS_REVIEWED,
-
+        is_mentioned_by,
     ]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:

@@ -156,7 +156,7 @@ def iter_terms(version: str | None = None) -> Iterable[Term]:
         )
         for pubmed in pubmeds.split(";"):
             if pubmed := pubmed.strip():
-                term.
+                term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed))
         for pdb in pdbs.split(";"):
             if pdb := pdb.strip():
                 term.append_xref(Reference(prefix="pdb", identifier=pdb))
pyobo/sources/wikipathways.py
CHANGED
@@ -80,7 +80,12 @@ def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[
     taxonomy_name = SPECIES_REMAPPING.get(species_code, species_code)
 
     for identifier, _version, _revision, name, _species, genes in parse_wikipathways_gmt(path):
-
+        try:
+            graph = read_zipfile_rdf(archive, inner_path=f"wp/{identifier}.ttl")
+        except KeyError:
+            tqdm.write(f"[wikipathways:{identifier}] was not found inside zip file, skipping")
+            continue
+
         uri = f"https://identifiers.org/wikipathways/{identifier}"
 
         definition: str | None = None

@@ -122,7 +127,7 @@ def iter_terms(version: str, *, include_descriptions: bool = False) -> Iterable[
             pw_references.add(ref)
             term.append_parent(ref)
         if not parents:
-            tqdm.write(f"[{term.curie}] could not find parent")
+            tqdm.write(f"[{term.curie}] could not find annotation to parent in PW")
             term.append_parent(ROOT)
 
         diseases = graph.query(
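The new try/except above hinges on the fact that reading a member that is absent from a ZIP archive raises KeyError. A self-contained, standard-library-only illustration (not the pyobo helper; the archive contents are made up):

import io
import zipfile

buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as archive:
    archive.writestr("wp/WP1.ttl", "# placeholder turtle content")

with zipfile.ZipFile(buffer) as archive:
    for identifier in ["WP1", "WP999"]:  # WP999 is deliberately missing
        try:
            data = archive.read(f"wp/{identifier}.ttl")
        except KeyError:
            print(f"[wikipathways:{identifier}] was not found inside zip file, skipping")
            continue
        print(identifier, len(data), "bytes")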
pyobo/struct/__init__.py
CHANGED
@@ -26,12 +26,12 @@ from .typedef import (
     from_species,
     gene_product_member_of,
     has_category,
-    has_citation,
     has_gene_product,
     has_member,
     has_part,
     has_participant,
     is_a,
+    is_mentioned_by,
     member_of,
     orthologous,
     part_of,

@@ -65,12 +65,12 @@ __all__ = [
     "from_species",
     "gene_product_member_of",
     "has_category",
-    "has_citation",
     "has_gene_product",
     "has_member",
     "has_part",
     "has_participant",
     "is_a",
+    "is_mentioned_by",
     "make_ad_hoc_ontology",
     "member_of",
     "orthologous",

pyobo/struct/functional/macros.py
CHANGED

@@ -72,7 +72,7 @@ class RelationshipMacro(Macro):
         annotations: f.Annotations | None = None,
     ) -> None:
         """Instantiate the object-to-object SubClassOf macro."""
-        super().__init__(f.SubClassOf(s, f.ObjectSomeValuesFrom(p, o)))
+        super().__init__(f.SubClassOf(s, f.ObjectSomeValuesFrom(p, o), annotations=annotations))
 
 
 class StringMacro(Macro):

pyobo/struct/functional/obo_to_functional.py
CHANGED

@@ -128,11 +128,15 @@ def get_term_axioms(term: Term) -> Iterable[f.Box]:
     if term.type == "Term":
         yield f.Declaration(s, type="Class")
         for parent in term.parents:
-            yield f.SubClassOf(s, parent)
-
+            yield f.SubClassOf(s, parent, annotations=_get_annotations(term, pv.is_a, parent))
+    elif term.type == "Instance":
         yield f.Declaration(s, type="NamedIndividual")
         for parent in term.parents:
-            yield f.ClassAssertion(
+            yield f.ClassAssertion(
+                parent, s, annotations=_get_annotations(term, pv.rdf_type, parent)
+            )
+    else:
+        raise ValueError(f"invalid term type: {term.type}")
     # 2
     if term.is_anonymous is not None:
         yield m.IsAnonymousMacro(s, term.is_anonymous)
pyobo/struct/obo/reader.py
CHANGED
@@ -345,7 +345,7 @@ def _get_terms(
     _process_equivalent_to(term, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_disjoint_from(term, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_consider(term, data, ontology_prefix=ontology_prefix, strict=strict)
-    _process_comment(term, data
+    _process_comment(term, data)
     _process_description(term, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_creation_date(term, data)
 

@@ -367,7 +367,7 @@ def _process_description(term: Stanza, data, *, ontology_prefix: str, strict: bo
     )
 
 
-def _process_comment(term: Stanza, data
+def _process_comment(term: Stanza, data) -> None:
     if comment := data.get("comment"):
         term.append_comment(comment)
 

@@ -668,7 +668,7 @@ def _handle_xref(
     # TODO this is not what spec calls for, maybe
     # need a flag in macro config for this
     if xref.prefix in PROVENANCE_PREFIXES:
-        return term.
+        return term.append_mentioned_by(xref, annotations=annotations)
 
     return term.append_xref(xref, annotations=annotations)
 

@@ -902,7 +902,7 @@ def iterate_typedefs(
     _process_equivalent_to(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_disjoint_from(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_consider(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
-    _process_comment(typedef, data
+    _process_comment(typedef, data)
     _process_description(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
     _process_creation_date(typedef, data)
 
pyobo/struct/struct.py
CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import datetime
+import inspect
 import itertools as itt
 import json
 import logging

@@ -106,6 +107,7 @@ SSSOM_DF_COLUMNS = [
     "contributor",
 ]
 FORMAT_VERSION = "1.4"
+_SOURCES = Path(__file__).parent.parent.joinpath("sources").resolve()
 
 
 @dataclass

@@ -246,12 +248,22 @@ DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
     reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="synonym type"),
 )
 abbreviation = SynonymTypeDef(
-    reference=Reference(prefix="
+    reference=Reference(prefix="omo", identifier="0003000", name="abbreviation")
 )
 acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
 uk_spelling = SynonymTypeDef(
     reference=Reference(prefix="omo", identifier="0003005", name="UK spelling synonym")
 )
+previous_name = SynonymTypeDef(
+    reference=Reference(prefix="omo", identifier="0003008", name="previous name")
+)
+previous_gene_symbol = SynonymTypeDef(
+    reference=Reference(prefix="omo", identifier="0003015", name="previous gene symbol")
+)
+gene_symbol_synonym = SynonymTypeDef(
+    reference=Reference(prefix="omo", identifier="0003016", name="gene symbol synonym")
+)
+
 default_synonym_typedefs: dict[ReferenceTuple, SynonymTypeDef] = {
     abbreviation.pair: abbreviation,
     acronym.pair: acronym,

@@ -621,8 +633,15 @@ class Obo:
             raise ValueError(f"{self.ontology} is missing data_version")
         elif "/" in self.data_version:
             raise ValueError(f"{self.ontology} has a slash in version: {self.data_version}")
+
+        file_path = Path(inspect.getfile(self.__class__)).resolve()
+        script_url = f"https://github.com/biopragmatics/pyobo/blob/main/src/pyobo/sources/{file_path.relative_to(_SOURCES)}"
+
         if self.auto_generated_by is None:
-            self.auto_generated_by =
+            self.auto_generated_by = (
+                f"PyOBO v{get_pyobo_version(with_git_hash=True)} on "
+                f"{datetime.datetime.now().isoformat()} by {script_url}"
+            ) # type:ignore
 
     def _get_clean_idspaces(self) -> dict[str, str]:
         """Get normalized idspace dictionary."""

@@ -927,7 +946,9 @@ class Obo:
             case OBOLiteral():
                 end = f'"{obo_escape_slim(value.value)}" {reference_escape(value.datatype, ontology_prefix=self.ontology)}'
             case Reference():
-                end = reference_escape(
+                end = reference_escape(
+                    value, ontology_prefix=self.ontology, add_name_comment=True
+                )
             case _:
                 raise TypeError(f"Invalid property value: {value}")
         yield f"property_value: {reference_escape(predicate, ontology_prefix=self.ontology)} {end}"

@@ -946,21 +967,30 @@
             license_literal = OBOLiteral.string(license_spdx_id)
             yield Annotation(v.has_license, license_literal)
 
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if resource := bioregistry.get_resource(self.ontology):
+            if description := resource.get_description():
+                yield Annotation(v.has_description, OBOLiteral.string(description.strip()))
+            if homepage := resource.get_homepage():
+                yield Annotation(v.has_homepage, OBOLiteral.uri(homepage))
+            if repository := resource.get_repository():
+                yield Annotation(v.has_repository, OBOLiteral.uri(repository))
+            if logo := resource.get_logo():
+                yield Annotation(v.has_logo, OBOLiteral.uri(logo))
+            if mailing_list := resource.get_mailing_list():
+                yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
+            if (maintainer := resource.get_contact()) and maintainer.orcid:
+                yield Annotation(
+                    v.has_maintainer,
+                    Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
+                )
+            for maintainer in resource.contact_extras or []:
+                if maintainer.orcid:
+                    yield Annotation(
+                        v.has_maintainer,
+                        Reference(
+                            prefix="orcid", identifier=maintainer.orcid, name=maintainer.name
+                        ),
+                    )
 
         # Root terms
         for root_term in self.root_terms or []:
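The auto_generated_by change above stamps each export with the tool version, a timestamp, and a GitHub URL derived from the file that defines the Obo subclass. A hedged, standard-library-only sketch of that idea (the class, version string, and URL layout are placeholders, not the exact pyobo internals):

import datetime
import inspect
from pathlib import Path


class ExampleGetter:  # stands in for an Obo subclass defined under pyobo/sources/
    pass


file_path = Path(inspect.getfile(ExampleGetter)).resolve()
script_url = f"https://github.com/biopragmatics/pyobo/blob/main/src/pyobo/sources/{file_path.name}"
auto_generated_by = f"PyOBO v0.12.12 on {datetime.datetime.now().isoformat()} by {script_url}"
print(auto_generated_by)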
pyobo/struct/struct_utils.py
CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
 import datetime
 import itertools as itt
 import logging
+import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterable, Mapping, Sequence

@@ -840,7 +841,7 @@ class Stanza(Referenced, HasReferencesMixin):
         """Get definition provenance."""
         # return as a tuple to make sure nobody is appending on it
         return (
-            *self.get_property_objects(v.
+            *self.get_property_objects(v.is_mentioned_by),
             # This gets all of the xrefs on _any_ axiom,
             # which includes the definition provenance
             *(

@@ -867,8 +868,18 @@ class Stanza(Referenced, HasReferencesMixin):
         *,
         annotations: Iterable[Annotation] | None = None,
     ) -> Self:
-        """Append a
-
+        """Append a creative work that mentions this term."""
+        warnings.warn("use append_mentioned_by instead", DeprecationWarning, stacklevel=2)
+        return self.append_mentioned_by(reference, annotations=annotations)
+
+    def append_mentioned_by(
+        self,
+        reference: Reference,
+        *,
+        annotations: Iterable[Annotation] | None = None,
+    ) -> Self:
+        """Append a creative work that mentions this term."""
+        return self.annotate_object(v.is_mentioned_by, reference, annotations=annotations)
 
 
 ReferenceHint: TypeAlias = (

@@ -1004,8 +1015,11 @@ def _format_obo_trailing_modifiers(
         match prop.value:
             case Reference():
                 right = reference_escape(prop.value, ontology_prefix=ontology_prefix)
-            case OBOLiteral(value,
-
+            case OBOLiteral(value, datatype, _language):
+                if datatype == v.xsd_string:
+                    right = f'"{obo_escape_slim(value)}"'
+                else:
+                    right = value
         modifiers.append((left, right))
     inner = ", ".join(f"{key}={value}" for key, value in modifiers)
     return " {" + inner + "}"
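The old provenance-appending method on Stanza is kept as a thin deprecation shim that warns and delegates to the new append_mentioned_by. A generic, self-contained sketch of that shim pattern (the class and the deprecated method name here are illustrative; the diff truncates the real old name):

import warnings


class Stanza:
    def append_mentioned_by(self, reference):
        # the real pyobo method records an is_mentioned_by annotation; this just echoes
        print(f"recorded mention by {reference}")
        return self

    def append_old_style(self, reference):  # hypothetical stand-in for the deprecated name
        warnings.warn("use append_mentioned_by instead", DeprecationWarning, stacklevel=2)
        return self.append_mentioned_by(reference)


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Stanza().append_old_style("pubmed:12345")
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)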
pyobo/struct/typedef.py
CHANGED
@@ -20,11 +20,13 @@ __all__ = [
     "derives_from_organism",
     "editor_note",
     "enables",
+    "ends",
     "exact_match",
     "example_of_usage",
     "from_species",
     "gene_product_member_of",
     "has_contributor",
+    "has_creator",
     "has_dbxref",
     "has_depiction",
     "has_end_date",

@@ -57,6 +59,7 @@ __all__ = [
     "role_of",
     "see_also",
     "species_specific",
+    "starts",
     "superclass_of",
     "transcribes_to",
     "translates_to",

@@ -116,6 +119,12 @@ molecularly_interacts_with = TypeDef(
 located_in = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0001025", name="located in"),
 )
+starts = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0002223", name="starts"),
+)
+ends = TypeDef(
+    reference=Reference(prefix=RO_PREFIX, identifier="0002229", name="ends"),
+)
 contributes_to_condition = TypeDef(
     reference=Reference(prefix=RO_PREFIX, identifier="0003304", name="contributes to condition"),
 )

@@ -258,10 +267,15 @@ has_functional_parent = TypeDef(
     reference=Reference(prefix="ro", identifier="0018038", name="has functional parent"),
 )
 
-
-    reference=v.
+is_mentioned_by = TypeDef(
+    reference=v.is_mentioned_by,
     is_metadata_tag=True,
-
+    inverse=v.mentions,
+)
+mentions = TypeDef(
+    reference=v.mentions,
+    is_metadata_tag=True,
+    inverse=v.is_mentioned_by,
 )
 
 has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v.debio_has_smiles)

@@ -306,6 +320,8 @@ has_end_date = TypeDef(
 
 has_title = TypeDef(reference=v.has_title, is_metadata_tag=True)
 has_license = TypeDef(reference=v.has_license, is_metadata_tag=True)
+has_creator = TypeDef(reference=v.has_creator, is_metadata_tag=True)
+
 has_description = TypeDef(reference=v.has_description, is_metadata_tag=True)
 obo_autogenerated_by = TypeDef(reference=v.obo_autogenerated_by, is_metadata_tag=True)
 obo_has_format_version = TypeDef(reference=v.obo_has_format_version, is_metadata_tag=True)
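Taken together with the source changes above, a downstream getter would now declare the renamed relation (and, where relevant, the new RO terms) on its typedefs class attribute. A hedged sketch that mirrors the pattern used by the sources in this diff (the class itself is a placeholder, not a real pyobo source, and real getters also implement iter_terms):

from pyobo.struct import Obo
from pyobo.struct.typedef import ends, from_species, is_mentioned_by, starts


class ExampleGetter(Obo):  # placeholder getter for illustration only
    ontology = "example"
    typedefs = [from_species, is_mentioned_by, starts, ends]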
pyobo/struct/vocabulary.py
CHANGED
@@ -5,7 +5,7 @@ from collections.abc import Sequence
 import curies
 from curies import vocabulary as _v
 
-from .reference import Reference
+from .reference import Reference
 
 __all__ = [
     "equivalent_class",

@@ -41,6 +41,7 @@ mapping_has_justification = Reference(
 )
 mapping_has_confidence = Reference(prefix="sssom", identifier="confidence", name="has confidence")
 has_contributor = _c(_v.has_contributor)
+has_creator = Reference(prefix="dcterms", identifier="creator", name="creator")
 has_source = _c(_v.has_source)
 has_date = _c(_v.has_date)
 has_dbxref = _c(_v.has_dbxref)

@@ -84,10 +85,12 @@ has_inchi = Reference(prefix="chemrof", identifier="inchi_string")
 debio_has_smiles = Reference(prefix="debio", identifier="0000022", name="has SMILES")
 has_smiles = Reference(prefix="chemrof", identifier="smiles_string")
 
-
-
+is_mentioned_by = Reference(prefix="mito", identifier="isMentionedBy", name="is mentioned by")
+mentions = Reference(prefix="mito", identifier="mentions", name="mentions")
+
 has_description = _c(_v.has_description)
 has_license = _c(_v.has_license)
+has_license = _c(_v.has_license)
 has_title = _c(_v.has_title)
 
 has_homepage = Reference(prefix="foaf", identifier="homepage", name="has homepage")