pyobo 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. pyobo/api/alts.py +13 -8
  2. pyobo/api/hierarchy.py +9 -5
  3. pyobo/api/metadata.py +6 -3
  4. pyobo/api/names.py +34 -11
  5. pyobo/api/relations.py +11 -3
  6. pyobo/api/species.py +3 -3
  7. pyobo/api/typedefs.py +6 -2
  8. pyobo/api/utils.py +5 -0
  9. pyobo/api/xrefs.py +10 -3
  10. pyobo/aws.py +12 -7
  11. pyobo/cli/lookup.py +5 -4
  12. pyobo/constants.py +31 -10
  13. pyobo/gilda_utils.py +21 -0
  14. pyobo/identifier_utils.py +22 -5
  15. pyobo/reader.py +1 -1
  16. pyobo/sources/__init__.py +2 -0
  17. pyobo/sources/antibodyregistry.py +7 -6
  18. pyobo/sources/biogrid.py +8 -4
  19. pyobo/sources/ccle.py +5 -5
  20. pyobo/sources/credit.py +68 -0
  21. pyobo/sources/geonames.py +27 -9
  22. pyobo/sources/hgnc.py +2 -2
  23. pyobo/sources/mesh.py +9 -7
  24. pyobo/sources/msigdb.py +1 -1
  25. pyobo/sources/npass.py +1 -1
  26. pyobo/sources/pubchem.py +3 -3
  27. pyobo/sources/rgd.py +1 -1
  28. pyobo/sources/rhea.py +2 -2
  29. pyobo/sources/ror.py +67 -21
  30. pyobo/sources/uniprot/uniprot.py +2 -2
  31. pyobo/struct/struct.py +4 -3
  32. pyobo/struct/typedef.py +10 -0
  33. pyobo/utils/path.py +2 -1
  34. pyobo/version.py +1 -1
  35. pyobo/xrefdb/sources/__init__.py +6 -3
  36. pyobo/xrefdb/sources/chembl.py +5 -5
  37. pyobo/xrefdb/sources/pubchem.py +3 -2
  38. pyobo/xrefdb/sources/wikidata.py +8 -1
  39. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/METADATA +23 -23
  40. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/RECORD +44 -44
  41. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/WHEEL +1 -1
  42. pyobo/xrefdb/bengo.py +0 -44
  43. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/LICENSE +0 -0
  44. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/entry_points.txt +0 -0
  45. {pyobo-0.10.10.dist-info → pyobo-0.10.12.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,12 @@
5
5
  import logging
6
6
  from typing import Iterable, Mapping, Optional
7
7
 
8
- import bioversions
9
8
  import pandas as pd
10
9
  from bioregistry.utils import removeprefix
11
10
  from tqdm.auto import tqdm
12
11
 
13
12
  from pyobo import Obo, Term
13
+ from pyobo.api.utils import get_version
14
14
  from pyobo.utils.path import ensure_df
15
15
 
16
16
  __all__ = [
@@ -24,9 +24,10 @@ URL = "http://antibodyregistry.org/php/fileHandler.php"
24
24
  CHUNKSIZE = 20_000
25
25
 
26
26
 
27
- def get_chunks(force: bool = False) -> pd.DataFrame:
27
+ def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
28
28
  """Get the Antibody Registry dataframe in chunks."""
29
- version = bioversions.get_version(PREFIX)
29
+ if version is None:
30
+ version = get_version(PREFIX)
30
31
  df = ensure_df(
31
32
  PREFIX,
32
33
  url=URL,
@@ -47,7 +48,7 @@ class AntibodyRegistryGetter(Obo):
47
48
 
48
49
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
49
50
  """Iterate over terms in the ontology."""
50
- return iter_terms(force=force)
51
+ return iter_terms(force=force, version=self._version_or_raise)
51
52
 
52
53
 
53
54
  def get_obo(*, force: bool = False) -> Obo:
@@ -74,9 +75,9 @@ SKIP = {
74
75
  }
75
76
 
76
77
 
77
- def iter_terms(force: bool = False) -> Iterable[Term]:
78
+ def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
78
79
  """Iterate over antibodies."""
79
- chunks = get_chunks(force=force)
80
+ chunks = get_chunks(force=force, version=version)
80
81
  needs_curating = set()
81
82
  # df['vendor'] = df['vendor'].map(bioregistry.normalize_prefix)
82
83
  it = tqdm(chunks, desc=f"{PREFIX}, chunkssize={CHUNKSIZE}")
pyobo/sources/biogrid.py CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  """Extract and convert BioGRID identifiers."""
4
4
 
5
+ from functools import partial
5
6
  from typing import Mapping, Optional
6
7
 
7
- import bioversions
8
8
  import pandas as pd
9
9
 
10
- from pyobo.constants import version_getter
10
+ from pyobo.api.utils import get_version
11
11
  from pyobo.resources.ncbitaxon import get_ncbitaxon_id
12
12
  from pyobo.utils.cache import cached_mapping
13
13
  from pyobo.utils.path import ensure_df, prefix_directory_join
@@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]:
52
52
 
53
53
  def get_df() -> pd.DataFrame:
54
54
  """Get the BioGRID identifiers mapping dataframe."""
55
- version = bioversions.get_version("biogrid")
55
+ version = get_version("biogrid")
56
56
  url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
57
57
  df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
58
58
  df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
@@ -61,7 +61,11 @@ def get_df() -> pd.DataFrame:
61
61
 
62
62
  @cached_mapping(
63
63
  path=prefix_directory_join(
64
- PREFIX, "cache", "xrefs", name="ncbigene.tsv", version=version_getter(PREFIX)
64
+ PREFIX,
65
+ "cache",
66
+ "xrefs",
67
+ name="ncbigene.tsv",
68
+ version=partial(get_version, PREFIX),
65
69
  ),
66
70
  header=["biogrid_id", "ncbigene_id"],
67
71
  )
pyobo/sources/ccle.py CHANGED
@@ -50,7 +50,7 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T
50
50
  yield term
51
51
 
52
52
 
53
- def get_version() -> str:
53
+ def get_ccle_static_version() -> str:
54
54
  """Get the default version of CCLE's cell lines."""
55
55
  return "2019"
56
56
 
@@ -58,21 +58,21 @@ def get_version() -> str:
58
58
  def get_url(version: Optional[str] = None) -> str:
59
59
  """Get the cBioPortal URL for the given version of CCLE's cell lines."""
60
60
  if version is None:
61
- version = get_version()
61
+ version = get_ccle_static_version()
62
62
  return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
63
63
 
64
64
 
65
65
  def get_inner(version: Optional[str] = None) -> str:
66
66
  """Get the inner tarfile path."""
67
67
  if version is None:
68
- version = get_version()
68
+ version = get_ccle_static_version()
69
69
  return f"ccle_broad_{version}/data_clinical_sample.txt"
70
70
 
71
71
 
72
72
  def ensure(version: Optional[str] = None, **kwargs) -> Path:
73
73
  """Ensure the given version is downloaded."""
74
74
  if version is None:
75
- version = get_version()
75
+ version = get_ccle_static_version()
76
76
  url = get_url(version=version)
77
77
  return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
78
78
 
@@ -80,7 +80,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path:
80
80
  def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
81
81
  """Get the CCLE clinical sample dataframe."""
82
82
  if version is None:
83
- version = get_version()
83
+ version = get_ccle_static_version()
84
84
  path = ensure(version=version, force=force)
85
85
  inner_path = get_inner(version=version)
86
86
  with tarfile.open(path) as tf:
@@ -0,0 +1,68 @@
1
+ """Converter for the Contributor Roles Taxonomy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Iterable
7
+
8
+ from more_itertools import chunked
9
+
10
+ from pyobo.struct import Obo, Term
11
+ from pyobo.utils.path import ensure_path
12
+
13
+ __all__ = [
14
+ "CreditGetter",
15
+ ]
16
+
17
+ url = "https://api.github.com/repos/CASRAI-CRedIT/Dictionary/contents/Picklists/Contributor%20Roles"
18
+ PREFIX = "credit"
19
+
20
+
21
+ class CreditGetter(Obo):
22
+ """An ontology representation of the Contributor Roles Taxonomy."""
23
+
24
+ ontology = PREFIX
25
+ static_version = "2022"
26
+ idspaces = {
27
+ PREFIX: "https://credit.niso.org/contributor-roles/",
28
+ }
29
+
30
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
+ """Iterate over terms in the ontology."""
32
+ return get_terms(force=force)
33
+
34
+
35
+ def get_obo(force: bool = False) -> Obo:
36
+ """Get the Contributor Roles Taxonomy as OBO."""
37
+ return CreditGetter(force=force)
38
+
39
+
40
+ def get_terms(force: bool = False) -> list[Term]:
41
+ """Get terms from the Contributor Roles Taxonomy via GitHub."""
42
+ path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
43
+ with open(path) as f:
44
+ data = json.load(f)
45
+ terms = []
46
+ for x in data:
47
+ name = x["name"].removesuffix(".md").lower()
48
+
49
+ pp = ensure_path(PREFIX, "picklist", url=x["download_url"], backend="requests")
50
+ with open(pp) as f:
51
+ header, *rest = f.read().splitlines()
52
+ name = header = header.removeprefix("# Contributor Roles/")
53
+ dd = {k.removeprefix("## "): v for k, v in chunked(rest, 2)}
54
+ identifier = (
55
+ dd["Canonical URL"]
56
+ .removeprefix("https://credit.niso.org/contributor-roles/")
57
+ .rstrip("/")
58
+ )
59
+ desc = dd["Short definition"]
60
+ terms.append(
61
+ Term.from_triple(prefix=PREFIX, identifier=identifier, name=name, definition=desc)
62
+ )
63
+
64
+ return terms
65
+
66
+
67
+ if __name__ == "__main__":
68
+ get_obo(force=True).write_default(write_obo=True)
pyobo/sources/geonames.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Get terms from geonames."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
4
6
  from typing import Collection, Iterable, Mapping
5
7
 
@@ -146,15 +148,7 @@ def get_code_to_admin2(
146
148
  return code_to_admin2
147
149
 
148
150
 
149
- def get_cities(
150
- code_to_country,
151
- code_to_admin1,
152
- code_to_admin2,
153
- *,
154
- minimum_population: int = 100_000,
155
- force: bool = False,
156
- ) -> Mapping[str, Term]:
157
- """Get a mapping from city code to term."""
151
+ def _get_cities_df(force: bool = False) -> pd.DataFrame:
158
152
  columns = [
159
153
  "geonames_id",
160
154
  "name",
@@ -184,7 +178,19 @@ def get_cities(
184
178
  names=columns,
185
179
  dtype=str,
186
180
  )
181
+ return cities_df
182
+
187
183
 
184
+ def get_cities(
185
+ code_to_country,
186
+ code_to_admin1,
187
+ code_to_admin2,
188
+ *,
189
+ minimum_population: int = 100_000,
190
+ force: bool = False,
191
+ ) -> Mapping[str, Term]:
192
+ """Get a mapping from city code to term."""
193
+ cities_df = _get_cities_df(force=force)
188
194
  cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
189
195
  cities_df.synonyms = cities_df.synonyms.str.split(",")
190
196
 
@@ -235,5 +241,17 @@ def get_cities(
235
241
  return terms
236
242
 
237
243
 
244
+ def get_city_to_country() -> dict[str, str]:
245
+ """Get a mapping from city GeoNames id to country GeoNames id."""
246
+ rv = {}
247
+ code_to_country = get_code_to_country()
248
+ cities_df = _get_cities_df()
249
+ for city_geonames_id, country_code in cities_df[["geonames_id", "country_code"]].values:
250
+ if pd.isna(city_geonames_id) or pd.isna(country_code):
251
+ continue
252
+ rv[city_geonames_id] = code_to_country[country_code].identifier
253
+ return rv
254
+
255
+
238
256
  if __name__ == "__main__":
239
257
  GeonamesGetter().write_default(write_obo=True, force=True)
pyobo/sources/hgnc.py CHANGED
@@ -10,10 +10,10 @@ from collections import Counter, defaultdict
10
10
  from operator import attrgetter
11
11
  from typing import DefaultDict, Dict, Iterable, Optional
12
12
 
13
- import bioversions
14
13
  from tabulate import tabulate
15
14
  from tqdm.auto import tqdm
16
15
 
16
+ from pyobo.api.utils import get_version
17
17
  from pyobo.struct import (
18
18
  Obo,
19
19
  Reference,
@@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo:
241
241
  def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901
242
242
  """Get HGNC terms."""
243
243
  if version is None:
244
- version = bioversions.get_version("hgnc")
244
+ version = get_version("hgnc")
245
245
  unhandled_entry_keys: typing.Counter[str] = Counter()
246
246
  unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
247
247
  path = ensure_path(
pyobo/sources/mesh.py CHANGED
@@ -11,6 +11,7 @@ from xml.etree.ElementTree import Element
11
11
 
12
12
  from tqdm.auto import tqdm
13
13
 
14
+ from pyobo.api.utils import get_version
14
15
  from pyobo.identifier_utils import standardize_ec
15
16
  from pyobo.struct import Obo, Reference, Synonym, Term
16
17
  from pyobo.utils.cache import cached_json, cached_mapping
@@ -318,21 +319,22 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
318
319
  ]
319
320
 
320
321
 
321
- def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]:
322
+ def get_mesh_category_curies(
323
+ letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
324
+ ) -> List[str]:
322
325
  """Get the MeSH LUIDs for a category, by letter (e.g., "A").
323
326
 
324
327
  :param letter: The MeSH tree, A for anatomy, C for disease, etc.
325
328
  :param skip: An optional collection of MeSH tree codes to skip, such as "A03"
329
+ :param version: The MeSH version to use. Defaults to latest
326
330
  :returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
327
331
 
328
332
  .. seealso:: https://meshb.nlm.nih.gov/treeView
329
333
  """
330
- import bioversions
331
-
332
- mesh_version = bioversions.get_version("mesh")
333
- if mesh_version is None:
334
- raise ValueError
335
- tree_to_mesh = get_tree_to_mesh_id(mesh_version)
334
+ if version is None:
335
+ version = get_version("mesh")
336
+ assert version is not None
337
+ tree_to_mesh = get_tree_to_mesh_id(version=version)
336
338
  rv = []
337
339
  for i in range(1, 100):
338
340
  key = f"{letter}{i:02}"
pyobo/sources/msigdb.py CHANGED
@@ -137,7 +137,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
137
137
  def _get_definition(attrib) -> Optional[str]:
138
138
  rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None
139
139
  if rv is not None:
140
- return rv.replace("\d", "").replace("\s", "") # noqa: W605
140
+ return rv.replace(r"\d", "").replace(r"\s", "") # noqa: W605
141
141
  return None
142
142
 
143
143
 
pyobo/sources/npass.py CHANGED
@@ -64,7 +64,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
64
64
  )
65
65
 
66
66
  for xref_prefix, xref_id in [
67
- ("chembl", chembl_id),
67
+ ("chembl.compound", chembl_id),
68
68
  # ("zinc", zinc_id),
69
69
  ]:
70
70
  if pd.notna(xref_id):
pyobo/sources/pubchem.py CHANGED
@@ -5,12 +5,12 @@
5
5
  import logging
6
6
  from typing import Iterable, Mapping, Optional
7
7
 
8
- import bioversions
9
8
  import pandas as pd
10
9
  from bioregistry.utils import removeprefix
11
10
  from tqdm.auto import tqdm
12
11
 
13
12
  from ..api import get_name_id_mapping
13
+ from ..api.utils import get_version
14
14
  from ..struct import Obo, Reference, Synonym, Term
15
15
  from ..utils.iter import iterate_gzips_together
16
16
  from ..utils.path import ensure_df, ensure_path
@@ -26,7 +26,7 @@ PREFIX = "pubchem.compound"
26
26
 
27
27
  def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
28
28
  if version is None:
29
- version = bioversions.get_version("pubchem")
29
+ version = get_version("pubchem")
30
30
  return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"
31
31
 
32
32
 
@@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:
100
100
 
101
101
  def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
102
102
  if version is None:
103
- version = bioversions.get_version("pubchem")
103
+ version = get_version("pubchem")
104
104
  # 2 tab-separated columns: compound_id, name
105
105
  cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
106
106
  cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)
pyobo/sources/rgd.py CHANGED
@@ -28,7 +28,7 @@ old_name_type = SynonymTypeDef.from_text("old_name")
28
28
 
29
29
  # NOTE unigene id was discontinued in the January 18th, 2021 dump
30
30
 
31
- GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES.RAT.txt"
31
+ GENES_URL = "https://download.rgd.mcw.edu/data_release/GENES_RAT.txt"
32
32
  GENES_HEADER = [
33
33
  "GENE_RGD_ID",
34
34
  "SYMBOL",
pyobo/sources/rhea.py CHANGED
@@ -5,9 +5,9 @@
5
5
  import logging
6
6
  from typing import TYPE_CHECKING, Dict, Iterable, Optional
7
7
 
8
- import bioversions
9
8
  import pystow
10
9
 
10
+ from pyobo.api.utils import get_version
11
11
  from pyobo.struct import Obo, Reference, Term
12
12
  from pyobo.struct.typedef import (
13
13
  TypeDef,
@@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
63
63
  """Get the Rhea RDF graph."""
64
64
  # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
65
65
  if version is None:
66
- version = bioversions.get_version(PREFIX)
66
+ version = get_version(PREFIX)
67
67
  return pystow.ensure_rdf(
68
68
  "pyobo",
69
69
  "raw",
pyobo/sources/ror.py CHANGED
@@ -1,34 +1,40 @@
1
1
  """Convert the Research Organization Registry (ROR) into an ontology."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import json
4
6
  import zipfile
5
- from typing import Iterable
7
+ from typing import Any, Iterable
6
8
 
7
9
  import bioregistry
8
10
  import zenodo_client
9
11
  from tqdm.auto import tqdm
10
12
 
11
- from pyobo.struct import Obo, Reference, Term, TypeDef
13
+ from pyobo.struct import Obo, Reference, Term
12
14
  from pyobo.struct.struct import acronym
15
+ from pyobo.struct.typedef import (
16
+ has_homepage,
17
+ has_part,
18
+ has_predecessor,
19
+ has_successor,
20
+ located_in,
21
+ part_of,
22
+ see_also,
23
+ )
13
24
 
14
25
  PREFIX = "ror"
15
26
  ROR_ZENODO_RECORD_ID = "10086202"
16
27
 
17
28
  # Constants
18
29
  ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
19
- LOCATED_IN = Reference(prefix="RO", identifier="0001025")
20
- PART_OF = Reference(prefix="BFO", identifier="0000050")
21
- HAS_PART = Reference(prefix="BFO", identifier="0000051")
22
- SUCCESSOR = Reference(prefix="BFO", identifier="0000063")
23
- PREDECESSOR = Reference(prefix="BFO", identifier="0000062")
24
30
 
25
31
  RMAP = {
26
- "Related": TypeDef.from_triple("rdfs", "seeAlso"),
27
- "Child": TypeDef(HAS_PART),
28
- "Parent": TypeDef(PART_OF),
29
- "Predecessor": TypeDef(PREDECESSOR),
30
- "Successor": TypeDef(SUCCESSOR),
31
- "Located in": TypeDef(LOCATED_IN),
32
+ "Related": see_also,
33
+ "Child": has_part,
34
+ "Parent": part_of,
35
+ "Predecessor": has_predecessor,
36
+ "Successor": has_successor,
37
+ "Located in": located_in,
32
38
  }
33
39
  NAME_REMAPPING = {
34
40
  "'s-Hertogenbosch": "Den Bosch", # SMH Netherlands, why u gotta be like this
@@ -43,16 +49,16 @@ class RORGetter(Obo):
43
49
  """An ontology representation of the ROR."""
44
50
 
45
51
  ontology = bioregistry_key = PREFIX
46
- typedefs = list(RMAP.values())
52
+ typedefs = [has_homepage, *RMAP.values()]
47
53
  synonym_typedefs = [acronym]
48
54
  idspaces = {
49
55
  "ror": "https://ror.org/",
50
56
  "geonames": "https://www.geonames.org/",
51
- "envo": "http://purl.obolibrary.org/obo/ENVO_",
52
- "bfo": "http://purl.obolibrary.org/obo/BFO_",
53
- "ro": "http://purl.obolibrary.org/obo/RO_",
54
- "obi": "http://purl.obolibrary.org/obo/OBI_",
55
- "omo": "http://purl.obolibrary.org/obo/OMO_",
57
+ "ENVO": "http://purl.obolibrary.org/obo/ENVO_",
58
+ "BFO": "http://purl.obolibrary.org/obo/BFO_",
59
+ "RO": "http://purl.obolibrary.org/obo/RO_",
60
+ "OBI": "http://purl.obolibrary.org/obo/OBI_",
61
+ "OMO": "http://purl.obolibrary.org/obo/OMO_",
56
62
  "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
57
63
  }
58
64
 
@@ -65,6 +71,18 @@ class RORGetter(Obo):
65
71
  return iterate_ror_terms(force=force)
66
72
 
67
73
 
74
+ ROR_ORGANIZATION_TYPE_TO_OBI = {
75
+ "Education": ...,
76
+ "Facility": ...,
77
+ "Company": ...,
78
+ "Government": ...,
79
+ "Healthcare": ...,
80
+ "Other": ...,
81
+ "Archive": ...,
82
+ }
83
+ _MISSED_ORG_TYPES: set[str] = set()
84
+
85
+
68
86
  def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
69
87
  """Iterate over terms in ROR."""
70
88
  version, source_uri, records = get_latest(force=force)
@@ -74,10 +92,23 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
74
92
  name = record["name"]
75
93
  name = NAME_REMAPPING.get(name, name)
76
94
 
95
+ organization_types = record.get("types", [])
96
+ description = f"{organization_types[0]} in {record['country']['country_name']}"
97
+ if established := record["established"]:
98
+ description += f" established in {established}"
99
+
77
100
  term = Term(
78
- reference=Reference(prefix=PREFIX, identifier=identifier, name=name), type="Instance"
101
+ reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
102
+ type="Instance",
103
+ definition=description,
79
104
  )
80
105
  term.append_parent(ORG_CLASS)
106
+ # TODO replace term.append_parent(ORG_CLASS) with:
107
+ # for organization_type in organization_types:
108
+ # term.append_parent(ORG_PARENTS[organization_type])
109
+
110
+ for link in record.get("links", []):
111
+ term.append_property(has_homepage, link)
81
112
 
82
113
  if name.startswith("The "):
83
114
  term.append_synonym(name.removeprefix("The "))
@@ -159,5 +190,20 @@ def get_latest(*, force: bool = False):
159
190
  raise FileNotFoundError
160
191
 
161
192
 
193
+ def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
194
+ """Get a mapping of ROR ids to GeoNames IDs for countries."""
195
+ from pyobo.sources.geonames import get_city_to_country
196
+
197
+ city_to_country = get_city_to_country()
198
+ rv = {}
199
+ for term in iterate_ror_terms(**kwargs):
200
+ city_geonames_reference = term.get_relationship(located_in)
201
+ if city_geonames_reference is None:
202
+ continue
203
+ if city_geonames_reference.identifier in city_to_country:
204
+ rv[term.identifier] = city_to_country[city_geonames_reference.identifier]
205
+ return rv
206
+
207
+
162
208
  if __name__ == "__main__":
163
- RORGetter().write_default(write_obo=True, force=True)
209
+ RORGetter(force=True).write_default(write_obo=True, force=True)
@@ -6,10 +6,10 @@ from operator import attrgetter
6
6
  from pathlib import Path
7
7
  from typing import Iterable, List, Optional, cast
8
8
 
9
- import bioversions
10
9
  from tqdm.auto import tqdm
11
10
 
12
11
  from pyobo import Obo, Reference
12
+ from pyobo.api.utils import get_version
13
13
  from pyobo.constants import RAW_MODULE
14
14
  from pyobo.identifier_utils import standardize_ec
15
15
  from pyobo.struct import Term, derives_from, enables, from_species, participates_in
@@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]:
166
166
  def ensure(version: Optional[str] = None, force: bool = False) -> Path:
167
167
  """Ensure the reviewed uniprot names are available."""
168
168
  if version is None:
169
- version = bioversions.get_version("uniprot")
169
+ version = get_version("uniprot")
170
170
  return RAW_MODULE.ensure(
171
171
  PREFIX,
172
172
  version,
pyobo/struct/struct.py CHANGED
@@ -56,6 +56,7 @@ from .typedef import (
56
56
  term_replaced_by,
57
57
  )
58
58
  from .utils import comma_separate, obo_escape_slim
59
+ from ..api.utils import get_version
59
60
  from ..constants import (
60
61
  DATE_FORMAT,
61
62
  NCBITAXON_PREFIX,
@@ -77,6 +78,8 @@ __all__ = [
77
78
  "Term",
78
79
  "Obo",
79
80
  "make_ad_hoc_ontology",
81
+ "abbreviation",
82
+ "acronym",
80
83
  ]
81
84
 
82
85
  logger = logging.getLogger(__name__)
@@ -583,10 +586,8 @@ class Obo:
583
586
 
584
587
  def _get_version(self) -> Optional[str]:
585
588
  if self.bioversions_key:
586
- import bioversions
587
-
588
589
  try:
589
- return bioversions.get_version(self.bioversions_key)
590
+ return get_version(self.bioversions_key)
590
591
  except KeyError:
591
592
  logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ")
592
593
  except IOError:
pyobo/struct/typedef.py CHANGED
@@ -42,9 +42,13 @@ __all__ = [
42
42
  "has_participant",
43
43
  "exact_match",
44
44
  "has_dbxref",
45
+ "located_in",
46
+ "has_successor",
47
+ "has_predecessor",
45
48
  # Properties
46
49
  "has_inchi",
47
50
  "has_smiles",
51
+ "has_homepage",
48
52
  ]
49
53
 
50
54
 
@@ -323,6 +327,9 @@ enabled_by = TypeDef(reference=_enabled_by_reference, inverse=_enables_reference
323
327
  has_input = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002233", name="has input")
324
328
  has_output = TypeDef.from_triple(prefix=RO_PREFIX, identifier="0002234", name="has output")
325
329
 
330
+ has_successor = TypeDef.from_triple(prefix="BFO", identifier="0000063", name="has successor")
331
+ has_predecessor = TypeDef.from_triple(prefix="BFO", identifier="0000062", name="has predecessor")
332
+
326
333
  """ChEBI"""
327
334
 
328
335
  is_conjugate_base_of = TypeDef(
@@ -355,6 +362,9 @@ has_inchi = TypeDef(
355
362
  reference=Reference(prefix="debio", identifier="0000020", name="has InChI"),
356
363
  )
357
364
 
365
+ has_homepage = TypeDef(
366
+ reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True
367
+ )
358
368
 
359
369
  default_typedefs: Dict[Tuple[str, str], TypeDef] = {
360
370
  v.pair: v for k, v in locals().items() if isinstance(v, TypeDef)
pyobo/utils/path.py CHANGED
@@ -25,7 +25,7 @@ __all__ = [
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
28
- VersionHint = Union[None, str, Callable[[], str]]
28
+ VersionHint = Union[None, str, Callable[[], Optional[str]]]
29
29
 
30
30
  requests_ftp.monkeypatch_session()
31
31
 
@@ -46,6 +46,7 @@ def prefix_directory_join(
46
46
  logger.info("[%s] got version %s", prefix, version)
47
47
  elif not isinstance(version, str):
48
48
  raise TypeError(f"Invalid type: {version} ({type(version)})")
49
+ assert version is not None
49
50
  version = cleanup_version(version, prefix=prefix)
50
51
  if version is not None and "/" in version:
51
52
  raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
pyobo/version.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  "get_git_hash",
15
15
  ]
16
16
 
17
- VERSION = "0.10.10"
17
+ VERSION = "0.10.12"
18
18
 
19
19
 
20
20
  def get_git_hash() -> str: