pyobo 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyobo/__init__.py CHANGED
@@ -25,6 +25,7 @@ from .api import ( # noqa: F401
25
25
  get_name,
26
26
  get_name_by_curie,
27
27
  get_name_id_mapping,
28
+ get_obsolete,
28
29
  get_primary_curie,
29
30
  get_primary_identifier,
30
31
  get_properties,
pyobo/api/__init__.py CHANGED
@@ -27,6 +27,7 @@ from .names import ( # noqa: F401
27
27
  get_name,
28
28
  get_name_by_curie,
29
29
  get_name_id_mapping,
30
+ get_obsolete,
30
31
  get_synonyms,
31
32
  )
32
33
  from .properties import ( # noqa: F401
pyobo/api/names.py CHANGED
@@ -24,6 +24,7 @@ __all__ = [
24
24
  "get_id_definition_mapping",
25
25
  "get_synonyms",
26
26
  "get_id_synonyms_mapping",
27
+ "get_obsolete",
27
28
  ]
28
29
 
29
30
  logger = logging.getLogger(__name__)
@@ -184,6 +185,26 @@ def get_id_definition_mapping(
184
185
  return _get_mapping()
185
186
 
186
187
 
188
+ def get_obsolete(
189
+ prefix: str,
190
+ *,
191
+ force: bool = False,
192
+ strict: bool = False,
193
+ version: Optional[str] = None,
194
+ ) -> Set[str]:
195
+ """Get the set of obsolete local unique identifiers."""
196
+ if version is None:
197
+ version = get_version(prefix)
198
+ path = prefix_cache_join(prefix, name="obsolete.tsv", version=version)
199
+
200
+ @cached_collection(path=path, force=force)
201
+ def _get_obsolete() -> Set[str]:
202
+ ontology = get_ontology(prefix, force=force, strict=strict, version=version)
203
+ return ontology.get_obsolete()
204
+
205
+ return set(_get_obsolete())
206
+
207
+
187
208
  @wrap_norm_prefix
188
209
  def get_synonyms(prefix: str, identifier: str) -> Optional[List[str]]:
189
210
  """Get the synonyms for an entity."""
pyobo/gilda_utils.py CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  """PyOBO's Gilda utilities."""
4
4
 
5
- import itertools as itt
6
5
  import logging
7
6
  from typing import Iterable, List, Optional, Tuple, Type, Union
8
7
 
@@ -11,6 +10,7 @@ import gilda.api
11
10
  import gilda.term
12
11
  from gilda.grounder import Grounder
13
12
  from gilda.process import normalize
13
+ from gilda.term import filter_out_duplicates
14
14
  from tqdm.auto import tqdm
15
15
 
16
16
  from pyobo import (
@@ -18,6 +18,7 @@ from pyobo import (
18
18
  get_id_species_mapping,
19
19
  get_id_synonyms_mapping,
20
20
  get_ids,
21
+ get_obsolete,
21
22
  )
22
23
  from pyobo.getters import NoBuild
23
24
  from pyobo.utils.io import multidict
@@ -31,32 +32,6 @@ __all__ = [
31
32
  logger = logging.getLogger(__name__)
32
33
 
33
34
 
34
- _STATUSES = {"curated": 1, "name": 2, "synonym": 3, "former_name": 4}
35
-
36
-
37
- def filter_out_duplicates(terms: List[gilda.term.Term]) -> List[gilda.term.Term]:
38
- """Filter out duplicates."""
39
- # TODO import from gilda.term import filter_out_duplicates when it gets moved,
40
- # see https://github.com/indralab/gilda/pull/103
41
- logger.debug("filtering %d terms for uniqueness", len(terms))
42
- new_terms: List[gilda.term.Term] = [
43
- min(terms_group, key=_status_key)
44
- for _, terms_group in itt.groupby(sorted(terms, key=_term_key), key=_term_key)
45
- ]
46
- # Re-sort the terms
47
- new_terms = sorted(new_terms, key=lambda x: (x.text, x.db, x.id))
48
- logger.debug("got %d unique terms.", len(new_terms))
49
- return new_terms
50
-
51
-
52
- def _status_key(term: gilda.term.Term) -> int:
53
- return _STATUSES[term.status]
54
-
55
-
56
- def _term_key(term: gilda.term.Term) -> Tuple[str, str, str]:
57
- return term.db, term.id, term.text
58
-
59
-
60
35
  def iter_gilda_prediction_tuples(
61
36
  prefix: str,
62
37
  relation: str = "skos:exactMatch",
@@ -115,10 +90,12 @@ def normalize_identifier(prefix: str, identifier: str) -> str:
115
90
 
116
91
  def get_grounder(
117
92
  prefixes: Union[str, Iterable[str]],
93
+ *,
118
94
  unnamed: Optional[Iterable[str]] = None,
119
95
  grounder_cls: Optional[Type[Grounder]] = None,
120
96
  versions: Union[None, str, Iterable[Union[str, None]]] = None,
121
97
  strict: bool = True,
98
+ skip_obsolete: bool = False,
122
99
  ) -> Grounder:
123
100
  """Get a Gilda grounder for the given prefix(es)."""
124
101
  unnamed = set() if unnamed is None else set(unnamed)
@@ -140,7 +117,11 @@ def get_grounder(
140
117
  try:
141
118
  p_terms = list(
142
119
  get_gilda_terms(
143
- prefix, identifiers_are_names=prefix in unnamed, version=version, strict=strict
120
+ prefix,
121
+ identifiers_are_names=prefix in unnamed,
122
+ version=version,
123
+ strict=strict,
124
+ skip_obsolete=skip_obsolete,
144
125
  )
145
126
  )
146
127
  except NoBuild:
@@ -155,26 +136,50 @@ def get_grounder(
155
136
  return grounder_cls(terms_dict)
156
137
 
157
138
 
139
+ def _fast_term(
140
+ *,
141
+ text: str,
142
+ prefix: str,
143
+ identifier: str,
144
+ name: str,
145
+ status: str,
146
+ organism: Optional[str] = None,
147
+ ) -> gilda.term.Term:
148
+ return gilda.term.Term(
149
+ norm_text=normalize(text),
150
+ text=text,
151
+ db=prefix,
152
+ id=identifier,
153
+ entry_name=name,
154
+ status=status,
155
+ source=prefix,
156
+ organism=organism,
157
+ )
158
+
159
+
158
160
  def get_gilda_terms(
159
161
  prefix: str,
162
+ *,
160
163
  identifiers_are_names: bool = False,
161
164
  version: Optional[str] = None,
162
165
  strict: bool = True,
166
+ skip_obsolete: bool = False,
163
167
  ) -> Iterable[gilda.term.Term]:
164
168
  """Get gilda terms for the given namespace."""
165
169
  id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
166
170
  id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
171
+ obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
167
172
 
168
173
  it = tqdm(id_to_name.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="name")
169
174
  for identifier, name in it:
170
- yield gilda.term.Term(
171
- norm_text=normalize(name),
175
+ if identifier in obsoletes:
176
+ continue
177
+ yield _fast_term(
172
178
  text=name,
173
- db=prefix,
174
- id=identifier,
175
- entry_name=name,
179
+ prefix=prefix,
180
+ identifier=identifier,
181
+ name=name,
176
182
  status="name",
177
- source=prefix,
178
183
  organism=id_to_species.get(identifier),
179
184
  )
180
185
 
@@ -184,29 +189,31 @@ def get_gilda_terms(
184
189
  id_to_synonyms.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="synonym"
185
190
  )
186
191
  for identifier, synonyms in it:
192
+ if identifier in obsoletes:
193
+ continue
187
194
  name = id_to_name[identifier]
188
195
  for synonym in synonyms:
189
- yield gilda.term.Term(
190
- norm_text=normalize(synonym),
196
+ if not synonym:
197
+ continue
198
+ yield _fast_term(
191
199
  text=synonym,
192
- db=prefix,
193
- id=identifier,
194
- entry_name=name,
200
+ prefix=prefix,
201
+ identifier=identifier,
202
+ name=name,
195
203
  status="synonym",
196
- source=prefix,
197
204
  organism=id_to_species.get(identifier),
198
205
  )
199
206
 
200
207
  if identifiers_are_names:
201
208
  it = tqdm(get_ids(prefix), desc=f"[{prefix}] mapping", unit_scale=True, unit="id")
202
209
  for identifier in it:
203
- yield gilda.term.Term(
204
- norm_text=normalize(identifier),
210
+ if identifier in obsoletes:
211
+ continue
212
+ yield _fast_term(
205
213
  text=identifier,
206
- db=prefix,
207
- id=identifier,
208
- entry_name=None,
209
- status="identifier",
210
- source=prefix,
214
+ prefix=prefix,
215
+ identifier=identifier,
216
+ name=identifier,
217
+ status="name",
211
218
  organism=id_to_species.get(identifier),
212
219
  )
pyobo/sources/__init__.py CHANGED
@@ -20,6 +20,7 @@ from .drugcentral import DrugCentralGetter
20
20
  from .expasy import ExpasyGetter
21
21
  from .famplex import FamPlexGetter
22
22
  from .flybase import FlyBaseGetter
23
+ from .geonames import GeonamesGetter
23
24
  from .gwascentral_phenotype import GWASCentralPhenotypeGetter
24
25
  from .gwascentral_study import GWASCentralStudyGetter
25
26
  from .hgnc import HGNCGetter
@@ -46,6 +47,7 @@ from .pubchem import PubChemCompoundGetter
46
47
  from .reactome import ReactomeGetter
47
48
  from .rgd import RGDGetter
48
49
  from .rhea import RheaGetter
50
+ from .ror import RORGetter
49
51
  from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
50
52
  from .sgd import SGDGetter
51
53
  from .slm import SLMGetter
@@ -74,6 +76,7 @@ __all__ = [
74
76
  "FlyBaseGetter",
75
77
  "GWASCentralPhenotypeGetter",
76
78
  "GWASCentralStudyGetter",
79
+ "GeonamesGetter",
77
80
  "HGNCGetter",
78
81
  "HGNCGroupGetter",
79
82
  "ICD10Getter",
@@ -98,6 +101,7 @@ __all__ = [
98
101
  "PomBaseGetter",
99
102
  "PubChemCompoundGetter",
100
103
  "RGDGetter",
104
+ "RORGetter",
101
105
  "ReactomeGetter",
102
106
  "RheaGetter",
103
107
  "SCHEMGetter",
pyobo/sources/cgnc.py CHANGED
@@ -8,6 +8,7 @@ from typing import Iterable
8
8
  import pandas as pd
9
9
 
10
10
  from pyobo.struct import Obo, Reference, Term, from_species
11
+ from pyobo.struct.typedef import exact_match
11
12
  from pyobo.utils.path import ensure_df
12
13
 
13
14
  __all__ = [
@@ -25,7 +26,7 @@ class CGNCGetter(Obo):
25
26
 
26
27
  ontology = PREFIX
27
28
  dynamic_version = True
28
- typedefs = [from_species]
29
+ typedefs = [from_species, exact_match]
29
30
 
30
31
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
32
  """Iterate over terms in the ontology."""
pyobo/sources/chembl.py CHANGED
@@ -12,7 +12,7 @@ from typing import Iterable
12
12
  import chembl_downloader
13
13
 
14
14
  from pyobo.struct import Obo, Reference, Term
15
- from pyobo.struct.typedef import has_inchi, has_smiles
15
+ from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
16
16
 
17
17
  __all__ = [
18
18
  "ChEMBLCompoundGetter",
@@ -45,6 +45,7 @@ class ChEMBLCompoundGetter(Obo):
45
45
 
46
46
  ontology = "chembl.compound"
47
47
  bioversions_key = "chembl"
48
+ typedefs = [exact_match]
48
49
 
49
50
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
50
51
  """Iterate over terms in the ontology."""
pyobo/sources/depmap.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
8
  import pystow
9
9
 
10
10
  from pyobo import Obo, Reference, Term
11
+ from pyobo.struct.typedef import exact_match
11
12
 
12
13
  __all__ = [
13
14
  "get_obo",
@@ -23,6 +24,7 @@ class DepMapGetter(Obo):
23
24
 
24
25
  ontology = bioversions_key = PREFIX
25
26
  data_version = VERSION
27
+ typedefs = [exact_match]
26
28
 
27
29
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
28
30
  """Iterate over terms in the ontology."""
@@ -12,7 +12,7 @@ import psycopg2
12
12
  from tqdm.auto import tqdm
13
13
 
14
14
  from pyobo.struct import Obo, Reference, Synonym, Term
15
- from pyobo.struct.typedef import has_inchi, has_smiles
15
+ from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
16
16
 
17
17
  __all__ = [
18
18
  "DrugCentralGetter",
@@ -34,6 +34,7 @@ class DrugCentralGetter(Obo):
34
34
  """An ontology representation of the DrugCentral database."""
35
35
 
36
36
  ontology = bioversions_key = PREFIX
37
+ typedefs = [exact_match]
37
38
 
38
39
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
39
40
  """Iterate over terms in the ontology."""
@@ -0,0 +1,229 @@
1
+ """Get terms from geonames."""
2
+
3
+ import logging
4
+ from typing import Collection, Iterable, Mapping
5
+
6
+ import pandas as pd
7
+ from pystow.utils import read_zipfile_csv
8
+ from tqdm import tqdm
9
+
10
+ from pyobo import Obo, Term
11
+ from pyobo.struct import Reference, part_of
12
+ from pyobo.utils.path import ensure_df, ensure_path
13
+
14
+ __all__ = ["GeonamesGetter"]
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ PREFIX = "geonames"
19
+ COUNTRIES_URL = "https://download.geonames.org/export/dump/countryInfo.txt"
20
+ ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
21
+ ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
22
+ CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
23
+
24
+
25
+ class GeonamesGetter(Obo):
26
+ """An ontology representation of GeoNames."""
27
+
28
+ ontology = PREFIX
29
+ dynamic_version = True
30
+ typedefs = [part_of]
31
+
32
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
33
+ """Iterate over terms in the ontology."""
34
+ return get_terms(force=force)
35
+
36
+
37
+ def get_terms(*, force: bool = False) -> Collection[Term]:
38
+ """Get terms."""
39
+ code_to_country = get_code_to_country(force=force)
40
+ code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
41
+ code_to_admin2 = get_code_to_admin2(code_to_admin1, force=force)
42
+ id_to_term = get_cities(
43
+ code_to_country=code_to_country,
44
+ code_to_admin1=code_to_admin1,
45
+ code_to_admin2=code_to_admin2,
46
+ force=force,
47
+ )
48
+ return id_to_term.values()
49
+
50
+
51
+ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
52
+ """Get a mapping from country code to country term."""
53
+ countries_df = ensure_df(
54
+ PREFIX,
55
+ url=COUNTRIES_URL,
56
+ force=force,
57
+ skiprows=49,
58
+ keep_default_na=False, # NA is a country code
59
+ dtype=str,
60
+ )
61
+ logger.info(f"got {len(countries_df.index):,} countries")
62
+ reorder = ["geonameid", *(c for c in countries_df.columns if c != "geonameid")]
63
+ countries_df = countries_df[reorder]
64
+ code_to_country = {}
65
+ cols = ["geonameid", "Country", "#ISO", "fips", "ISO3"]
66
+ for identifier, name, code, fips, iso3 in countries_df[cols].values:
67
+ if pd.isna(code):
68
+ continue
69
+ term = Term.from_triple(
70
+ "geonames", identifier, name if pd.notna(name) else None, type="Instance"
71
+ )
72
+ term.append_synonym(code)
73
+ if name.startswith("The "):
74
+ term.append_synonym(name.removeprefix("The "))
75
+ if pd.notna(fips):
76
+ term.append_synonym(fips)
77
+ if pd.notna(iso3):
78
+ term.append_synonym(iso3)
79
+ term.append_property("code", code)
80
+ code_to_country[code] = term
81
+ logger.info(f"got {len(code_to_country):,} country records")
82
+ return code_to_country
83
+
84
+
85
+ def get_code_to_admin1(
86
+ code_to_country: Mapping[str, Term], *, force: bool = False
87
+ ) -> Mapping[str, Term]:
88
+ """Get a mapping from admin1 code to term."""
89
+ admin1_df = ensure_df(
90
+ PREFIX,
91
+ url=ADMIN1_URL,
92
+ header=None,
93
+ names=["code", "name", "asciiname", "geonames_id"],
94
+ dtype=str,
95
+ force=force,
96
+ )
97
+ code_to_admin1 = {}
98
+ for code, name, asciiname, identifier in admin1_df.values:
99
+ if pd.isna(identifier) or pd.isna(code):
100
+ tqdm.write(f"Missing info for {name} / {asciiname} / {code=} / {identifier=}")
101
+ continue
102
+
103
+ term = Term.from_triple(
104
+ "geonames", identifier, name if pd.notna(name) else None, type="Instance"
105
+ )
106
+ term.append_property("code", code)
107
+ code_to_admin1[code] = term
108
+
109
+ country_code = code.split(".")[0]
110
+ country_term = code_to_country[country_code]
111
+ term.append_relationship(part_of, country_term)
112
+ return code_to_admin1
113
+
114
+
115
+ def get_code_to_admin2(
116
+ code_to_admin1: Mapping[str, Term], *, force: bool = False
117
+ ) -> Mapping[str, Term]:
118
+ """Get a mapping from admin2 code to term."""
119
+ admin2_df = ensure_df(
120
+ PREFIX,
121
+ url=ADMIN2_URL,
122
+ header=None,
123
+ names=["code", "name", "asciiname", "geonames_id"],
124
+ dtype=str,
125
+ force=force,
126
+ )
127
+ code_to_admin2 = {}
128
+ for identifier, name, code in admin2_df[["geonames_id", "name", "code"]].values:
129
+ if pd.isna(identifier) or pd.isna(code):
130
+ continue
131
+ term = Term.from_triple(
132
+ "geonames", identifier, name if pd.notna(name) else None, type="Instance"
133
+ )
134
+ term.append_property("code", code)
135
+ code_to_admin2[code] = term
136
+ admin1_code = code.rsplit(".", 1)[0]
137
+ admin1_term = code_to_admin1[admin1_code]
138
+ term.append_relationship(part_of, admin1_term)
139
+ return code_to_admin2
140
+
141
+
142
+ def get_cities(
143
+ code_to_country,
144
+ code_to_admin1,
145
+ code_to_admin2,
146
+ *,
147
+ minimum_population: int = 100_000,
148
+ force: bool = False,
149
+ ) -> Mapping[str, Term]:
150
+ """Get a mapping from city code to term."""
151
+ columns = [
152
+ "geonames_id",
153
+ "name",
154
+ "asciiname",
155
+ "synonyms",
156
+ "latitude",
157
+ "longitude",
158
+ "feature_class",
159
+ "feature_code",
160
+ "country_code",
161
+ "cc2",
162
+ "admin1",
163
+ "admin2",
164
+ "admin3",
165
+ "admin4",
166
+ "population",
167
+ "elevation",
168
+ "dem",
169
+ "timezone",
170
+ "date_modified",
171
+ ]
172
+ path = ensure_path(PREFIX, url=CITIES_URL, force=force)
173
+ cities_df = read_zipfile_csv(
174
+ path=path,
175
+ inner_path="cities15000.txt",
176
+ header=None,
177
+ names=columns,
178
+ dtype=str,
179
+ )
180
+
181
+ cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
182
+ cities_df.synonyms = cities_df.synonyms.str.split(",")
183
+
184
+ terms = {}
185
+ for term in code_to_country.values():
186
+ terms[term.identifier] = term
187
+
188
+ cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "feature_code"]
189
+ for identifier, name, synonyms, country, admin1, admin2, feature_code in cities_df[cols].values:
190
+ terms[identifier] = term = Term.from_triple(
191
+ "geonames", identifier, name if pd.notna(name) else None, type="Instance"
192
+ )
193
+ term.append_parent(Reference(prefix="geonames.feature", identifier=feature_code))
194
+ if synonyms and not isinstance(synonyms, float):
195
+ for synonym in synonyms:
196
+ if pd.notna(synonym):
197
+ term.append_synonym(synonym)
198
+
199
+ if pd.isna(admin1):
200
+ tqdm.write(f"[geonames:{identifier}] missing admin 1 code for {name} ({country})")
201
+ continue
202
+
203
+ admin1_full = f"{country}.{admin1}"
204
+ admin1_term = code_to_admin1.get(admin1_full)
205
+ if admin1_term is None:
206
+ logger.info(f"could not find admin1 {admin1_full}")
207
+ continue
208
+
209
+ terms[admin1_term.identifier] = admin1_term
210
+
211
+ if pd.notna(admin2):
212
+ admin2_full = f"{country}.{admin1}.{admin2}"
213
+ admin2_term = code_to_admin2.get(admin2_full)
214
+ if admin2_term is None or admin1_term is None:
215
+ pass
216
+ # print("could not find admin2", admin2_full)
217
+ else:
218
+ term.append_relationship(part_of, admin2_term)
219
+ terms[admin2_term.identifier] = admin2_term
220
+
221
+ else: # pd.notna(admin1):
222
+ # If there's no admin 2, just annotate directly onto admin 1
223
+ term.append_relationship(part_of, admin1_term)
224
+
225
+ return terms
226
+
227
+
228
+ if __name__ == "__main__":
229
+ GeonamesGetter().write_default(write_obo=True, force=True)
pyobo/sources/hgnc.py CHANGED
@@ -27,6 +27,7 @@ from pyobo.struct import (
27
27
  orthologous,
28
28
  transcribes_to,
29
29
  )
30
+ from pyobo.struct.typedef import exact_match
30
31
  from pyobo.utils.path import ensure_path, prefix_directory_join
31
32
 
32
33
  __all__ = [
@@ -108,6 +109,28 @@ ENCODINGS = {
108
109
  "unknown": "GRP",
109
110
  }
110
111
 
112
+ SKIP_KEYS = {
113
+ "date_approved_reserved",
114
+ "_version_",
115
+ "uuid",
116
+ "date_modified",
117
+ "date_name_changed",
118
+ "date_symbol_changed",
119
+ "symbol_report_tag",
120
+ "location_sortable",
121
+ "curator_notes",
122
+ "agr", # repeat of HGNC ID
123
+ "gencc", # repeat of HGNC ID
124
+ "bioparadigms_slc", # repeat of symbol
125
+ "lncrnadb", # repeat of symbol
126
+ "gtrnadb", # repeat of symbol
127
+ "horde_id", # repeat of symbol
128
+ "imgt", # repeat of symbol
129
+ "cd", # symbol
130
+ "homeodb", # TODO add to bioregistry, though this is defunct
131
+ "mamit-trnadb", # TODO add to bioregistry, though this is defunct
132
+ }
133
+
111
134
  #: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
112
135
  LOCUS_TYPE_TO_SO = {
113
136
  # protein-coding gene
@@ -190,6 +213,7 @@ class HGNCGetter(Obo):
190
213
  transcribes_to,
191
214
  orthologous,
192
215
  member_of,
216
+ exact_match,
193
217
  ]
194
218
  idspaces = IDSPACES
195
219
  synonym_typedefs = [
@@ -330,6 +354,12 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
330
354
  else:
331
355
  tqdm.write(f"unhandled IUPHAR: {iuphar}")
332
356
 
357
+ for lrg_info in entry.pop("lsdb", []):
358
+ if lrg_info.startswith("LRG_"):
359
+ lrg_curie = lrg_info.split("|")[0]
360
+ _, lrg_id = lrg_curie.split("_")
361
+ term.append_xref(Reference(prefix="lrg", identifier=lrg_id))
362
+
333
363
  for xref_prefix, key in gene_xrefs:
334
364
  xref_identifiers = entry.pop(key, None)
335
365
  if xref_identifiers is None:
@@ -397,7 +427,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
397
427
  term.set_species(identifier="9606", name="Homo sapiens")
398
428
 
399
429
  for key in entry:
400
- unhandled_entry_keys[key] += 1
430
+ if key not in SKIP_KEYS:
431
+ unhandled_entry_keys[key] += 1
401
432
  yield term
402
433
 
403
434
  with open(prefix_directory_join(PREFIX, name="unhandled.json"), "w") as file:
pyobo/sources/mgi.py CHANGED
@@ -9,6 +9,8 @@ from typing import Iterable
9
9
  import pandas as pd
10
10
  from tqdm.auto import tqdm
11
11
 
12
+ from pyobo.struct.typedef import exact_match
13
+
12
14
  from ..struct import (
13
15
  Obo,
14
16
  Reference,
@@ -37,7 +39,7 @@ class MGIGetter(Obo):
37
39
 
38
40
  ontology = PREFIX
39
41
  dynamic_version = True
40
- typedefs = [from_species, has_gene_product, transcribes_to]
42
+ typedefs = [from_species, has_gene_product, transcribes_to, exact_match]
41
43
 
42
44
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
43
45
  """Iterate over terms in the ontology."""
pyobo/sources/mirbase.py CHANGED
@@ -136,6 +136,8 @@ def _process_definitions_lines(
136
136
  xref_prefix, xref_identifier, xref_label = map(str.strip, line.split(";"))
137
137
  xref_prefix = xref_prefix.lower()
138
138
  xref_prefix = xref_mapping.get(xref_prefix, xref_prefix)
139
+ if xref_prefix == "pictar":
140
+ continue
139
141
  xrefs.append(
140
142
  Reference(prefix=xref_prefix, identifier=xref_identifier, name=xref_label or None)
141
143
  )
@@ -26,6 +26,7 @@ class MiRBaseFamilyGetter(Obo):
26
26
 
27
27
  ontology = PREFIX
28
28
  bioversions_key = "mirbase"
29
+ typedefs = [has_member]
29
30
 
30
31
  def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
32
  """Iterate over terms in the ontology."""
@@ -40,7 +41,9 @@ def get_obo(force: bool = False) -> Obo:
40
41
  def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
41
42
  """Get miRBase family terms."""
42
43
  df = get_df(version, force=force)
43
- for family_id, name, mirna_id, mirna_name in tqdm(df.values, total=len(df.index)):
44
+ for family_id, name, mirna_id, mirna_name in tqdm(
45
+ df.values, total=len(df.index), unit_scale=True, desc="miRBase Family"
46
+ ):
44
47
  term = Term(
45
48
  reference=Reference(prefix=PREFIX, identifier=family_id, name=name),
46
49
  )
@@ -65,4 +68,4 @@ def get_df(version: str, force: bool = False) -> pd.DataFrame:
65
68
 
66
69
 
67
70
  if __name__ == "__main__":
68
- get_obo().write_default(use_tqdm=True)
71
+ get_obo().write_default(use_tqdm=True, write_obo=True, force=True)