pyobo 0.12.13__py3-none-any.whl → 0.12.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/constants.py +1 -1
- pyobo/sources/hgnc/hgnc.py +3 -0
- pyobo/sources/mesh.py +8 -1
- pyobo/sources/ror.py +262 -84
- pyobo/struct/struct.py +15 -7
- pyobo/struct/typedef.py +20 -0
- pyobo/utils/misc.py +9 -0
- pyobo/version.py +1 -1
- {pyobo-0.12.13.dist-info → pyobo-0.12.14.dist-info}/METADATA +3 -3
- {pyobo-0.12.13.dist-info → pyobo-0.12.14.dist-info}/RECORD +13 -13
- {pyobo-0.12.13.dist-info → pyobo-0.12.14.dist-info}/WHEEL +1 -1
- {pyobo-0.12.13.dist-info → pyobo-0.12.14.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.13.dist-info → pyobo-0.12.14.dist-info}/licenses/LICENSE +0 -0
pyobo/constants.py
CHANGED
|
@@ -267,7 +267,7 @@ def _get_json_download(prefix: str) -> str | None:
|
|
|
267
267
|
def _get_rdf_download(prefix: str) -> str | None:
|
|
268
268
|
import bioregistry
|
|
269
269
|
|
|
270
|
-
return bioregistry.get_rdf_download(prefix)
|
|
270
|
+
return bioregistry.get_rdf_download(prefix, get_format=False)
|
|
271
271
|
|
|
272
272
|
|
|
273
273
|
#: Functions that get ontology files. Order matters in this list,
|
pyobo/sources/hgnc/hgnc.py
CHANGED
|
@@ -210,6 +210,7 @@ class HGNCGetter(Obo):
|
|
|
210
210
|
for so_id in sorted(set(LOCUS_TYPE_TO_SO.values()))
|
|
211
211
|
if so_id
|
|
212
212
|
]
|
|
213
|
+
skip_maintainers = True
|
|
213
214
|
|
|
214
215
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
215
216
|
"""Iterate over terms in the ontology."""
|
|
@@ -342,6 +343,8 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
|
|
|
342
343
|
term.append_exact_match(
|
|
343
344
|
Reference(prefix="iuphar.ligand", identifier=iuphar[len("ligandId:") :])
|
|
344
345
|
)
|
|
346
|
+
elif iuphar.startswith("HGNC:"):
|
|
347
|
+
pass
|
|
345
348
|
else:
|
|
346
349
|
tqdm.write(f"[hgnc:{identifier}] unhandled IUPHAR: {iuphar}")
|
|
347
350
|
|
pyobo/sources/mesh.py
CHANGED
|
@@ -131,7 +131,14 @@ def get_terms(version: str, *, force: bool = False) -> Iterable[Term]:
|
|
|
131
131
|
for term in concept["terms"]:
|
|
132
132
|
synonyms.add(term["name"])
|
|
133
133
|
for xref_prefix, xref_identifier in concept.get("xrefs", []):
|
|
134
|
-
|
|
134
|
+
try:
|
|
135
|
+
xref = Reference(prefix=xref_prefix, identifier=xref_identifier)
|
|
136
|
+
except ValueError:
|
|
137
|
+
tqdm.write(
|
|
138
|
+
f"[mesh:{identifier}] has invalid xref {xref_prefix}:{xref_identifier}"
|
|
139
|
+
)
|
|
140
|
+
else:
|
|
141
|
+
xrefs.append(xref)
|
|
135
142
|
|
|
136
143
|
mesh_id_to_term[identifier] = Term(
|
|
137
144
|
definition=definition,
|
pyobo/sources/ror.py
CHANGED
|
@@ -2,15 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import datetime
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
7
8
|
import zipfile
|
|
8
9
|
from collections.abc import Iterable
|
|
9
|
-
from
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Literal, NamedTuple, TypeAlias
|
|
10
13
|
|
|
11
14
|
import bioregistry
|
|
12
15
|
import zenodo_client
|
|
13
|
-
from pydantic import ValidationError
|
|
16
|
+
from pydantic import BaseModel, ValidationError
|
|
14
17
|
from tqdm.auto import tqdm
|
|
15
18
|
|
|
16
19
|
from pyobo.struct import Obo, Reference, Term
|
|
@@ -25,21 +28,29 @@ from pyobo.struct.typedef import (
|
|
|
25
28
|
see_also,
|
|
26
29
|
)
|
|
27
30
|
|
|
31
|
+
__all__ = [
|
|
32
|
+
"OrganizationType",
|
|
33
|
+
"RORStatus",
|
|
34
|
+
"get_ror_records",
|
|
35
|
+
"get_ror_status",
|
|
36
|
+
"get_ror_to_country_geonames",
|
|
37
|
+
]
|
|
38
|
+
|
|
28
39
|
logger = logging.getLogger(__name__)
|
|
29
40
|
PREFIX = "ror"
|
|
30
|
-
ROR_ZENODO_RECORD_ID = "
|
|
41
|
+
ROR_ZENODO_RECORD_ID = "17953395"
|
|
31
42
|
|
|
32
43
|
# Constants
|
|
33
44
|
ORG_CLASS = Reference(prefix="OBI", identifier="0000245", name="organization")
|
|
34
45
|
CITY_CLASS = Reference(prefix="ENVO", identifier="00000856", name="city")
|
|
35
46
|
|
|
36
47
|
RMAP = {
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
48
|
+
"related": see_also,
|
|
49
|
+
"child": has_part,
|
|
50
|
+
"parent": part_of,
|
|
51
|
+
"predecessor": has_predecessor,
|
|
52
|
+
"successor": has_successor,
|
|
53
|
+
"located in": located_in,
|
|
43
54
|
}
|
|
44
55
|
NAME_REMAPPING = {
|
|
45
56
|
"'s-Hertogenbosch": "Den Bosch", # SMH Netherlands, why u gotta be like this
|
|
@@ -59,7 +70,7 @@ class RORGetter(Obo):
|
|
|
59
70
|
root_terms = [CITY_CLASS, ORG_CLASS]
|
|
60
71
|
|
|
61
72
|
def __post_init__(self):
|
|
62
|
-
self.data_version, _url, _path =
|
|
73
|
+
self.data_version, _url, _path = get_ror_status()
|
|
63
74
|
super().__post_init__()
|
|
64
75
|
|
|
65
76
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -72,14 +83,27 @@ class RORGetter(Obo):
|
|
|
72
83
|
yield from iterate_ror_terms(force=force)
|
|
73
84
|
|
|
74
85
|
|
|
75
|
-
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
86
|
+
OrganizationType: TypeAlias = Literal[
|
|
87
|
+
"education",
|
|
88
|
+
"facility",
|
|
89
|
+
"funder",
|
|
90
|
+
"company",
|
|
91
|
+
"government",
|
|
92
|
+
"healthcare",
|
|
93
|
+
"archive",
|
|
94
|
+
"nonprofit",
|
|
95
|
+
"other",
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
ROR_ORGANIZATION_TYPE_TO_OBI: dict[OrganizationType, Term] = {
|
|
99
|
+
"education": Term.default(PREFIX, "education", "educational organization"),
|
|
100
|
+
"facility": Term.default(PREFIX, "facility", "facility"),
|
|
101
|
+
"funder": Term.default(PREFIX, "funder", "funder"),
|
|
102
|
+
"company": Term.default(PREFIX, "company", "company"),
|
|
103
|
+
"government": Term.default(PREFIX, "government", "government organization"),
|
|
104
|
+
"healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
|
|
105
|
+
"archive": Term.default(PREFIX, "archive", "archival organization"),
|
|
106
|
+
"nonprofit": Term.default(PREFIX, "healthcare", "nonprofit organization")
|
|
83
107
|
.append_xref(Reference(prefix="ICO", identifier="0000048"))
|
|
84
108
|
.append_xref(Reference(prefix="GSSO", identifier="004615")),
|
|
85
109
|
}
|
|
@@ -91,93 +115,214 @@ for _k, v in ROR_ORGANIZATION_TYPE_TO_OBI.items():
|
|
|
91
115
|
_MISSED_ORG_TYPES: set[str] = set()
|
|
92
116
|
|
|
93
117
|
|
|
118
|
+
class LocationDetails(BaseModel):
|
|
119
|
+
"""The location details slot in the ROR schema."""
|
|
120
|
+
|
|
121
|
+
continent_code: str
|
|
122
|
+
continent_name: str
|
|
123
|
+
country_code: str
|
|
124
|
+
country_name: str
|
|
125
|
+
country_subdivision_code: str | None = None
|
|
126
|
+
country_subdivision_name: str | None = None
|
|
127
|
+
lat: float
|
|
128
|
+
lng: float
|
|
129
|
+
name: str
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class Location(BaseModel):
|
|
133
|
+
"""The location slot in the ROR schema."""
|
|
134
|
+
|
|
135
|
+
geonames_id: int
|
|
136
|
+
geonames_details: LocationDetails
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ExternalID(BaseModel):
|
|
140
|
+
"""The external ID slot in the ROR schema."""
|
|
141
|
+
|
|
142
|
+
type: str
|
|
143
|
+
all: list[str]
|
|
144
|
+
preferred: str | None = None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class Link(BaseModel):
|
|
148
|
+
"""The link slot in the ROR schema."""
|
|
149
|
+
|
|
150
|
+
type: str
|
|
151
|
+
value: str
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Name(BaseModel):
|
|
155
|
+
"""The name slot in the ROR schema."""
|
|
156
|
+
|
|
157
|
+
value: str
|
|
158
|
+
types: list[str]
|
|
159
|
+
lang: str | None = None
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class Relationship(BaseModel):
|
|
163
|
+
"""The relationship slot in the ROR schema."""
|
|
164
|
+
|
|
165
|
+
type: str
|
|
166
|
+
label: str
|
|
167
|
+
id: str
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class DateAnnotated(BaseModel):
|
|
171
|
+
"""The annotated date slot in the ROR schema."""
|
|
172
|
+
|
|
173
|
+
date: datetime.date
|
|
174
|
+
schema_version: str
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class Admin(BaseModel):
|
|
178
|
+
"""The admin slot in the ROR schema."""
|
|
179
|
+
|
|
180
|
+
created: DateAnnotated
|
|
181
|
+
last_modified: DateAnnotated
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
Status: TypeAlias = Literal["active", "inactive", "withdrawn"]
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class Record(BaseModel):
|
|
188
|
+
"""A ROR record."""
|
|
189
|
+
|
|
190
|
+
locations: list[Location]
|
|
191
|
+
established: int | None = None
|
|
192
|
+
external_ids: list[ExternalID]
|
|
193
|
+
id: str
|
|
194
|
+
domains: list[str]
|
|
195
|
+
links: list[Link]
|
|
196
|
+
names: list[Name]
|
|
197
|
+
relationships: list[Relationship]
|
|
198
|
+
status: Status
|
|
199
|
+
types: list[OrganizationType]
|
|
200
|
+
admin: Admin
|
|
201
|
+
|
|
202
|
+
def get_preferred_label(self) -> str | None:
|
|
203
|
+
"""Get the preferred label."""
|
|
204
|
+
primary_name: str | None = None
|
|
205
|
+
for name in self.names:
|
|
206
|
+
if "ror_display" in name.types:
|
|
207
|
+
primary_name = name.value
|
|
208
|
+
if primary_name is None:
|
|
209
|
+
return None
|
|
210
|
+
primary_name = NAME_REMAPPING.get(primary_name, primary_name)
|
|
211
|
+
return primary_name
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
_description_prefix = {
|
|
215
|
+
"education": "an educational organization",
|
|
216
|
+
"facility": "a facility",
|
|
217
|
+
"funder": "a funder",
|
|
218
|
+
"company": "a company",
|
|
219
|
+
"government": "a governmental organization",
|
|
220
|
+
"healthcare": "a healthcare organization",
|
|
221
|
+
"archive": "an archive",
|
|
222
|
+
"nonprofit": "a nonprofit organization",
|
|
223
|
+
"other": "an organization",
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _get_description(record: Record) -> str | None:
|
|
228
|
+
description = (
|
|
229
|
+
f"{_description_prefix[record.types[0]]} in {record.locations[0].geonames_details.name}"
|
|
230
|
+
)
|
|
231
|
+
if record.established:
|
|
232
|
+
description += f" established in {record.established}"
|
|
233
|
+
return description
|
|
234
|
+
|
|
235
|
+
|
|
94
236
|
def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
95
237
|
"""Iterate over terms in ROR."""
|
|
96
|
-
|
|
238
|
+
status, records = get_ror_records(force=force)
|
|
97
239
|
unhandled_xref_prefixes: set[str] = set()
|
|
98
240
|
|
|
99
241
|
seen_geonames_references = set()
|
|
100
|
-
for record in tqdm(records, unit_scale=True, unit="record", desc=f"{PREFIX} v{
|
|
101
|
-
identifier = record
|
|
102
|
-
name = record["name"]
|
|
103
|
-
name = NAME_REMAPPING.get(name, name)
|
|
242
|
+
for record in tqdm(records, unit_scale=True, unit="record", desc=f"{PREFIX} v{status.version}"):
|
|
243
|
+
identifier = record.id.removeprefix("https://ror.org/")
|
|
104
244
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
description += f" established in {established}"
|
|
245
|
+
primary_name = record.get_preferred_label()
|
|
246
|
+
if primary_name is None:
|
|
247
|
+
raise ValueError("should have got a primary name...")
|
|
109
248
|
|
|
110
249
|
term = Term(
|
|
111
|
-
reference=Reference(prefix=PREFIX, identifier=identifier, name=
|
|
250
|
+
reference=Reference(prefix=PREFIX, identifier=identifier, name=primary_name),
|
|
112
251
|
type="Instance",
|
|
113
|
-
definition=
|
|
252
|
+
definition=_get_description(record),
|
|
114
253
|
)
|
|
115
|
-
for organization_type in
|
|
116
|
-
if organization_type
|
|
117
|
-
term.append_parent(ORG_CLASS)
|
|
118
|
-
else:
|
|
254
|
+
for organization_type in record.types:
|
|
255
|
+
if organization_type in ROR_ORGANIZATION_TYPE_TO_OBI:
|
|
119
256
|
term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])
|
|
257
|
+
else:
|
|
258
|
+
term.append_parent(ORG_CLASS)
|
|
120
259
|
|
|
121
|
-
for link in record.
|
|
122
|
-
term.annotate_uri(has_homepage, link)
|
|
260
|
+
for link in record.links:
|
|
261
|
+
term.annotate_uri(has_homepage, link.value)
|
|
123
262
|
|
|
124
|
-
if
|
|
125
|
-
term.append_synonym(
|
|
263
|
+
if primary_name.startswith("The "):
|
|
264
|
+
term.append_synonym(primary_name.removeprefix("The "))
|
|
126
265
|
|
|
127
|
-
for relationship in record.
|
|
128
|
-
target_id = relationship
|
|
266
|
+
for relationship in record.relationships:
|
|
267
|
+
target_id = relationship.id.removeprefix("https://ror.org/")
|
|
129
268
|
term.append_relationship(
|
|
130
|
-
RMAP[relationship
|
|
269
|
+
RMAP[relationship.type], Reference(prefix=PREFIX, identifier=target_id)
|
|
131
270
|
)
|
|
132
271
|
|
|
133
|
-
if record.
|
|
272
|
+
if record.status != "active":
|
|
134
273
|
term.is_obsolete = True
|
|
135
274
|
|
|
136
|
-
for
|
|
137
|
-
city = address.get("geonames_city")
|
|
138
|
-
if not city:
|
|
139
|
-
continue
|
|
275
|
+
for location in record.locations:
|
|
140
276
|
geonames_reference = Reference(
|
|
141
|
-
prefix="geonames",
|
|
277
|
+
prefix="geonames",
|
|
278
|
+
identifier=str(location.geonames_id),
|
|
279
|
+
name=location.geonames_details.name,
|
|
142
280
|
)
|
|
143
281
|
seen_geonames_references.add(geonames_reference)
|
|
144
|
-
term.append_relationship(RMAP["
|
|
145
|
-
|
|
146
|
-
for
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
term.append_synonym(
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
282
|
+
term.append_relationship(RMAP["located in"], geonames_reference)
|
|
283
|
+
|
|
284
|
+
for name in record.names:
|
|
285
|
+
if "ror_display" in name.types:
|
|
286
|
+
continue
|
|
287
|
+
elif name.types == ["acronym"]:
|
|
288
|
+
term.append_synonym(name.value, type=acronym)
|
|
289
|
+
elif name.types == ["alias"]:
|
|
290
|
+
synonym = name.value.strip().replace("\n", " ")
|
|
291
|
+
term.append_synonym(synonym)
|
|
292
|
+
if synonym.startswith("The "):
|
|
293
|
+
term.append_synonym(synonym.removeprefix("The "), language=name.lang)
|
|
294
|
+
elif name.types == ["label"]:
|
|
295
|
+
label = name.value.strip().replace("\n", " ")
|
|
296
|
+
term.append_synonym(label, language=name.lang)
|
|
297
|
+
if label.startswith("The "):
|
|
298
|
+
term.append_synonym(label.removeprefix("The "), language=name.lang)
|
|
299
|
+
else:
|
|
300
|
+
tqdm.write(
|
|
301
|
+
f"[ror:{identifier}] unhandled name types: {name.types} for {name.value}"
|
|
302
|
+
)
|
|
303
|
+
continue
|
|
304
|
+
|
|
305
|
+
for external_id in record.external_ids:
|
|
306
|
+
if external_id.type.lower() == "orgref":
|
|
165
307
|
# OrgRef refers to wikipedia page id, see
|
|
166
308
|
# https://stackoverflow.com/questions/6168020/what-is-wikipedia-pageid-how-to-change-it-into-real-page-url
|
|
167
309
|
continue
|
|
168
|
-
norm_prefix = bioregistry.normalize_prefix(
|
|
310
|
+
norm_prefix = bioregistry.normalize_prefix(external_id.type)
|
|
311
|
+
xref_ids = external_id.all
|
|
312
|
+
|
|
169
313
|
if norm_prefix is None:
|
|
170
|
-
if
|
|
171
|
-
tqdm.write(
|
|
172
|
-
|
|
314
|
+
if external_id.type not in unhandled_xref_prefixes:
|
|
315
|
+
tqdm.write(
|
|
316
|
+
f"Unhandled prefix: {external_id.type} in {primary_name} ({term.curie}). Values:"
|
|
317
|
+
)
|
|
318
|
+
for xref_id in xref_ids:
|
|
173
319
|
tqdm.write(f"- {xref_id}")
|
|
174
|
-
unhandled_xref_prefixes.add(
|
|
320
|
+
unhandled_xref_prefixes.add(external_id.type)
|
|
175
321
|
continue
|
|
176
322
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
for xref_id in identifiers:
|
|
323
|
+
if isinstance(xref_ids, str):
|
|
324
|
+
xref_ids = [xref_ids]
|
|
325
|
+
for xref_id in xref_ids:
|
|
181
326
|
xref_id = xref_id.replace(" ", "")
|
|
182
327
|
try:
|
|
183
328
|
xref = Reference(prefix=norm_prefix, identifier=xref_id)
|
|
@@ -194,27 +339,60 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
|
|
|
194
339
|
yield geonames_term
|
|
195
340
|
|
|
196
341
|
|
|
197
|
-
|
|
342
|
+
class RORStatus(NamedTuple):
|
|
343
|
+
"""A version information tuple."""
|
|
344
|
+
|
|
345
|
+
version: str
|
|
346
|
+
url: str
|
|
347
|
+
path: Path
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def get_ror_status(*, force: bool = False, authenticate_zenodo: bool = True) -> RORStatus:
|
|
351
|
+
"""Ensure the latest ROR record, metadata, and filepath.
|
|
352
|
+
|
|
353
|
+
:param force: Should the record be downloaded again? This almost
|
|
354
|
+
never needs to be true, since the data doesn't change for
|
|
355
|
+
a given version
|
|
356
|
+
:param authenticate_zenodo: Should Zenodo be authenticated?
|
|
357
|
+
This isn't required, but can help avoid rate limits
|
|
358
|
+
:return: A version information tuple
|
|
359
|
+
|
|
360
|
+
.. note::
|
|
361
|
+
|
|
362
|
+
this goes into the ``~/.data/zenodo/6347574`` folder,
|
|
363
|
+
because 6347574 is the super-record ID, which groups all
|
|
364
|
+
versions together. this is different from the value
|
|
365
|
+
for :data:`ROR_ZENODO_RECORD_ID`
|
|
366
|
+
"""
|
|
198
367
|
client = zenodo_client.Zenodo()
|
|
199
|
-
latest_record_id = client.get_latest_record(
|
|
200
|
-
|
|
368
|
+
latest_record_id = client.get_latest_record(
|
|
369
|
+
ROR_ZENODO_RECORD_ID, authenticate=authenticate_zenodo
|
|
370
|
+
)
|
|
371
|
+
response = client.get_record(latest_record_id, authenticate=authenticate_zenodo)
|
|
201
372
|
response_json = response.json()
|
|
202
373
|
version = response_json["metadata"]["version"].lstrip("v")
|
|
203
374
|
file_record = response_json["files"][0]
|
|
204
375
|
name = file_record["key"]
|
|
205
376
|
url = file_record["links"]["self"]
|
|
206
377
|
path = client.download(latest_record_id, name=name, force=force)
|
|
207
|
-
return version, url, path
|
|
378
|
+
return RORStatus(version=version, url=url, path=path)
|
|
208
379
|
|
|
209
380
|
|
|
210
|
-
|
|
381
|
+
@lru_cache
|
|
382
|
+
def get_ror_records(
|
|
383
|
+
*, force: bool = False, authenticate_zenodo: bool = True
|
|
384
|
+
) -> tuple[RORStatus, list[Record]]:
|
|
211
385
|
"""Get the latest ROR metadata and records."""
|
|
212
|
-
|
|
213
|
-
with zipfile.ZipFile(path) as zf:
|
|
386
|
+
status = get_ror_status(force=force, authenticate_zenodo=authenticate_zenodo)
|
|
387
|
+
with zipfile.ZipFile(status.path) as zf:
|
|
214
388
|
for zip_info in zf.filelist:
|
|
215
389
|
if zip_info.filename.endswith(".json"):
|
|
216
390
|
with zf.open(zip_info) as file:
|
|
217
|
-
|
|
391
|
+
records = [
|
|
392
|
+
Record.model_validate(record)
|
|
393
|
+
for record in tqdm(json.load(file), unit_scale=True)
|
|
394
|
+
]
|
|
395
|
+
return status, records
|
|
218
396
|
raise FileNotFoundError
|
|
219
397
|
|
|
220
398
|
|
pyobo/struct/struct.py
CHANGED
|
@@ -611,6 +611,11 @@ class Obo:
|
|
|
611
611
|
|
|
612
612
|
ontology_version_iri: ClassVar[str | None] = None
|
|
613
613
|
|
|
614
|
+
#: Allow skipping adding maintainers annotations, in case
|
|
615
|
+
#: the resource maintainers don't want their names associated
|
|
616
|
+
#: with the OWL exports that e.g. end up on EBI OLS
|
|
617
|
+
skip_maintainers: ClassVar[bool] = False
|
|
618
|
+
|
|
614
619
|
def __post_init__(self):
|
|
615
620
|
"""Run post-init checks."""
|
|
616
621
|
if self.ontology is None:
|
|
@@ -978,19 +983,22 @@ class Obo:
|
|
|
978
983
|
yield Annotation(v.has_logo, OBOLiteral.uri(logo))
|
|
979
984
|
if mailing_list := resource.get_mailing_list():
|
|
980
985
|
yield Annotation(v.has_mailing_list, OBOLiteral.string(mailing_list))
|
|
981
|
-
if
|
|
982
|
-
|
|
983
|
-
v.has_maintainer,
|
|
984
|
-
Reference(prefix="orcid", identifier=maintainer.orcid, name=maintainer.name),
|
|
985
|
-
)
|
|
986
|
-
for maintainer in resource.contact_extras or []:
|
|
987
|
-
if maintainer.orcid:
|
|
986
|
+
if not self.skip_maintainers:
|
|
987
|
+
if (maintainer := resource.get_contact()) and maintainer.orcid:
|
|
988
988
|
yield Annotation(
|
|
989
989
|
v.has_maintainer,
|
|
990
990
|
Reference(
|
|
991
991
|
prefix="orcid", identifier=maintainer.orcid, name=maintainer.name
|
|
992
992
|
),
|
|
993
993
|
)
|
|
994
|
+
for maintainer in resource.contact_extras or []:
|
|
995
|
+
if maintainer.orcid:
|
|
996
|
+
yield Annotation(
|
|
997
|
+
v.has_maintainer,
|
|
998
|
+
Reference(
|
|
999
|
+
prefix="orcid", identifier=maintainer.orcid, name=maintainer.name
|
|
1000
|
+
),
|
|
1001
|
+
)
|
|
994
1002
|
|
|
995
1003
|
# Root terms
|
|
996
1004
|
for root_term in self.root_terms or []:
|
pyobo/struct/typedef.py
CHANGED
|
@@ -20,6 +20,7 @@ __all__ = [
|
|
|
20
20
|
"derives_from_organism",
|
|
21
21
|
"directly_regulates_activity_of",
|
|
22
22
|
"editor_note",
|
|
23
|
+
"editor_preferred_term",
|
|
23
24
|
"enables",
|
|
24
25
|
"ends",
|
|
25
26
|
"exact_match",
|
|
@@ -28,6 +29,7 @@ __all__ = [
|
|
|
28
29
|
"gene_product_member_of",
|
|
29
30
|
"has_contributor",
|
|
30
31
|
"has_creator",
|
|
32
|
+
"has_curation_status",
|
|
31
33
|
"has_dbxref",
|
|
32
34
|
"has_depiction",
|
|
33
35
|
"has_end_date",
|
|
@@ -43,17 +45,20 @@ __all__ = [
|
|
|
43
45
|
"has_role",
|
|
44
46
|
"has_salt",
|
|
45
47
|
"has_smiles",
|
|
48
|
+
"has_source",
|
|
46
49
|
"has_start_date",
|
|
47
50
|
"has_successor",
|
|
48
51
|
"has_taxonomy_rank",
|
|
49
52
|
"is_a",
|
|
50
53
|
"is_agonist_of",
|
|
51
54
|
"is_antagonist_of",
|
|
55
|
+
"is_defined_by",
|
|
52
56
|
"is_inverse_agonist_of",
|
|
53
57
|
"located_in",
|
|
54
58
|
"mapping_has_confidence",
|
|
55
59
|
"mapping_has_justification",
|
|
56
60
|
"match_typedefs",
|
|
61
|
+
"may_be_identical_to",
|
|
57
62
|
"member_of",
|
|
58
63
|
"narrow_match",
|
|
59
64
|
"negatively_regulates",
|
|
@@ -151,6 +156,10 @@ subproperty_of = TypeDef(reference=v.subproperty_of)
|
|
|
151
156
|
see_also = TypeDef(reference=v.see_also, is_metadata_tag=True)
|
|
152
157
|
comment = TypeDef(reference=v.comment, is_metadata_tag=True)
|
|
153
158
|
label = TypeDef(reference=v.label, is_metadata_tag=True)
|
|
159
|
+
is_defined_by = TypeDef(
|
|
160
|
+
reference=Reference(prefix="rdfs", identifier="isDefinedBy", name="is defined by"),
|
|
161
|
+
is_metadata_tag=True,
|
|
162
|
+
)
|
|
154
163
|
has_member = TypeDef(
|
|
155
164
|
reference=Reference(prefix=RO_PREFIX, identifier="0002351", name="has member"),
|
|
156
165
|
)
|
|
@@ -227,6 +236,17 @@ definition_source = TypeDef(
|
|
|
227
236
|
reference=Reference(prefix=IAO_PREFIX, identifier="0000119", name="definition source"),
|
|
228
237
|
is_metadata_tag=True,
|
|
229
238
|
)
|
|
239
|
+
may_be_identical_to = TypeDef(
|
|
240
|
+
reference=Reference(prefix=IAO_PREFIX, identifier="0006011", name="may be identical to")
|
|
241
|
+
)
|
|
242
|
+
# todo this is also useful for SSSLM
|
|
243
|
+
editor_preferred_term = TypeDef(
|
|
244
|
+
reference=Reference(prefix=IAO_PREFIX, identifier="0000111", name="editor preferred term")
|
|
245
|
+
)
|
|
246
|
+
has_curation_status = TypeDef(
|
|
247
|
+
reference=Reference(prefix=IAO_PREFIX, identifier="0000114", name="has curation status")
|
|
248
|
+
)
|
|
249
|
+
|
|
230
250
|
has_dbxref = TypeDef(reference=v.has_dbxref, is_metadata_tag=True)
|
|
231
251
|
|
|
232
252
|
editor_note = TypeDef(
|
pyobo/utils/misc.py
CHANGED
|
@@ -48,6 +48,12 @@ VERSION_PREFIXES = [
|
|
|
48
48
|
"https://w3id.org/lehrplan/ontology/", # like in https://w3id.org/lehrplan/ontology/1.0.0-4
|
|
49
49
|
"http://www.ebi.ac.uk/swo/version/", # http://www.ebi.ac.uk/swo/version/6.0
|
|
50
50
|
"https://w3id.org/emi/version/",
|
|
51
|
+
"https://nfdi4culture.de/ontology/", # https://nfdi4culture.de/ontology/3.0.0
|
|
52
|
+
"http://purls.helmholtz-metadaten.de/mwo/mwo.owl/", # http://purls.helmholtz-metadaten.de/mwo/mwo.owl/3.0.0
|
|
53
|
+
"http://publications.europa.eu/ontology/euvoc#",
|
|
54
|
+
"http://purls.helmholtz-metadaten.de/cdos/cdco/", # http://purls.helmholtz-metadaten.de/cdos/cdco/1.0.0
|
|
55
|
+
"https://endlessforms.info/gdo/releases/gdoa-",
|
|
56
|
+
"https://openenergyplatform.org/ontology/oeo/releases/",
|
|
51
57
|
]
|
|
52
58
|
VERSION_PREFIX_SPLITS = [
|
|
53
59
|
"http://www.ebi.ac.uk/efo/releases/v",
|
|
@@ -56,10 +62,13 @@ VERSION_PREFIX_SPLITS = [
|
|
|
56
62
|
"http://ontology.neuinfo.org/NIF/ttl/nif/version/",
|
|
57
63
|
"http://nmrml.org/cv/v", # as in http://nmrml.org/cv/v1.1.0/nmrCV
|
|
58
64
|
"http://enanomapper.github.io/ontologies/releases/", # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
|
|
65
|
+
"https://w3id.org/sulo/sulo-", # as in https://w3id.org/sulo/sulo-0.2.4.ttl
|
|
66
|
+
"https://spec.industrialontologies.org/ontology/", # as in https://spec.industrialontologies.org/ontology/202401/core/Core
|
|
59
67
|
]
|
|
60
68
|
BAD = {
|
|
61
69
|
"http://purl.obolibrary.org/obo",
|
|
62
70
|
"http://www.bioassayontology.org/bao/bao_complete",
|
|
71
|
+
"https://vocabularies.wikipathways.org",
|
|
63
72
|
}
|
|
64
73
|
|
|
65
74
|
|
pyobo/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobo
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.14
|
|
4
4
|
Summary: A python package for handling and generating OBO
|
|
5
5
|
Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -40,11 +40,11 @@ Requires-Dist: bioversions>=0.8.101
|
|
|
40
40
|
Requires-Dist: bioregistry>=0.12.30
|
|
41
41
|
Requires-Dist: bioontologies>=0.7.2
|
|
42
42
|
Requires-Dist: ssslm>=0.0.13
|
|
43
|
-
Requires-Dist: zenodo-client>=0.
|
|
43
|
+
Requires-Dist: zenodo-client>=0.4.0
|
|
44
44
|
Requires-Dist: class-resolver>=0.6.0
|
|
45
45
|
Requires-Dist: pydantic>=2.0
|
|
46
46
|
Requires-Dist: curies>=0.10.17
|
|
47
|
-
Requires-Dist: curies-processing>=0.1.
|
|
47
|
+
Requires-Dist: curies-processing>=0.1.6
|
|
48
48
|
Requires-Dist: python-dateutil
|
|
49
49
|
Requires-Dist: networkx>=3.4
|
|
50
50
|
Requires-Dist: drugbank-downloader
|
|
@@ -21,7 +21,7 @@ pyobo/cli/database.py,sha256=iq4-eJ_8Kgim0MMnSqf1wu2hvHDZk9BWNMVuwykz2Ko,13407
|
|
|
21
21
|
pyobo/cli/database_utils.py,sha256=jQ7qSg-oZk-Q-RC121NkUbqXeNg0K8fV96igXaK3ra4,5540
|
|
22
22
|
pyobo/cli/lookup.py,sha256=-Tz6E3OP6HBPipVqSrMn7LyzrezbT0pngeDExF5uyn4,9467
|
|
23
23
|
pyobo/cli/utils.py,sha256=SfW9JC8olzp4AcAsDodMPdgHfteeXq0Ngd8yQ2kzOMA,1812
|
|
24
|
-
pyobo/constants.py,sha256=
|
|
24
|
+
pyobo/constants.py,sha256=_Y4IACPsRu5jHntoc8nEHKLGDl1Bl7YrdS0gG4qxmqw,7582
|
|
25
25
|
pyobo/getters.py,sha256=exZNjlseRoWhshQYArWwN2KLvkk4NmCQnHM88YUYf-w,18340
|
|
26
26
|
pyobo/gilda_utils.py,sha256=uThAArALSNzJi-hrR9lUvtQci5VIw4Cqq2L6mtkp0Ko,1990
|
|
27
27
|
pyobo/identifier_utils/__init__.py,sha256=iSHAZhOwqWZmYpOfpXNLuKouSYj7ZSViaX7BF5uT8so,793
|
|
@@ -95,7 +95,7 @@ pyobo/sources/gwascentral/__init__.py,sha256=VdHIgBVR-36B7gLuhR4XV5UXn_EgNZ0-i1g
|
|
|
95
95
|
pyobo/sources/gwascentral/gwascentral_phenotype.py,sha256=SvMCF3sYqc3F5nNXCf76AzIhNT698ZHHnTpO0IGft8o,1788
|
|
96
96
|
pyobo/sources/gwascentral/gwascentral_study.py,sha256=fzDiRuWU84085gclzuWNLbxDEsIdVysqXScMU0BdXnM,2672
|
|
97
97
|
pyobo/sources/hgnc/__init__.py,sha256=1QhM7GdP6_AxCMzaBz2k3SRLkDTOkS2hUhPMLETSGWM,157
|
|
98
|
-
pyobo/sources/hgnc/hgnc.py,sha256=
|
|
98
|
+
pyobo/sources/hgnc/hgnc.py,sha256=vGpohqlDIZAvF372EDmhqik22z5yuCOAAMZ1TtNnSwo,20426
|
|
99
99
|
pyobo/sources/hgnc/hgncgenefamily.py,sha256=NGkQXlyac0283WHeYXr3_o__Ak-hpbNnB0NIr2J0QGo,3912
|
|
100
100
|
pyobo/sources/iana_media_type.py,sha256=X20G2a1ozF-JPaqNs3tUrNoeSb7jv_YsClKSTPH-8io,5024
|
|
101
101
|
pyobo/sources/icd/__init__.py,sha256=DrZVl76P8essJ4tlZPcauFVrXVkIIZB6sJ0c6Km6SWI,142
|
|
@@ -111,7 +111,7 @@ pyobo/sources/kegg/api.py,sha256=szu25gS3ED5vUcKHqh6XI6g5EZz52VDS_UFIaIvU5Jw,419
|
|
|
111
111
|
pyobo/sources/kegg/genes.py,sha256=x0oVbRYS5xUPUHhYuoAojsIPyfjAi8t7Rfr3Faptdw0,3558
|
|
112
112
|
pyobo/sources/kegg/genome.py,sha256=Mq7bZitjfJbIHk2g2gnSJEqZljABRlqhpXIDXlOGzHo,4161
|
|
113
113
|
pyobo/sources/kegg/pathway.py,sha256=gjM97-nWmgpJO2k2gRLWHZIhldMz0IHtNnmzVb-yNa8,5892
|
|
114
|
-
pyobo/sources/mesh.py,sha256=
|
|
114
|
+
pyobo/sources/mesh.py,sha256=7BXsrBCSnpHlJ_zb5udo9YvTC6S3Wkc-eaHt5O5grgE,15837
|
|
115
115
|
pyobo/sources/mgi.py,sha256=fUD4bUuwe8k-H4tF54VvDyUxT-xeBMZUj_flYRsfpc8,5541
|
|
116
116
|
pyobo/sources/mirbase/__init__.py,sha256=8_YENjSy55_CKkGdGTA7Ujuv8dt-Yfh90V-ht3jwplk,252
|
|
117
117
|
pyobo/sources/mirbase/mirbase.py,sha256=9huL0PWAVcEDk-4unZw-GInyEE1C_FXwRxN65HG3xxc,6387
|
|
@@ -146,7 +146,7 @@ pyobo/sources/pubchem.py,sha256=lTsc0dgrSJ5kxlcaUcUXJydi-xsMf2fI2EcWsbW8xTU,5153
|
|
|
146
146
|
pyobo/sources/reactome.py,sha256=yzeCCy19Hxr7KFUrg1tePjN40eZLN9G4dm4q0A7izbI,5842
|
|
147
147
|
pyobo/sources/rgd.py,sha256=I6HC7_GZqFPXlRIuuroOgZM7wG85jcE3Po-sry8a1XU,5194
|
|
148
148
|
pyobo/sources/rhea.py,sha256=dbhyVJ1yoAQoxoWp47OKegLAmMh6qoe-7_DafJDeWrk,8164
|
|
149
|
-
pyobo/sources/ror.py,sha256=
|
|
149
|
+
pyobo/sources/ror.py,sha256=pFKV5qzTSU2XxvZhMOB-WyMLQ6u2dsAqadACswCqyNk,13387
|
|
150
150
|
pyobo/sources/selventa/__init__.py,sha256=cIIDlWL9znL2r0aZIJ2r1FN3NM9io74w-He2egzJwX8,254
|
|
151
151
|
pyobo/sources/selventa/schem.py,sha256=vqN_a_NLeshj-2dRuZelfOLJQ5dyU3ZghfSRIq9EFVA,1162
|
|
152
152
|
pyobo/sources/selventa/scomp.py,sha256=nshskLQeXuzf5t79STJsVKJg3R80CX3DuJBUrEhwSbc,1531
|
|
@@ -191,21 +191,21 @@ pyobo/struct/obograph/export.py,sha256=yk3MHulDM_SMohfwiFdeB62-XGJ2ZNgRUZGjGjiyo
|
|
|
191
191
|
pyobo/struct/obograph/reader.py,sha256=264yVeD8a3jGx9EaGUZVxFbSQ_pwQ_6ckVw9S8wiJfM,8525
|
|
192
192
|
pyobo/struct/obograph/utils.py,sha256=je0kSkC24MU9pWRHq1_K-J5jWhjWESY6NI3TpZqvZ_Q,1516
|
|
193
193
|
pyobo/struct/reference.py,sha256=qgwTa-0VIoDklQ7LjlYH-mf2WG0_uO7KlHt0PSBail4,11744
|
|
194
|
-
pyobo/struct/struct.py,sha256
|
|
194
|
+
pyobo/struct/struct.py,sha256=FaNyYUSFgM700xE74i8zSVQQiR4g91jbmAMOqPyJ9NQ,96669
|
|
195
195
|
pyobo/struct/struct_utils.py,sha256=VFC82fgphTs7sBSNGn53sv2WMl7Ho9srALFq92bRFQQ,41021
|
|
196
|
-
pyobo/struct/typedef.py,sha256=
|
|
196
|
+
pyobo/struct/typedef.py,sha256=jvETRmNc4g09X7waSjcvEkw7g0kNteLaDOt3mjKbmDc,14871
|
|
197
197
|
pyobo/struct/utils.py,sha256=zkpOE42JQIfkN0rc5qNENK03VIKmkf_57tHojMJK71Y,906
|
|
198
198
|
pyobo/struct/vocabulary.py,sha256=wY_wlV17NyoPQZMiCkf72GL4bIB9gwTHIoMlx7W8Cf4,5375
|
|
199
199
|
pyobo/utils/__init__.py,sha256=CANSY8vGq_6v8rWhWRIdnk-Wo5LA2R9Wjg1nqbWqLOw,17
|
|
200
200
|
pyobo/utils/cache.py,sha256=x7Dnw8vHYsoslXWiV9L9ZdTu5ZueIR1ki5Ncdpb7H10,3136
|
|
201
201
|
pyobo/utils/io.py,sha256=QTvGjNDkaIf78Tca23B5RW_aVweKiSsxmgwSKXcMSNo,3921
|
|
202
202
|
pyobo/utils/iter.py,sha256=rYRbbaFJHxMaE0yU-rQZoCagYIrtev09uY0mxFkf5zY,1524
|
|
203
|
-
pyobo/utils/misc.py,sha256=
|
|
203
|
+
pyobo/utils/misc.py,sha256=DVLxPynunSubp2xSVKFF_E57BPS3p5wU4Z4A38PI17M,8717
|
|
204
204
|
pyobo/utils/ndex_utils.py,sha256=EokCWS00Wrk_4y8ldeQuUyaaC6yNzBg3DagUl-J2czY,2326
|
|
205
205
|
pyobo/utils/path.py,sha256=snV58UHxHO6GI2QPPE46ssR4RWozaw83V59sS_I9BY8,4109
|
|
206
|
-
pyobo/version.py,sha256=-
|
|
207
|
-
pyobo-0.12.
|
|
208
|
-
pyobo-0.12.
|
|
209
|
-
pyobo-0.12.
|
|
210
|
-
pyobo-0.12.
|
|
211
|
-
pyobo-0.12.
|
|
206
|
+
pyobo/version.py,sha256=-RjPuUzkByyIQ26Vfaw4q_Cb--M-vMCnMRNP78aBv58,927
|
|
207
|
+
pyobo-0.12.14.dist-info/licenses/LICENSE,sha256=QcgJZKGxlW5BwBNnCBL8VZLVtRvXs81Ch9lJRQSIpJg,1076
|
|
208
|
+
pyobo-0.12.14.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
|
|
209
|
+
pyobo-0.12.14.dist-info/entry_points.txt,sha256=ANgzvuwF_9_1ipCoxJtbBM6A4i2Mkt39gMPzQO6hvGs,42
|
|
210
|
+
pyobo-0.12.14.dist-info/METADATA,sha256=3bCbmYuRjLGPhWqwRm337UrFL0Nb7iU1--Ici26iZnc,22783
|
|
211
|
+
pyobo-0.12.14.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|