pyobo 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +0,0 @@
1
- """Load the manually curated CURIE/URI string preprocessing rules."""
2
-
3
- from functools import lru_cache
4
- from pathlib import Path
5
-
6
- from curies.preprocessing import PreprocessingRules, _load_rules
7
-
8
- from ..resources.goc import load_goc_map
9
-
10
# Names exported by ``from <module> import *``.
__all__ = [
    "get_rules",
]

# Resolved directory that contains this module.
HERE = Path(__file__).parent.resolve()
# JSON file with the preprocessing rules, located next to this module.
RULES_PATH = HERE.joinpath("preprocessing.json")
16
-
17
-
18
@lru_cache(1)
def get_rules() -> PreprocessingRules:
    """Return the CURIE/URI string preprocessing rules.

    The rules are read from ``RULES_PATH`` and their full-string rewrites are
    extended with the GOC to ORCID mappings from ``load_goc_map``. The result
    is cached, so later calls return the same object.
    """
    preprocessing_rules = _load_rules(RULES_PATH)
    full_rewrites = preprocessing_rules.rewrites.full
    full_rewrites.update(load_goc_map())
    return preprocessing_rules
24
-
25
-
26
# When executed as a script, run the rules file through
# ``PreprocessingRules.lint_file``.
if __name__ == "__main__":
    PreprocessingRules.lint_file(RULES_PATH)
pyobo/obographs.py DELETED
@@ -1,147 +0,0 @@
1
- """Convert PyOBO into OBO Graph."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- from collections.abc import Iterable
7
-
8
- import bioregistry
9
- from bioontologies.obograph import (
10
- Definition,
11
- Edge,
12
- Graph,
13
- GraphDocument,
14
- Meta,
15
- Node,
16
- Synonym,
17
- Xref,
18
- )
19
- from bioontologies.robot import ParseResults
20
- from tqdm import tqdm
21
-
22
- from pyobo.struct import Obo, OBOLiteral, Reference, Term
23
- from pyobo.struct.typedef import definition_source, is_a
24
-
25
# Names exported by ``from pyobo.obographs import *``.
__all__ = [
    "graph_from_obo",
    "parse_results_from_obo",
]

# Module-level logger, used for debug messages during conversion.
logger = logging.getLogger(__name__)
31
-
32
-
33
def parse_results_from_obo(obo: Obo) -> ParseResults:
    """Convert a PyOBO ontology into parse results holding a single OBO Graph.

    :param obo: The ontology to convert with :func:`graph_from_obo`.
    :returns: Parse results whose graph document contains exactly that graph.
    """
    document = GraphDocument(graphs=[graph_from_obo(obo)])
    return ParseResults(graph_document=document)
37
-
38
-
39
def graph_from_obo(obo: Obo, use_tqdm: bool = True) -> Graph:
    """Build an OBO Graph object from a PyOBO ontology.

    :param obo: The ontology whose terms are turned into nodes and edges.
    :param use_tqdm: If true, show a progress bar while iterating over terms.
    :returns: A graph with one class node per term plus the edges yielded
        for each term by ``_iter_edges``.
    """
    class_nodes: list[Node] = []
    term_edges: list[Edge] = []
    progress = tqdm(
        obo,
        disable=not use_tqdm,
        unit="term",
        unit_scale=True,
        desc=f"[{obo._prefix_version}] to OBO Graph JSON",
    )
    for term in progress:
        class_nodes.append(_get_class_node(term))
        term_edges += _iter_edges(term)

    graph_iri = bioregistry.get_bioregistry_iri("bioregistry", obo.ontology)
    return Graph(
        id=graph_iri,
        prefix=obo.ontology,
        meta=_get_meta(obo),
        nodes=class_nodes,
        edges=term_edges,
        # the graph is assembled from already-parsed references
        standardized=True,
    )
60
-
61
-
62
def _get_meta(obo: Obo) -> Meta:
    """Build the graph-level metadata, which only carries the data version."""
    data_version = obo.data_version
    return Meta(version=data_version)
66
-
67
-
68
def _get_class_node(term: Term) -> Node:
    """Convert a term into an OBO Graph node of type ``CLASS``.

    The node metadata carries the definition (only when the term has a
    definition or provenance), the mappings as xrefs, and the synonyms.
    Term properties are not converted yet.
    """

    def _convert_synonym(synonym):
        # Build the pieces in the same order the keyword arguments are passed.
        predicate = Reference.from_reference(synonym.predicate)
        if synonym.type:
            synonym_type = Reference.from_reference(synonym.type)
        else:
            synonym_type = None
        return Synonym.from_parsed(
            name=synonym.name,
            predicate=predicate,
            synonym_type=synonym_type,
            references=_prep_prov(synonym.provenance),
        )

    definition = (
        Definition.from_parsed(
            value=term.definition,
            references=_prep_prov(term.provenance),
        )
        if term.provenance or term.definition
        else None
    )

    mappings = term.get_mappings(include_xrefs=True, add_context=False)
    xrefs = [
        Xref.from_parsed(
            predicate=Reference.from_reference(predicate),
            value=Reference.from_reference(target),
        )
        for predicate, target in mappings
    ]
    synonyms = [_convert_synonym(synonym) for synonym in term.synonyms]

    meta = Meta(
        definition=definition,
        xrefs=xrefs,
        synonyms=synonyms,
        basicPropertyValues=None,  # TODO properties
        deprecated=term.is_obsolete or False,
    )
    # FIXME do expansion same as for OFN
    node_iri = f"https://bioregistry.io/{term.curie}"
    return Node(
        id=node_iri,
        lbl=term.name,
        meta=meta,
        type="CLASS",
        reference=Reference.from_reference(term.reference),
        standardized=True,
    )
110
-
111
-
112
def _prep_prov(provenance):
    """Convert the reference entries of a provenance collection.

    Entries that are ``Reference`` instances are converted with
    ``Reference.from_reference``. ``OBOLiteral`` entries are not supported yet
    and are dropped with a debug message. Anything else is silently ignored.
    """
    references = []
    for entry in provenance:
        if isinstance(entry, Reference):
            references.append(Reference.from_reference(entry))
        elif isinstance(entry, OBOLiteral):
            logger.debug("not implemented to convert literal provenance")
    return references
122
-
123
-
124
def _iter_edges(term: Term) -> Iterable[Edge]:
    """Yield the edges that have the given term as their subject.

    In order, these are: one ``is_a`` edge per parent, one edge per
    relationship target, and one ``definition_source`` edge per provenance
    entry that is a ``Reference``.
    """

    def _edge(predicate, target) -> Edge:
        # Every edge starts at the term itself.
        return Edge.from_parsed(
            Reference.from_reference(term.reference),
            Reference.from_reference(predicate),
            Reference.from_reference(target),
        )

    yield from (_edge(is_a.reference, parent) for parent in term.parents)

    for typedef, targets in term.relationships.items():
        yield from (_edge(typedef, target) for target in targets)

    yield from (
        _edge(definition_source.reference, source)
        for source in term.provenance
        if isinstance(source, Reference)
    )
    # TODO also look through xrefs and seealso to get provenance xrefs?
pyobo/resources/goc.py DELETED
@@ -1,75 +0,0 @@
1
- """Get GOC to ORCID CURIE mappings.
2
-
3
- Due to historical reasons, the Gene Ontology and related resources use an internal
4
- curator identifier space ``GOC`` instead of ORCID. This namespace is partially mapped to
5
- ORCID and is version controlled `here
6
- <https://raw.githubusercontent.com/geneontology/go-site/refs/heads/master/metadata/users.yaml>`_.
7
-
8
- This module loads that namespace and uses :mod:`orcid_downloader` to try and add
9
- additional ORCID groundings. Then, this module is loaded in PyOBO's custom CURIE upgrade
10
- system so GOC CURIEs are seamlessly replaced with ORCID CURIEs, when possible.
11
-
12
- .. seealso::
13
-
14
- https://github.com/geneontology/go-ontology/issues/22551
15
- """
16
-
17
- import csv
18
- from pathlib import Path
19
-
20
# Names exported by ``from pyobo.resources.goc import *``.
__all__ = ["load_goc_map"]

# Upstream YAML file downloaded by ``main`` to regenerate the mappings.
URL = "https://raw.githubusercontent.com/geneontology/go-site/refs/heads/master/metadata/users.yaml"

# Resolved directory that contains this module.
HERE = Path(__file__).parent.resolve()
# Tab-separated mapping file: written by ``main``, read by ``load_goc_map``.
PATH = HERE.joinpath("goc.tsv")
26
-
27
-
28
def load_goc_map(path: "Path | None" = None) -> dict[str, str]:
    """Get GOC to ORCID mappings.

    :param path: Tab-separated file to read. Defaults to the ``goc.tsv`` file
        stored next to this module (``PATH``).
    :returns: A dictionary from GOC CURIE to ``orcid:``-prefixed CURIE. Each
        GOC CURIE is registered both as written and fully upper-cased.

    The column header row written by ``main`` is skipped so it does not end up
    as a bogus ``curie -> orcid:orcid`` mapping. Rows with fewer than three
    columns (e.g. blank lines) are ignored instead of raising an error.
    """
    rv: dict[str, str] = {}
    source = Path(PATH if path is None else path)
    # newline="" is what the csv module asks for when reading files
    with source.open(newline="") as f:
        for index, row in enumerate(csv.reader(f, delimiter="\t")):
            if len(row) < 3:
                continue
            goc_curie, _, orcid = row[:3]
            if index == 0 and goc_curie == "curie":
                # header row: curie, name, orcid, guessed
                continue
            orcid_curie = f"orcid:{orcid}"
            rv[goc_curie] = orcid_curie
            rv[goc_curie.upper()] = orcid_curie
    return rv
36
-
37
-
38
def main() -> None:
    """Generate GOC to ORCID mappings.

    Downloads the user listing from ``URL`` and rewrites ``PATH`` as a
    tab-separated file with the columns ``curie``, ``name``, ``orcid`` and
    ``guessed``. Only records with a ``GOC:`` xref and a non-empty ``uri``
    are considered. If the URI points at orcid.org, the ORCID is taken from
    it. Otherwise the ORCID is looked up by nickname via
    :mod:`orcid_downloader`, and the row is flagged as guessed so it can be
    checked by hand.

    :raises requests.HTTPError: If the download fails. This is raised before
        the output file is opened, so an existing file is not truncated.
    """
    import orcid_downloader
    import requests
    import yaml
    from tqdm import tqdm

    columns = ["curie", "name", "orcid", "guessed"]
    res = requests.get(URL, timeout=5)
    # fail early rather than parsing an error page as YAML
    res.raise_for_status()
    records = yaml.safe_load(res.text)
    with PATH.open("w") as file:
        print(*columns, sep="\t", file=file)
        for record in tqdm(records, unit="person"):
            goc_curie = record.get("xref")
            if goc_curie is None or not goc_curie.startswith("GOC:"):
                continue

            nickname = record["nickname"]
            uri = record.get("uri", "")
            if not uri:
                continue

            if "orcid.org" in uri:
                # accept both the https and the plain http form of the URI
                orcid = uri.removeprefix("https://orcid.org/").removeprefix("http://orcid.org/")
                guessed = False
            else:
                # NOTE(review): presumably returns an ORCID or a falsy value
                # when there is no unambiguous match - confirm upstream
                orcid = orcid_downloader.ground_researcher_unambiguous(nickname)
                if not orcid:
                    tqdm.write(f"Could not guess ORCID for {nickname}")
                    continue

                tqdm.write(f"Check if https://orcid.org/{orcid} is correct for {nickname}")
                guessed = True

            print(goc_curie, nickname, orcid, guessed, sep="\t", file=file)
73
-
74
# When executed as a script, regenerate the GOC to ORCID mapping file.
if __name__ == "__main__":
    main()
pyobo/resources/goc.tsv DELETED
@@ -1,188 +0,0 @@
1
- curie name orcid guessed
2
- GOC:jk Jim Knowles 0009-0009-5100-2472 True
3
- GOC:amu Anushya Muruganujan 0000-0001-7169-5864 True
4
- GOC:pt Paul Thomas 0000-0002-9074-3507 False
5
- GOC:hm Huaiyu Mi 0000-0001-8721-202X False
6
- GOC:sjc Seth Carbon 0000-0001-8244-1536 False
7
- GOC:jnx Jeremy Nguyen Xuan 0000-0002-4301-0968 True
8
- GOC:anm Anna Melidoni 0000-0002-2535-883X True
9
- GOC:ml2 Mike Livstone 0000-0001-5386-5823 True
10
- GOC:hd Heiko Dietze 0000-0003-0234-1688 False
11
- GOC:dp Dexter Pratt 0000-0002-1471-9513 True
12
- GOC:di Diane Inglis 0000-0003-3166-4638 False
13
- GOC:cy Courtland Yockey 0000-0003-4917-3490 True
14
- GOC:hal Hadil Alrohaif 0000-0002-6980-6972 True
15
- GOC:jbu Jessica Buxton 0000-0002-0918-9335 False
16
- GOC:kom Klaus Mitchell 0000-0001-9510-5320 False
17
- GOC:nc Nancy Campbell 0000-0001-9995-0839 False
18
- GOC:rl Ruth Lovering 0000-0002-9791-0064 False
19
- GOC:vk Varsha Khodiyar 0000-0002-2743-6918 True
20
- GOC:amm Anna Maria Masci 0000-0003-1940-6740 True
21
- GOC:dsd David S. Dougall 0000-0002-9043-2709 True
22
- GOC:pf Petra Fey 0000-0002-4532-2703 False
23
- GOC:pg Pascale Gaudet 0000-0003-1813-6857 False
24
- GOC:rjd Robert Dodson 0000-0002-2757-5950 False
25
- GOC:jh2 Jim Hu 0000-0001-9016-2684 False
26
- GOC:nv Nicole Vasilevsky 0000-0001-5208-3432 True
27
- GOC:ha Helen Attrill 0000-0003-3212-6364 False
28
- GOC:hb Heather Butler 0000-0003-4454-4889 True
29
- GOC:mc Marta Costa 0000-0001-5948-3092 False
30
- GOC:ma Michael Ashburner 0000-0002-6962-2807 False
31
- GOC:dos David Osumi-Sutherland 0000-0002-7073-9172 False
32
- GOC:sart Susan Tweedie 0000-0003-1818-8243 False
33
- GOC:mb Matt Berriman 0000-0002-9581-0377 False
34
- GOC:bf Rebecca Foulger 0000-0001-8682-8754 False
35
- GOC:ceb Cath Brooksbank 0000-0001-9395-7001 False
36
- GOC:jid Jennifer Deegan (nee Clark) 0000-0001-9227-417X False
37
- GOC:jl Jane Lomax 0000-0001-8865-4321 False
38
- GOC:mec Melanie Courtot 0000-0002-9551-6370 False
39
- GOC:pr Paola Roncaglia 0000-0002-2825-0621 False
40
- GOC:als Alice Dashow 0000-0003-3829-1600 False
41
- GOC:es Elena Speretta 0000-0003-1506-7438 False
42
- GOC:gg George Georghiou 0000-0001-5067-3199 False
43
- GOC:hbye Hema Bye-A-Jee 0000-0003-2464-7688 False
44
- GOC:pm Prudence Mutowo 0000-0002-4646-4172 True
45
- GOC:rph Rachael Huntley 0000-0001-6718-3559 False
46
- GOC:yaf Yasmin Alam-Faruque 0000-0001-8902-0232 True
47
- GOC:bhm Birgit Meldal 0000-0003-4062-6158 False
48
- GOC:imk Ingrid Keseler 0000-0003-1738-6117 True
49
- GOC:dph David Hill 0000-0001-7476-6306 False
50
- GOC:tfm Terry Meehan 0000-0003-1980-3228 True
51
- GOC:ajp Tony Planchart 0000-0001-8691-8856 False
52
- GOC:dms Dmitry Sitnikov 0000-0003-3394-9805 False
53
- GOC:smb Sue Bello 0000-0003-4606-0597 False
54
- GOC:hjd Harold Drabkin 0000-0003-2689-5511 False
55
- GOC:ln Li Ni 0000-0002-9796-7693 False
56
- GOC:jab Judith Blake 0000-0001-8522-334X False
57
- GOC:crds Claudia Rato da Silva 0000-0002-3971-046X True
58
- GOC:tw Trish Whetzel 0000-0002-3458-4839 True
59
- GOC:rv Randi Vita 0000-0001-8957-7612 True
60
- GOC:ml Magdalen Lindeberg 0000-0001-6386-4066 True
61
- GOC:gvg George Gkoutos 0000-0002-2061-091X True
62
- GOC:al Antonia Lock 0000-0003-1179-5999 False
63
- GOC:jb Jurg Bahler 0000-0003-4036-1532 True
64
- GOC:mah Midori Harris 0000-0003-4148-4606 False
65
- GOC:vw Val Wood 0000-0001-6330-7526 False
66
- GOC:lc Laurel Cooper 0000-0002-6379-8932 False
67
- GOC:rw Ramona Walls 0000-0001-8815-0078 True
68
- GOC:cna Cecilia Arighi 0000-0002-0803-4817 True
69
- GOC:bj Bijay Jassal 0000-0002-5039-5405 True
70
- GOC:mg2 Marc Gillespie 0000-0002-5766-1702 True
71
- GOC:pde Peter D'Eustachio 0000-0002-5494-626X False
72
- GOC:phg Phani Garapati 0000-0003-0941-2207 False
73
- GOC:sj Steven Jupe 0000-0001-5807-0069 True
74
- GOC:vs Veronica Shamovsky 0000-0002-2187-2241 True
75
- GOC:jsg John Garavelli 0000-0002-4131-735X True
76
- GOC:sjw Shur-Jen Wang 0000-0001-5256-8683 True
77
- GOC:sl Stan Laulederkind 0000-0001-5356-4174 False
78
- GOC:st Simon Twigger 0000-0001-5659-3632 True
79
- GOC:vp Victoria Petri 0000-0002-5540-8498 True
80
- GOC:cb Colin Batchelor 0000-0001-5985-7429 True
81
- GOC:cjm Chris Mungall 0000-0002-6601-2165 False
82
- GOC:clt Chandra Theesfeld 0000-0002-8379-6600 False
83
- GOC:dgf Dianna Fisk 0000-0003-4929-9472 False
84
- GOC:ew Edith Wong 0000-0001-9799-5523 False
85
- GOC:elh Eurie Hong 0000-0002-1775-4998 False
86
- GOC:jd Janos Demeter 0000-0002-7301-8055 False
87
- GOC:jh Jodi Hirschman 0000-0001-8850-9925 False
88
- GOC:krc Karen Christie 0000-0001-5501-853X False
89
- GOC:mcc Maria Costanzo 0000-0001-9043-693X False
90
- GOC:rb Rama Balakrishnan 0000-0003-2468-9933 False
91
- GOC:rn Rob Nash 0000-0002-3726-7441 False
92
- GOC:se Stacia Engel 0000-0001-5472-917X False
93
- GOC:ssd Selina Dwight 0000-0002-8546-7798 False
94
- GOC:dw Dani Welter 0000-0003-1058-2668 False
95
- GOC:ask A. S. Karthikeyan 0000-0003-0065-0217 True
96
- GOC:ct Christopher Tissier 0000-0002-0693-3202 True
97
- GOC:dhl Donghui Li 0000-0003-3335-4537 False
98
- GOC:ds David Swarbreck 0000-0002-5453-1013 True
99
- GOC:kad Kate Dreher 0000-0003-4652-4398 False
100
- GOC:tb Tanya Berardini 0000-0002-3837-8864 False
101
- GOC:dh Dan Haft 0000-0001-8101-4938 True
102
- GOC:lh Linda Hannick 0000-0002-8018-8466 False
103
- GOC:ef Erika Feltrin 0000-0002-9899-7456 False
104
- GOC:lm Lorenza Mittempergher 0000-0003-3425-3965 True
105
- GOC:ar Alan Ruttenberg 0000-0002-1604-3078 True
106
- GOC:hw Heather Wick 0000-0003-0961-0377 True
107
- GOC:pad Paul Denny 0000-0003-4659-6893 False
108
- GOC:mat Mathew Tata 0000-0002-0960-5677 True
109
- GOC:aa Andrea Auchincloss 0000-0002-5297-5390 True
110
- GOC:ae Anne Estreicher 0000-0001-6828-2508 True
111
- GOC:ab Alan Bridge 0000-0003-2148-9135 False
112
- GOC:ans Andre Stutz 0000-0002-7175-2168 True
113
- GOC:ag Arnaud Gos 0000-0002-5018-1378 True
114
- GOC:cr2 Catherine Rivoire 0000-0002-5979-8382 True
115
- GOC:ch Chantal Hulo 0000-0001-8176-7999 True
116
- GOC:dl2 Damien Lieberherr 0000-0002-9724-1710 True
117
- GOC:ecu Elena Cibrian-Uhalte 0000-0002-0987-9862 False
118
- GOC:fj Florence Jungo 0000-0002-7456-8390 True
119
- GOC:gc Gayatri Chavali 0000-0001-8575-1847 True
120
- GOC:gk Guillaume Keller 0000-0001-9497-8269 True
121
- GOC:ip Ivo Pedruzzi 0000-0001-8561-7170 True
122
- GOC:kd Kirill Degtyarenko 0000-0003-0058-650X True
123
- GOC:klp Klemens Pichler 0000-0001-6099-8931 False
124
- GOC:ka Kristian Axelsen 0000-0003-3889-2879 True
125
- GOC:mf Marc Feuermann 0000-0002-4187-2863 False
126
- GOC:mt Michael Tognolli 0000-0002-5278-3321 True
127
- GOC:mm2 Michele Magrane 0000-0003-3544-996X True
128
- GOC:pm2 Patrick Masson 0000-0001-7646-0052 False
129
- GOC:pdr Paula Duek Roggli 0000-0002-0819-0473 True
130
- GOC:pga Penelope Garmiri 0000-0002-2283-2575 False
131
- GOC:plm Philippe Le Mercier 0000-0001-8528-090X True
132
- GOC:reh Reija Hieta 0000-0001-5724-6253 True
133
- GOC:so Sandra Orchard 0000-0002-8878-3972 True
134
- GOC:sp Sylvain Poux 0000-0001-7299-6685 False
135
- GOC:ss Shyamala Sundaram 0000-0003-4209-460X True
136
- GOC:uh Ursula Hinz 0000-0002-2365-2234 True
137
- GOC:wmc Wei Mun Chan 0000-0002-9971-813X True
138
- GOC:nhn Nevila Hyka-Nouspikel 0000-0001-7855-209X True
139
- GOC:jc Jonas Cicenas 0000-0002-9365-1843 True
140
- GOC:gap Ghislaine Argoud-Puy 0000-0002-2979-8613 False
141
- GOC:ppm Pablo Porras Millan 0000-0002-8429-8793 True
142
- GOC:dsz Dora Szakonyi 0000-0002-9189-629X True
143
- GOC:dr Daniela Raciti 0000-0002-4945-5837 False
144
- GOC:kmv Kimberly Van Auken 0000-0002-1706-4196 False
145
- GOC:rk Ranjana Kishore 0000-0002-1478-7671 False
146
- GOC:dgh Doug Howe 0000-0001-5831-7439 False
147
- GOC:dsf David Fashena 0000-0001-9656-0683 False
148
- GOC:cvs Ceri Van Slyke 0000-0002-2244-7917 False
149
- GOC:sr Sridhar Ramachandran 0000-0002-2246-3722 False
150
- GOC:ymb Yvonne M Bradford 0000-0002-9900-7880 False
151
- GOC:sat Sabrina Toro 0000-0002-4142-7153 False
152
- GOC:ksf Ken Frazer 0000-0002-6889-0711 False
153
- GOC:lrz Leyla Ruzicka 0000-0002-1009-339X False
154
- GOC:ejs Erik Segerdell 0000-0002-9611-1279 True
155
- GOC:lb Lionel Breuza 0000-0002-8075-8625 False
156
- GOC:mh Melissa Haendel 0000-0001-9114-8737 False
157
- GOC:mag Marion Gremse 0000-0003-0350-6392 True
158
- GOC:hp Helen Parkinson 0000-0003-3035-4195 True
159
- GOC:sk Scott Kalberer 0000-0003-2101-2484 True
160
- GOC:md Mickael Desvaux 0000-0003-2986-6417 True
161
- GOC:expert_db Dominique Bergmann 0000-0003-0873-3543 True
162
- GOC:expert_ks Kevin Struhl 0000-0002-4181-7856 True
163
- GOC:expert_jwt Jeremy Thorner 0000-0002-2583-500X True
164
- GOC:expert_mm Michael Melkonian 0000-0002-5911-6548 True
165
- GOC:expert_pt Philippa Talmud 0000-0002-5560-1933 True
166
- GOC:expert_rsh R. Scott Hawley 0000-0002-6478-0494 True
167
- GOC:expert_tf Tim Formosa 0000-0002-8477-2483 True
168
- GOC:jal Jamie A. Lee 0000-0001-6182-2372 True
169
- GOC:mmt Monica Munoz-Torres 0000-0001-8430-6039 False
170
- GOC:at Anne Thessen 0000-0002-2908-3327 False
171
- GOC:ga Giulia Antonazzo 0000-0003-0086-5621 False
172
- GOC:ani Anne Niknejad 0000-0003-3308-6245 True
173
- GOC:pvn Pim van Nierop 0000-0003-0593-3443 False
174
- GOC:add Alexander Diehl 0000-0001-9990-8331 False
175
- GOC:pj Pankaj Jaiswal 0000-0002-1005-8383 False
176
- GOC:rz Rossana Zaru 0000-0002-3358-4423 False
177
- GOC:lr Leonore Reiser 0000-0003-0073-0858 False
178
- GOC:mcc2 Marcus Chibucos 0000-0001-9586-0780 False
179
- GOC:das Debby Siegele 0000-0001-8935-0696 False
180
- GOC:bc Barbara Kramarz 0000-0002-3898-1727 False
181
- GOC:lnp Livia Perfetto 0000-0003-4392-8725 False
182
- GOC:ach Achchuthan Shanmugasundram 0000-0003-2349-6929 False
183
- GOC:mch Marie-Claire Harrison 0000-0002-3013-9906 False
184
- GOC:mlg Michelle Gwinn Giglio 0000-0001-7628-5565 False
185
- GOC:jja Josh Jaffery 0000-0002-1965-6945 True
186
- GOC:tc Teresa Chu 0000-0003-4172-1966 True
187
- GOC:apd Allan P Davis 0000-0002-5741-7128 False
188
- GOC:sjm Steven Marygold 0000-0003-2759-266X False
File without changes