pyobo 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/api/properties.py +8 -12
- pyobo/api/xrefs.py +1 -2
- pyobo/cli/database.py +30 -2
- pyobo/cli/database_utils.py +5 -11
- pyobo/getters.py +18 -78
- pyobo/gilda_utils.py +3 -80
- pyobo/identifier_utils/__init__.py +2 -10
- pyobo/identifier_utils/api.py +21 -12
- pyobo/identifier_utils/preprocessing.json +74 -13
- pyobo/identifier_utils/preprocessing.py +5 -39
- pyobo/obographs.py +5 -1
- pyobo/reader.py +13 -17
- pyobo/sources/cgnc.py +9 -1
- pyobo/sources/flybase.py +5 -5
- pyobo/sources/omim_ps.py +4 -4
- pyobo/sources/pharmgkb/pharmgkb_gene.py +1 -1
- pyobo/struct/functional/ontology.py +3 -1
- pyobo/struct/reference.py +4 -4
- pyobo/struct/struct.py +112 -55
- pyobo/utils/cache.py +3 -4
- pyobo/utils/io.py +38 -14
- pyobo/utils/path.py +16 -19
- pyobo/version.py +1 -1
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/METADATA +71 -110
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/RECORD +29 -30
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/WHEEL +1 -1
- pyobo/identifier_utils/model.py +0 -130
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.0.dist-info → pyobo-0.12.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
{
|
|
2
|
-
"
|
|
2
|
+
"blocklists": {
|
|
3
3
|
"full": [
|
|
4
4
|
":",
|
|
5
5
|
"ADL:FTT",
|
|
6
|
+
"BGEE:curator",
|
|
7
|
+
"BSPO:PATO_mtg_2009",
|
|
6
8
|
"Bgee:curator",
|
|
7
9
|
"BioGRID:curators",
|
|
8
10
|
"CGP:curators",
|
|
@@ -16,8 +18,10 @@
|
|
|
16
18
|
"Europe:\\:260",
|
|
17
19
|
"FBC:Autogenerated",
|
|
18
20
|
"Follicular:fluid",
|
|
21
|
+
"GO:curator",
|
|
19
22
|
"GOC:TermGenie",
|
|
20
23
|
"GOC:go_curators",
|
|
24
|
+
"GOC:mtg_cell_cycle",
|
|
21
25
|
"GOC:mtg_kidney_jan10",
|
|
22
26
|
"GOC:pamgo_curators",
|
|
23
27
|
"GROUP:OBI",
|
|
@@ -30,13 +34,16 @@
|
|
|
30
34
|
"KEGG",
|
|
31
35
|
"Lamina:propria",
|
|
32
36
|
"Lymph:node",
|
|
37
|
+
"MIM:OMIM",
|
|
38
|
+
"MeSH:MeSH",
|
|
33
39
|
"NIFSTD:NeuroNames_abbrevSource",
|
|
34
40
|
"NIST",
|
|
35
41
|
"PLATY:A.H.L.Fischer",
|
|
36
42
|
"POC:curators",
|
|
37
43
|
"Pituitary:gland",
|
|
38
44
|
"PomBane:vw",
|
|
39
|
-
"
|
|
45
|
+
"RGD:www.rgd.mcw.edu",
|
|
46
|
+
"SGD:curators",
|
|
40
47
|
"STRUCTURE_Formula",
|
|
41
48
|
"South:Korea",
|
|
42
49
|
"TAIR:TED",
|
|
@@ -74,8 +81,10 @@
|
|
|
74
81
|
"STRUCTURE_Formula",
|
|
75
82
|
"SUBMITTER",
|
|
76
83
|
"TEMP:",
|
|
84
|
+
"http://wiki.geneontology.org/index.php",
|
|
77
85
|
"stedman",
|
|
78
|
-
"synonym:"
|
|
86
|
+
"synonym:",
|
|
87
|
+
"urn:lsid\\:globalnames.org\\:index\\:"
|
|
79
88
|
],
|
|
80
89
|
"resource_full": {
|
|
81
90
|
"ceph": [
|
|
@@ -303,9 +312,9 @@
|
|
|
303
312
|
},
|
|
304
313
|
"rewrites": {
|
|
305
314
|
"full": {
|
|
306
|
-
"OBO_REL:is_a": "rdfs:subClassOf",
|
|
307
|
-
":has_start_point": "has_start_point",
|
|
308
315
|
"ABBREVIATION": "omo:0003000",
|
|
316
|
+
"BSPO:cjm": "orcid:0000-0002-6601-2165",
|
|
317
|
+
"BSPO:mah": "orcid:0000-0003-4148-4606",
|
|
309
318
|
"CHEBI:133245 MetaCyc:quercetin 3-rhamnoside-7-rhamnoside": "CHEBI:133245",
|
|
310
319
|
"CHEBI:16531 MetaCyc:renillar luciferin": "CHEBI:16531",
|
|
311
320
|
"CHEBI:49553 PDBeChem:Copper(II) chloride": "CHEBI:49553",
|
|
@@ -319,23 +328,42 @@
|
|
|
319
328
|
"FOBI_050091": "FOBI:050091",
|
|
320
329
|
"GIOC:vw": "GOC:vw",
|
|
321
330
|
"GOC:MAH": "orcid:0000-0003-4148-4606",
|
|
331
|
+
"HAO:attached_to": "ro:0002371",
|
|
322
332
|
"HPO:SKOEHLER": "orcid:0000-0002-5316-1399",
|
|
323
333
|
"HPO:skoehler": "orcid:0000-0002-5316-1399",
|
|
324
334
|
"LIPIDMAPSLMFA01030141": "LIPIDMAPS:LMFA01030141",
|
|
325
335
|
"LIPIDMAPSLMFA01030152": "LIPIDMAPS:LMFA01030152",
|
|
326
336
|
"MimicDB:KatalinParej": "orcid:0000-0002-2567-6061",
|
|
327
337
|
"OBO_REL2:has_role": "ro:0000087",
|
|
338
|
+
"OBO_REL:adjacent_to": "ro:0002220",
|
|
339
|
+
"OBO_REL:has_participant": "ro:0000057",
|
|
340
|
+
"OBO_REL:has_quality": "bfo:0000086",
|
|
341
|
+
"OBO_REL:has_role": "ro:0000087",
|
|
342
|
+
"OBO_REL:has_vector": "ro:0002460",
|
|
343
|
+
"OBO_REL:inheres_in": "ro:0004096",
|
|
344
|
+
"OBO_REL:is_a": "rdfs:subClassOf",
|
|
345
|
+
"OBO_REL:located_in": "ro:0001025",
|
|
346
|
+
"OBO_REL:location_of": "ro:0001015",
|
|
347
|
+
"OBO_REL:part_of": "bfo:0000050",
|
|
348
|
+
"OBO_REL:participates_in": "ro:0000056",
|
|
349
|
+
"OBO_REL:preceded_by": "bfo:0000062",
|
|
350
|
+
"OBO_REL:precedes": "bfo:0000063",
|
|
351
|
+
"OBO_REL:proper_part_of": "bfo:0000050",
|
|
352
|
+
"OBO_REL:transformation_of": "ro:0002494",
|
|
328
353
|
"PATOC:CJM": "orcid:0000-0002-6601-2165",
|
|
329
354
|
"PATOC:MAH": "orcid:0000-0003-4148-4606",
|
|
330
355
|
"PHI-base:AlayneCuzick": "orcid:0000-0001-8941-3984",
|
|
331
356
|
"PMI:17498297": "PMID:17498297",
|
|
357
|
+
"POMBASE:mah": "orcid:0000-0003-4148-4606",
|
|
358
|
+
"PomBase\\:mah": "orcid:0000-0003-4148-4606",
|
|
332
359
|
"Property:P1659": "wikidata:P1659",
|
|
360
|
+
"RO:has_part": "bfo:0000051",
|
|
361
|
+
"RO:part_of": "BFO:0000050",
|
|
333
362
|
"SBN:9780070316607": "ISBN:9780070316607",
|
|
334
363
|
"SIB:PG": "orcid:0000-0003-1813-6857",
|
|
335
364
|
"SIB:PG xsd:string": "orcid:0000-0003-1813-6857",
|
|
336
365
|
"SNOMEDCT274897005": "SNOMEDCT:274897005",
|
|
337
366
|
"UBERON:cjm": "orcid:0000-0002-6601-2165",
|
|
338
|
-
"\\:has_start_point": "has_start_point",
|
|
339
367
|
"bearer:of": "RO:0000053",
|
|
340
368
|
"broadMatch": "skos:broadMatch",
|
|
341
369
|
"connected_to": "ro:0002170",
|
|
@@ -368,7 +396,6 @@
|
|
|
368
396
|
"hasNarrowSynonym": "oboinowl:hasNarrowSynonym",
|
|
369
397
|
"hasRelatedSynonym": "oboinowl:hasRelatedSynonym",
|
|
370
398
|
"has_part": "bfo:0000051",
|
|
371
|
-
"has_start_point:": "has_start_point",
|
|
372
399
|
"http://creativecommons.org/licenses/by/3.0/": "spdx:CC-BY-3.0",
|
|
373
400
|
"http://creativecommons.org/licenses/by/4.0/": "spdx:CC-BY-4.0",
|
|
374
401
|
"http://creativecommons.org/publicdomain/zero/1.0/": "spdx:CC0-1.0",
|
|
@@ -565,6 +592,10 @@
|
|
|
565
592
|
"is_a": "rdfs:subClassOf",
|
|
566
593
|
"isa": "rdfs:subClassOf",
|
|
567
594
|
"narrowMatch": "skos:narrowMatch",
|
|
595
|
+
"obo:aism#dcterms-license": "dcterms:license",
|
|
596
|
+
"obo:caro#dcterms-license": "dcterms:license",
|
|
597
|
+
"obo:sasap#closeMatch": "skos:closeMatch",
|
|
598
|
+
"obo:sasap#relatedMatch": "skos:relatedMatch",
|
|
568
599
|
"part:of": "BFO:0000050",
|
|
569
600
|
"part_of": "bfo:0000050",
|
|
570
601
|
"provenance_notes": "ubprop:0000004",
|
|
@@ -590,7 +621,7 @@
|
|
|
590
621
|
"CREDIT_00": "CREDIT:00",
|
|
591
622
|
"CVCL_": "cellosaurus:CVCL_",
|
|
592
623
|
"DC:0000": "diseaseclass:0000",
|
|
593
|
-
"DrugNames:DB": "drugbank:DB
|
|
624
|
+
"DrugNames:DB": "drugbank:DB",
|
|
594
625
|
"ECGOntology:<new dbxref> \"": "ECGOntology:",
|
|
595
626
|
"EGA:EGAD": "ega.study:EGAD",
|
|
596
627
|
"EGA:EGAS": "ega.study:EGAS",
|
|
@@ -604,12 +635,14 @@
|
|
|
604
635
|
"From_Merriam-Webster's_Online_Dictionary_at_www.Merriam-Webster.com:http\\://www.merriam-webster.com/dictionary/": "merriamwebster:",
|
|
605
636
|
"GEMET:http\\://www.eionet.europa.eu/gemet/concept/": "GEMET:",
|
|
606
637
|
"GNOme: \"GNO:": "glygen",
|
|
638
|
+
"GO:GO\\:": "go:",
|
|
607
639
|
"IEDB:RV": "orcid:0000-0001-8957-7612",
|
|
608
640
|
"IEDB:RandiVita": "orcid:0000-0001-8957-7612",
|
|
609
641
|
"IEDB:RandiVita xsd:string": "orcid:0000-0001-8957-7612",
|
|
610
642
|
"KCB:KCB ": "KCB:",
|
|
611
643
|
"KEGG COMPOUND": "KEGG.COMPOUND",
|
|
612
644
|
"KEGG DRUG": "KEGG.DRUG",
|
|
645
|
+
"KEGG.COMPOUND:c": "KEGG.COMPOUND:C",
|
|
613
646
|
"LIPID MAPS:": "LIPIDMAPS:",
|
|
614
647
|
"MONDOLEX:": "MONDO:",
|
|
615
648
|
"MedlinePlus: ": "MedlinePlus:",
|
|
@@ -635,9 +668,13 @@
|
|
|
635
668
|
"PANTHER:PTHR": "panther.family:PTHR",
|
|
636
669
|
"PDR:PMID:": "pubmed:",
|
|
637
670
|
"PMID: ": "PMID:",
|
|
671
|
+
"PMID:ID\\:": "pubmed:",
|
|
672
|
+
"PMID:_": "pubmed:",
|
|
673
|
+
"Pubmed:PMID\\:": "pubmed:",
|
|
638
674
|
"SNOMEDCT: ": "SNOMEDCT:",
|
|
639
675
|
"TKG:TKG ": "TKG:",
|
|
640
676
|
"TS-": "caloha:",
|
|
677
|
+
"UM-BBD_enzymeID:r": "umbbd.reaction:r",
|
|
641
678
|
"UMLS CUI:": "UMLS:",
|
|
642
679
|
"URL: ": "URL:",
|
|
643
680
|
"URL: http\\://": "http://",
|
|
@@ -652,8 +689,10 @@
|
|
|
652
689
|
"chembl target:CHEMBL": "chembl.target:CHEMBL",
|
|
653
690
|
"detail?JGLOBAL:ID=": "iobc",
|
|
654
691
|
"doi:/": "doi:",
|
|
692
|
+
"edamontology:topic_": "edam.topic:",
|
|
655
693
|
"has:": "has_",
|
|
656
694
|
"http://en.wikipedia.org/wiki/": "wikipedia.en:",
|
|
695
|
+
"http://https://orcid.org/": "orcid:",
|
|
657
696
|
"http://linkedlifedata.com/resource/umls/id/": "UMLS:",
|
|
658
697
|
"http://orcid.org/": "orcid:",
|
|
659
698
|
"http:en.wikipedia.org/wiki/": "wikipedia.en:",
|
|
@@ -671,6 +710,7 @@
|
|
|
671
710
|
"resource:SIO_": "SIO:",
|
|
672
711
|
"scipion:CRYOEM_": "cryoem:",
|
|
673
712
|
"snap#": "snap:",
|
|
713
|
+
"subject:SRAO_": "srao:",
|
|
674
714
|
"terms1": "dcterms",
|
|
675
715
|
"unirot:": "uniprot:",
|
|
676
716
|
"url: \"https://en.wikipedia.org/wiki/": "wikipedia.en:",
|
|
@@ -691,7 +731,16 @@
|
|
|
691
731
|
"vo/ontorat/PR:": "PR:",
|
|
692
732
|
"xsd\\:": "xsd:"
|
|
693
733
|
},
|
|
694
|
-
"resource_full": {
|
|
734
|
+
"resource_full": {
|
|
735
|
+
"bspo": {
|
|
736
|
+
":has_start_point": "BSPO:0020002",
|
|
737
|
+
"has_start_point": "BSPO:0020002"
|
|
738
|
+
},
|
|
739
|
+
"cteno": {
|
|
740
|
+
":has_start_point": "BSPO:0020002",
|
|
741
|
+
"has_start_point": "BSPO:0020002"
|
|
742
|
+
}
|
|
743
|
+
},
|
|
695
744
|
"resource_prefix": {
|
|
696
745
|
"biolink": {
|
|
697
746
|
"active:in": "biolink:active_in",
|
|
@@ -710,7 +759,8 @@
|
|
|
710
759
|
"coexists:with": "biolink:coexists_with",
|
|
711
760
|
"coexpressed:with": "biolink:coexpressed_with",
|
|
712
761
|
"colocalizes:with": "biolink:colocalizes_with",
|
|
713
|
-
"completed:by": "biolink:completed_by"
|
|
762
|
+
"completed:by": "biolink:completed_by",
|
|
763
|
+
"os:": "owlstar:"
|
|
714
764
|
},
|
|
715
765
|
"biomodels.teddy": {
|
|
716
766
|
"TR:": "biomodels.teddy:"
|
|
@@ -723,18 +773,21 @@
|
|
|
723
773
|
"pgx:CVCL_": "cellosaurus:"
|
|
724
774
|
},
|
|
725
775
|
"chebi": {
|
|
726
|
-
"DrugBank:DBSALT": "drugbank.salt:"
|
|
776
|
+
"DrugBank:DBSALT": "drugbank.salt:DBSALT"
|
|
727
777
|
},
|
|
728
778
|
"chmo": {
|
|
729
779
|
"Orange:": "orangebook:",
|
|
730
780
|
"Orange: ": "orangebook:"
|
|
731
781
|
},
|
|
732
|
-
"
|
|
733
|
-
"
|
|
782
|
+
"classyfire": {
|
|
783
|
+
"LIPIDMAPS:": "lipidmaps:LM"
|
|
734
784
|
},
|
|
735
785
|
"doid": {
|
|
736
786
|
"MIM:PS": "omim.ps:"
|
|
737
787
|
},
|
|
788
|
+
"ehdaa2": {
|
|
789
|
+
"CS": "carnegie.stage:"
|
|
790
|
+
},
|
|
738
791
|
"emapa": {
|
|
739
792
|
"TS:": "theiler:"
|
|
740
793
|
},
|
|
@@ -760,6 +813,10 @@
|
|
|
760
813
|
"mdm": {
|
|
761
814
|
"drugbank:DB:": "drugbank:DB"
|
|
762
815
|
},
|
|
816
|
+
"ncro": {
|
|
817
|
+
"miRBase:MIPF": "mirbase.family:MIPF",
|
|
818
|
+
"mirbase:MIMAT": "mirbase.mature:MIMAT"
|
|
819
|
+
},
|
|
763
820
|
"phipo": {
|
|
764
821
|
"created:by": "dcterms:creator",
|
|
765
822
|
"created:date": "dcterms:created",
|
|
@@ -769,6 +826,10 @@
|
|
|
769
826
|
"OMA:": "uniprot:",
|
|
770
827
|
"iPTMnet:": "uniprot:"
|
|
771
828
|
},
|
|
829
|
+
"pw": {
|
|
830
|
+
"KEGG:": "kegg.pathway:",
|
|
831
|
+
"PID:": "pid.pathway:"
|
|
832
|
+
},
|
|
772
833
|
"senso": {
|
|
773
834
|
"odo:SENSO_": "senso:"
|
|
774
835
|
},
|
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
"""Load the manually curated metaregistry."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
from functools import lru_cache
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
|
|
7
|
-
from
|
|
6
|
+
from curies.preprocessing import PreprocessingRules, _load_rules
|
|
8
7
|
|
|
9
|
-
from .model import Rules
|
|
10
8
|
from ..resources.goc import load_goc_map
|
|
11
9
|
|
|
12
10
|
__all__ = [
|
|
13
|
-
"
|
|
14
|
-
"remap_prefix",
|
|
15
|
-
"str_is_blacklisted",
|
|
11
|
+
"get_rules",
|
|
16
12
|
]
|
|
17
13
|
|
|
18
14
|
HERE = Path(__file__).parent.resolve()
|
|
@@ -20,42 +16,12 @@ RULES_PATH = HERE.joinpath("preprocessing.json")
|
|
|
20
16
|
|
|
21
17
|
|
|
22
18
|
@lru_cache(1)
|
|
23
|
-
def get_rules() ->
|
|
19
|
+
def get_rules() -> PreprocessingRules:
|
|
24
20
|
"""Get the CURIE/URI string preprocessing rules."""
|
|
25
|
-
rules =
|
|
21
|
+
rules = _load_rules(RULES_PATH)
|
|
26
22
|
rules.rewrites.full.update(load_goc_map())
|
|
27
23
|
return rules
|
|
28
24
|
|
|
29
25
|
|
|
30
|
-
def remap_full(
|
|
31
|
-
str_or_curie_or_uri: str, *, ontology_prefix: str | None = None
|
|
32
|
-
) -> NormalizedNamableReference | None:
|
|
33
|
-
"""Remap the string if possible otherwise return it."""
|
|
34
|
-
return get_rules().remap_full(
|
|
35
|
-
str_or_curie_or_uri, cls=NormalizedNamableReference, ontology_prefix=ontology_prefix
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def remap_prefix(str_or_curie_or_uri: str, ontology_prefix: str | None = None) -> str:
|
|
40
|
-
"""Remap a prefix."""
|
|
41
|
-
return get_rules().remap_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _lint() -> None:
|
|
45
|
-
rules = Rules.model_validate_json(RULES_PATH.read_text())
|
|
46
|
-
rules.blacklists._sort()
|
|
47
|
-
RULES_PATH.write_text(json.dumps(rules.model_dump(), sort_keys=True, indent=2))
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def str_is_blacklisted(str_or_curie_or_uri: str, *, ontology_prefix: str | None = None) -> bool:
|
|
51
|
-
"""Check if the full CURIE string is blacklisted."""
|
|
52
|
-
rules = get_rules()
|
|
53
|
-
return (
|
|
54
|
-
rules.str_is_blacklisted_full(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
|
|
55
|
-
or rules.str_has_blacklisted_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
|
|
56
|
-
or rules.str_has_blacklisted_suffix(str_or_curie_or_uri)
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
|
|
60
26
|
if __name__ == "__main__":
|
|
61
|
-
|
|
27
|
+
PreprocessingRules.lint_file(RULES_PATH)
|
pyobo/obographs.py
CHANGED
|
@@ -41,7 +41,11 @@ def graph_from_obo(obo: Obo, use_tqdm: bool = True) -> Graph:
|
|
|
41
41
|
nodes: list[Node] = []
|
|
42
42
|
edges: list[Edge] = []
|
|
43
43
|
for term in tqdm(
|
|
44
|
-
obo,
|
|
44
|
+
obo,
|
|
45
|
+
disable=not use_tqdm,
|
|
46
|
+
unit="term",
|
|
47
|
+
unit_scale=True,
|
|
48
|
+
desc=f"[{obo._prefix_version}] to OBO Graph JSON",
|
|
45
49
|
):
|
|
46
50
|
nodes.append(_get_class_node(term))
|
|
47
51
|
edges.extend(_iter_edges(term))
|
pyobo/reader.py
CHANGED
|
@@ -15,20 +15,19 @@ from typing import Any
|
|
|
15
15
|
import bioregistry
|
|
16
16
|
import networkx as nx
|
|
17
17
|
from curies import ReferenceTuple
|
|
18
|
+
from curies.preprocessing import BlocklistError
|
|
18
19
|
from curies.vocabulary import SynonymScope
|
|
19
20
|
from more_itertools import pairwise
|
|
20
21
|
from tqdm.auto import tqdm
|
|
21
22
|
|
|
22
23
|
from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
|
|
23
24
|
from .identifier_utils import (
|
|
24
|
-
BlacklistedError,
|
|
25
25
|
NotCURIEError,
|
|
26
26
|
ParseError,
|
|
27
27
|
UnparsableIRIError,
|
|
28
28
|
_is_valid_identifier,
|
|
29
29
|
_parse_str_or_curie_or_uri_helper,
|
|
30
|
-
|
|
31
|
-
str_is_blacklisted,
|
|
30
|
+
get_rules,
|
|
32
31
|
)
|
|
33
32
|
from .reader_utils import (
|
|
34
33
|
_chomp_axioms,
|
|
@@ -53,6 +52,7 @@ from .struct.struct_utils import Annotation, Stanza
|
|
|
53
52
|
from .struct.typedef import comment as has_comment
|
|
54
53
|
from .struct.typedef import default_typedefs, has_ontology_root_term
|
|
55
54
|
from .utils.cache import write_gzipped_graph
|
|
55
|
+
from .utils.io import safe_open
|
|
56
56
|
from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version
|
|
57
57
|
|
|
58
58
|
__all__ = [
|
|
@@ -76,13 +76,7 @@ def from_obo_path(
|
|
|
76
76
|
) -> Obo:
|
|
77
77
|
"""Get the OBO graph from a path."""
|
|
78
78
|
path = Path(path).expanduser().resolve()
|
|
79
|
-
if path.suffix.endswith(".
|
|
80
|
-
import gzip
|
|
81
|
-
|
|
82
|
-
logger.info("[%s] parsing gzipped OBO with obonet from %s", prefix or "<unknown>", path)
|
|
83
|
-
with gzip.open(path, "rt") as file:
|
|
84
|
-
graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
|
|
85
|
-
elif path.suffix.endswith(".zip"):
|
|
79
|
+
if path.suffix.endswith(".zip"):
|
|
86
80
|
import io
|
|
87
81
|
import zipfile
|
|
88
82
|
|
|
@@ -95,7 +89,7 @@ def from_obo_path(
|
|
|
95
89
|
)
|
|
96
90
|
else:
|
|
97
91
|
logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
|
|
98
|
-
with
|
|
92
|
+
with safe_open(path, read=True) as file:
|
|
99
93
|
graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
|
|
100
94
|
|
|
101
95
|
if prefix:
|
|
@@ -1262,7 +1256,7 @@ def _handle_prop(
|
|
|
1262
1256
|
):
|
|
1263
1257
|
case Reference() as datatype_:
|
|
1264
1258
|
datatype = datatype_
|
|
1265
|
-
case
|
|
1259
|
+
case BlocklistError():
|
|
1266
1260
|
return None
|
|
1267
1261
|
case ParseError() as exc:
|
|
1268
1262
|
if strict:
|
|
@@ -1304,7 +1298,7 @@ def _handle_prop(
|
|
|
1304
1298
|
):
|
|
1305
1299
|
case Reference() as obj_reference:
|
|
1306
1300
|
return Annotation(prop_reference, obj_reference)
|
|
1307
|
-
case
|
|
1301
|
+
case BlocklistError():
|
|
1308
1302
|
return None
|
|
1309
1303
|
case UnparsableIRIError():
|
|
1310
1304
|
return Annotation(prop_reference, OBOLiteral.uri(value))
|
|
@@ -1330,7 +1324,7 @@ def _handle_prop(
|
|
|
1330
1324
|
):
|
|
1331
1325
|
case Reference() as obj_reference:
|
|
1332
1326
|
return Annotation(prop_reference, obj_reference)
|
|
1333
|
-
case
|
|
1327
|
+
case BlocklistError():
|
|
1334
1328
|
return None
|
|
1335
1329
|
case ParseError():
|
|
1336
1330
|
if datatype:
|
|
@@ -1535,10 +1529,12 @@ def _parse_xref_line(
|
|
|
1535
1529
|
) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
|
|
1536
1530
|
xref, _, rest = line.partition(" [")
|
|
1537
1531
|
|
|
1538
|
-
|
|
1532
|
+
rules = get_rules()
|
|
1533
|
+
|
|
1534
|
+
if rules.str_is_blocked(xref, context=ontology_prefix) or ":" not in xref:
|
|
1539
1535
|
return None # sometimes xref to self... weird
|
|
1540
1536
|
|
|
1541
|
-
xref = remap_prefix(xref,
|
|
1537
|
+
xref = rules.remap_prefix(xref, context=ontology_prefix)
|
|
1542
1538
|
|
|
1543
1539
|
split_space = " " in xref
|
|
1544
1540
|
if split_space:
|
|
@@ -1552,7 +1548,7 @@ def _parse_xref_line(
|
|
|
1552
1548
|
xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
|
|
1553
1549
|
)
|
|
1554
1550
|
match xref_ref:
|
|
1555
|
-
case
|
|
1551
|
+
case BlocklistError():
|
|
1556
1552
|
return None
|
|
1557
1553
|
case ParseError() as exc:
|
|
1558
1554
|
if strict:
|
pyobo/sources/cgnc.py
CHANGED
|
@@ -45,7 +45,15 @@ HEADER = [
|
|
|
45
45
|
|
|
46
46
|
def get_terms(force: bool = False) -> Iterable[Term]:
|
|
47
47
|
"""Get CGNC terms."""
|
|
48
|
-
df = ensure_df(
|
|
48
|
+
df = ensure_df(
|
|
49
|
+
PREFIX,
|
|
50
|
+
url=URL,
|
|
51
|
+
name=f"{PREFIX}.tsv",
|
|
52
|
+
force=force,
|
|
53
|
+
header=0,
|
|
54
|
+
names=HEADER,
|
|
55
|
+
on_bad_lines="skip",
|
|
56
|
+
)
|
|
49
57
|
for i, (cgnc_id, entrez_id, ensembl_id, name, synonym_1, synoynm_2, _, _) in enumerate(
|
|
50
58
|
df.values
|
|
51
59
|
):
|
pyobo/sources/flybase.py
CHANGED
|
@@ -18,7 +18,7 @@ __all__ = [
|
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
21
|
-
BASE_URL = "
|
|
21
|
+
BASE_URL = "https://s3ftp.flybase.org/releases"
|
|
22
22
|
PREFIX = "flybase"
|
|
23
23
|
NAME = "FlyBase"
|
|
24
24
|
|
|
@@ -51,7 +51,7 @@ def _get_names(version: str, force: bool = False) -> pd.DataFrame:
|
|
|
51
51
|
|
|
52
52
|
def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:
|
|
53
53
|
"""Get mapping from abbreviation column to NCBI taxonomy ID column."""
|
|
54
|
-
url = f"
|
|
54
|
+
url = f"{BASE_URL}/FB{version}/precomputed_files/species/organism_list_fb_{version}.tsv.gz"
|
|
55
55
|
df = ensure_df(
|
|
56
56
|
PREFIX, url=url, force=force, version=version, skiprows=4, header=None, usecols=[2, 4]
|
|
57
57
|
)
|
|
@@ -60,7 +60,7 @@ def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:
|
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
|
|
63
|
-
url = f"
|
|
63
|
+
url = f"{BASE_URL}/FB{version}/precomputed_files/genes/automated_gene_summaries.tsv.gz"
|
|
64
64
|
df = ensure_df(
|
|
65
65
|
PREFIX, url=url, force=force, version=version, skiprows=2, header=None, usecols=[0, 1]
|
|
66
66
|
)
|
|
@@ -69,7 +69,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
|
|
|
69
69
|
|
|
70
70
|
def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
|
|
71
71
|
url = (
|
|
72
|
-
f"
|
|
72
|
+
f"{BASE_URL}/FB{version}/precomputed_files/"
|
|
73
73
|
f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
|
|
74
74
|
)
|
|
75
75
|
df = ensure_df(
|
|
@@ -86,7 +86,7 @@ def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
def _get_synonyms(version, force):
|
|
89
|
-
url = f"
|
|
89
|
+
url = f"{BASE_URL}/FB{version}/precomputed_files/synonyms/fb_synonym_fb_{version}.tsv.gz"
|
|
90
90
|
df = ensure_df(PREFIX, url=url, force=force, version=version, skiprows=4, usecols=[0, 2])
|
|
91
91
|
return df # TODO use this
|
|
92
92
|
|
pyobo/sources/omim_ps.py
CHANGED
|
@@ -13,7 +13,7 @@ __all__ = [
|
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
PREFIX = "omim.ps"
|
|
16
|
-
URL = "https://omim.org/phenotypicSeriesTitles/
|
|
16
|
+
URL = "https://omim.org/phenotypicSeriesTitles/"
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class OMIMPSGetter(Obo):
|
|
@@ -26,13 +26,13 @@ class OMIMPSGetter(Obo):
|
|
|
26
26
|
soup = get_soup(URL, user_agent="Mozilla/5.0")
|
|
27
27
|
content = soup.find(id="mimContent")
|
|
28
28
|
if content is None:
|
|
29
|
-
raise ValueError
|
|
29
|
+
raise ValueError("omim.ps failed - scraper could not find id='mimContent' in HTML")
|
|
30
30
|
table = content.find("table") # type:ignore[attr-defined]
|
|
31
31
|
if table is None:
|
|
32
|
-
raise ValueError
|
|
32
|
+
raise ValueError("omim.ps failed - scraper could not find table in HTML")
|
|
33
33
|
tbody = table.find("tbody")
|
|
34
34
|
if tbody is None:
|
|
35
|
-
raise ValueError
|
|
35
|
+
raise ValueError("omim.ps failed - scraper could not find table body in HTML")
|
|
36
36
|
for row in tbody.find_all("tr"):
|
|
37
37
|
anchor = row.find("td").find("a")
|
|
38
38
|
name = anchor.text.strip()
|
|
@@ -16,6 +16,7 @@ from pyobo.struct.functional.utils import (
|
|
|
16
16
|
FunctionalOWLSerializable,
|
|
17
17
|
list_to_funowl,
|
|
18
18
|
)
|
|
19
|
+
from pyobo.utils.io import safe_open
|
|
19
20
|
|
|
20
21
|
__all__ = [
|
|
21
22
|
"Document",
|
|
@@ -109,7 +110,8 @@ class Document:
|
|
|
109
110
|
def write_funowl(self, path: str | Path) -> None:
|
|
110
111
|
"""Write functional OWL to a file.."""
|
|
111
112
|
path = Path(path).expanduser().resolve()
|
|
112
|
-
path
|
|
113
|
+
with safe_open(path, read=False) as file:
|
|
114
|
+
file.write(self.to_funowl())
|
|
113
115
|
|
|
114
116
|
def to_funowl(self) -> str:
|
|
115
117
|
"""Get the document as a functional OWL string."""
|
pyobo/struct/reference.py
CHANGED
|
@@ -14,9 +14,9 @@ import dateutil.parser
|
|
|
14
14
|
import pytz
|
|
15
15
|
from bioregistry import NormalizedNamableReference as Reference
|
|
16
16
|
from curies import ReferenceTuple
|
|
17
|
+
from curies.preprocessing import BlocklistError
|
|
17
18
|
|
|
18
19
|
from ..identifier_utils import (
|
|
19
|
-
BlacklistedError,
|
|
20
20
|
NotCURIEError,
|
|
21
21
|
ParseError,
|
|
22
22
|
UnparsableIRIError,
|
|
@@ -62,7 +62,7 @@ def _parse_str_or_curie_or_uri(
|
|
|
62
62
|
match reference:
|
|
63
63
|
case Reference():
|
|
64
64
|
return reference
|
|
65
|
-
case
|
|
65
|
+
case BlocklistError():
|
|
66
66
|
return None
|
|
67
67
|
case ParseError():
|
|
68
68
|
if strict:
|
|
@@ -224,7 +224,7 @@ def _obo_parse_identifier(
|
|
|
224
224
|
):
|
|
225
225
|
case Reference() as reference:
|
|
226
226
|
return reference
|
|
227
|
-
case
|
|
227
|
+
case BlocklistError():
|
|
228
228
|
return None
|
|
229
229
|
case NotCURIEError() as exc:
|
|
230
230
|
# this means there's no colon `:`
|
|
@@ -272,7 +272,7 @@ def _parse_reference_or_uri_literal(
|
|
|
272
272
|
):
|
|
273
273
|
case Reference() as reference:
|
|
274
274
|
return reference
|
|
275
|
-
case
|
|
275
|
+
case BlocklistError():
|
|
276
276
|
return None
|
|
277
277
|
case UnparsableIRIError():
|
|
278
278
|
# this means that it's defininitely a URI,
|