pyobo 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
1
  {
2
- "blacklists": {
2
+ "blocklists": {
3
3
  "full": [
4
4
  ":",
5
5
  "ADL:FTT",
6
+ "BGEE:curator",
7
+ "BSPO:PATO_mtg_2009",
6
8
  "Bgee:curator",
7
9
  "BioGRID:curators",
8
10
  "CGP:curators",
@@ -16,8 +18,10 @@
16
18
  "Europe:\\:260",
17
19
  "FBC:Autogenerated",
18
20
  "Follicular:fluid",
21
+ "GO:curator",
19
22
  "GOC:TermGenie",
20
23
  "GOC:go_curators",
24
+ "GOC:mtg_cell_cycle",
21
25
  "GOC:mtg_kidney_jan10",
22
26
  "GOC:pamgo_curators",
23
27
  "GROUP:OBI",
@@ -30,13 +34,16 @@
30
34
  "KEGG",
31
35
  "Lamina:propria",
32
36
  "Lymph:node",
37
+ "MIM:OMIM",
38
+ "MeSH:MeSH",
33
39
  "NIFSTD:NeuroNames_abbrevSource",
34
40
  "NIST",
35
41
  "PLATY:A.H.L.Fischer",
36
42
  "POC:curators",
37
43
  "Pituitary:gland",
38
44
  "PomBane:vw",
39
- "PomBase\\:mah",
45
+ "RGD:www.rgd.mcw.edu",
46
+ "SGD:curators",
40
47
  "STRUCTURE_Formula",
41
48
  "South:Korea",
42
49
  "TAIR:TED",
@@ -74,8 +81,10 @@
74
81
  "STRUCTURE_Formula",
75
82
  "SUBMITTER",
76
83
  "TEMP:",
84
+ "http://wiki.geneontology.org/index.php",
77
85
  "stedman",
78
- "synonym:"
86
+ "synonym:",
87
+ "urn:lsid\\:globalnames.org\\:index\\:"
79
88
  ],
80
89
  "resource_full": {
81
90
  "ceph": [
@@ -303,9 +312,9 @@
303
312
  },
304
313
  "rewrites": {
305
314
  "full": {
306
- "OBO_REL:is_a": "rdfs:subClassOf",
307
- ":has_start_point": "has_start_point",
308
315
  "ABBREVIATION": "omo:0003000",
316
+ "BSPO:cjm": "orcid:0000-0002-6601-2165",
317
+ "BSPO:mah": "orcid:0000-0003-4148-4606",
309
318
  "CHEBI:133245 MetaCyc:quercetin 3-rhamnoside-7-rhamnoside": "CHEBI:133245",
310
319
  "CHEBI:16531 MetaCyc:renillar luciferin": "CHEBI:16531",
311
320
  "CHEBI:49553 PDBeChem:Copper(II) chloride": "CHEBI:49553",
@@ -319,23 +328,42 @@
319
328
  "FOBI_050091": "FOBI:050091",
320
329
  "GIOC:vw": "GOC:vw",
321
330
  "GOC:MAH": "orcid:0000-0003-4148-4606",
331
+ "HAO:attached_to": "ro:0002371",
322
332
  "HPO:SKOEHLER": "orcid:0000-0002-5316-1399",
323
333
  "HPO:skoehler": "orcid:0000-0002-5316-1399",
324
334
  "LIPIDMAPSLMFA01030141": "LIPIDMAPS:LMFA01030141",
325
335
  "LIPIDMAPSLMFA01030152": "LIPIDMAPS:LMFA01030152",
326
336
  "MimicDB:KatalinParej": "orcid:0000-0002-2567-6061",
327
337
  "OBO_REL2:has_role": "ro:0000087",
338
+ "OBO_REL:adjacent_to": "ro:0002220",
339
+ "OBO_REL:has_participant": "ro:0000057",
340
+ "OBO_REL:has_quality": "bfo:0000086",
341
+ "OBO_REL:has_role": "ro:0000087",
342
+ "OBO_REL:has_vector": "ro:0002460",
343
+ "OBO_REL:inheres_in": "ro:0004096",
344
+ "OBO_REL:is_a": "rdfs:subClassOf",
345
+ "OBO_REL:located_in": "ro:0001025",
346
+ "OBO_REL:location_of": "ro:0001015",
347
+ "OBO_REL:part_of": "bfo:0000050",
348
+ "OBO_REL:participates_in": "ro:0000056",
349
+ "OBO_REL:preceded_by": "bfo:0000062",
350
+ "OBO_REL:precedes": "bfo:0000063",
351
+ "OBO_REL:proper_part_of": "bfo:0000050",
352
+ "OBO_REL:transformation_of": "ro:0002494",
328
353
  "PATOC:CJM": "orcid:0000-0002-6601-2165",
329
354
  "PATOC:MAH": "orcid:0000-0003-4148-4606",
330
355
  "PHI-base:AlayneCuzick": "orcid:0000-0001-8941-3984",
331
356
  "PMI:17498297": "PMID:17498297",
357
+ "POMBASE:mah": "orcid:0000-0003-4148-4606",
358
+ "PomBase\\:mah": "orcid:0000-0003-4148-4606",
332
359
  "Property:P1659": "wikidata:P1659",
360
+ "RO:has_part": "bfo:0000051",
361
+ "RO:part_of": "BFO:0000050",
333
362
  "SBN:9780070316607": "ISBN:9780070316607",
334
363
  "SIB:PG": "orcid:0000-0003-1813-6857",
335
364
  "SIB:PG xsd:string": "orcid:0000-0003-1813-6857",
336
365
  "SNOMEDCT274897005": "SNOMEDCT:274897005",
337
366
  "UBERON:cjm": "orcid:0000-0002-6601-2165",
338
- "\\:has_start_point": "has_start_point",
339
367
  "bearer:of": "RO:0000053",
340
368
  "broadMatch": "skos:broadMatch",
341
369
  "connected_to": "ro:0002170",
@@ -368,7 +396,6 @@
368
396
  "hasNarrowSynonym": "oboinowl:hasNarrowSynonym",
369
397
  "hasRelatedSynonym": "oboinowl:hasRelatedSynonym",
370
398
  "has_part": "bfo:0000051",
371
- "has_start_point:": "has_start_point",
372
399
  "http://creativecommons.org/licenses/by/3.0/": "spdx:CC-BY-3.0",
373
400
  "http://creativecommons.org/licenses/by/4.0/": "spdx:CC-BY-4.0",
374
401
  "http://creativecommons.org/publicdomain/zero/1.0/": "spdx:CC0-1.0",
@@ -565,6 +592,10 @@
565
592
  "is_a": "rdfs:subClassOf",
566
593
  "isa": "rdfs:subClassOf",
567
594
  "narrowMatch": "skos:narrowMatch",
595
+ "obo:aism#dcterms-license": "dcterms:license",
596
+ "obo:caro#dcterms-license": "dcterms:license",
597
+ "obo:sasap#closeMatch": "skos:closeMatch",
598
+ "obo:sasap#relatedMatch": "skos:relatedMatch",
568
599
  "part:of": "BFO:0000050",
569
600
  "part_of": "bfo:0000050",
570
601
  "provenance_notes": "ubprop:0000004",
@@ -590,7 +621,7 @@
590
621
  "CREDIT_00": "CREDIT:00",
591
622
  "CVCL_": "cellosaurus:CVCL_",
592
623
  "DC:0000": "diseaseclass:0000",
593
- "DrugNames:DB": "drugbank:DB:",
624
+ "DrugNames:DB": "drugbank:DB",
594
625
  "ECGOntology:<new dbxref> \"": "ECGOntology:",
595
626
  "EGA:EGAD": "ega.study:EGAD",
596
627
  "EGA:EGAS": "ega.study:EGAS",
@@ -604,12 +635,14 @@
604
635
  "From_Merriam-Webster's_Online_Dictionary_at_www.Merriam-Webster.com:http\\://www.merriam-webster.com/dictionary/": "merriamwebster:",
605
636
  "GEMET:http\\://www.eionet.europa.eu/gemet/concept/": "GEMET:",
606
637
  "GNOme: \"GNO:": "glygen",
638
+ "GO:GO\\:": "go:",
607
639
  "IEDB:RV": "orcid:0000-0001-8957-7612",
608
640
  "IEDB:RandiVita": "orcid:0000-0001-8957-7612",
609
641
  "IEDB:RandiVita xsd:string": "orcid:0000-0001-8957-7612",
610
642
  "KCB:KCB ": "KCB:",
611
643
  "KEGG COMPOUND": "KEGG.COMPOUND",
612
644
  "KEGG DRUG": "KEGG.DRUG",
645
+ "KEGG.COMPOUND:c": "KEGG.COMPOUND:C",
613
646
  "LIPID MAPS:": "LIPIDMAPS:",
614
647
  "MONDOLEX:": "MONDO:",
615
648
  "MedlinePlus: ": "MedlinePlus:",
@@ -635,9 +668,13 @@
635
668
  "PANTHER:PTHR": "panther.family:PTHR",
636
669
  "PDR:PMID:": "pubmed:",
637
670
  "PMID: ": "PMID:",
671
+ "PMID:ID\\:": "pubmed:",
672
+ "PMID:_": "pubmed:",
673
+ "Pubmed:PMID\\:": "pubmed:",
638
674
  "SNOMEDCT: ": "SNOMEDCT:",
639
675
  "TKG:TKG ": "TKG:",
640
676
  "TS-": "caloha:",
677
+ "UM-BBD_enzymeID:r": "umbbd.reaction:r",
641
678
  "UMLS CUI:": "UMLS:",
642
679
  "URL: ": "URL:",
643
680
  "URL: http\\://": "http://",
@@ -652,8 +689,10 @@
652
689
  "chembl target:CHEMBL": "chembl.target:CHEMBL",
653
690
  "detail?JGLOBAL:ID=": "iobc",
654
691
  "doi:/": "doi:",
692
+ "edamontology:topic_": "edam.topic:",
655
693
  "has:": "has_",
656
694
  "http://en.wikipedia.org/wiki/": "wikipedia.en:",
695
+ "http://https://orcid.org/": "orcid:",
657
696
  "http://linkedlifedata.com/resource/umls/id/": "UMLS:",
658
697
  "http://orcid.org/": "orcid:",
659
698
  "http:en.wikipedia.org/wiki/": "wikipedia.en:",
@@ -671,6 +710,7 @@
671
710
  "resource:SIO_": "SIO:",
672
711
  "scipion:CRYOEM_": "cryoem:",
673
712
  "snap#": "snap:",
713
+ "subject:SRAO_": "srao:",
674
714
  "terms1": "dcterms",
675
715
  "unirot:": "uniprot:",
676
716
  "url: \"https://en.wikipedia.org/wiki/": "wikipedia.en:",
@@ -691,7 +731,16 @@
691
731
  "vo/ontorat/PR:": "PR:",
692
732
  "xsd\\:": "xsd:"
693
733
  },
694
- "resource_full": {},
734
+ "resource_full": {
735
+ "bspo": {
736
+ ":has_start_point": "BSPO:0020002",
737
+ "has_start_point": "BSPO:0020002"
738
+ },
739
+ "cteno": {
740
+ ":has_start_point": "BSPO:0020002",
741
+ "has_start_point": "BSPO:0020002"
742
+ }
743
+ },
695
744
  "resource_prefix": {
696
745
  "biolink": {
697
746
  "active:in": "biolink:active_in",
@@ -710,7 +759,8 @@
710
759
  "coexists:with": "biolink:coexists_with",
711
760
  "coexpressed:with": "biolink:coexpressed_with",
712
761
  "colocalizes:with": "biolink:colocalizes_with",
713
- "completed:by": "biolink:completed_by"
762
+ "completed:by": "biolink:completed_by",
763
+ "os:": "owlstar:"
714
764
  },
715
765
  "biomodels.teddy": {
716
766
  "TR:": "biomodels.teddy:"
@@ -723,18 +773,21 @@
723
773
  "pgx:CVCL_": "cellosaurus:"
724
774
  },
725
775
  "chebi": {
726
- "DrugBank:DBSALT": "drugbank.salt:"
776
+ "DrugBank:DBSALT": "drugbank.salt:DBSALT"
727
777
  },
728
778
  "chmo": {
729
779
  "Orange:": "orangebook:",
730
780
  "Orange: ": "orangebook:"
731
781
  },
732
- "ehdaa2": {
733
- "CS": "carnegie.stage:"
782
+ "classyfire": {
783
+ "LIPIDMAPS:": "lipidmaps:LM"
734
784
  },
735
785
  "doid": {
736
786
  "MIM:PS": "omim.ps:"
737
787
  },
788
+ "ehdaa2": {
789
+ "CS": "carnegie.stage:"
790
+ },
738
791
  "emapa": {
739
792
  "TS:": "theiler:"
740
793
  },
@@ -760,6 +813,10 @@
760
813
  "mdm": {
761
814
  "drugbank:DB:": "drugbank:DB"
762
815
  },
816
+ "ncro": {
817
+ "miRBase:MIPF": "mirbase.family:MIPF",
818
+ "mirbase:MIMAT": "mirbase.mature:MIMAT"
819
+ },
763
820
  "phipo": {
764
821
  "created:by": "dcterms:creator",
765
822
  "created:date": "dcterms:created",
@@ -769,6 +826,10 @@
769
826
  "OMA:": "uniprot:",
770
827
  "iPTMnet:": "uniprot:"
771
828
  },
829
+ "pw": {
830
+ "KEGG:": "kegg.pathway:",
831
+ "PID:": "pid.pathway:"
832
+ },
772
833
  "senso": {
773
834
  "odo:SENSO_": "senso:"
774
835
  },
@@ -1,18 +1,14 @@
1
1
  """Load the manually curated metaregistry."""
2
2
 
3
- import json
4
3
  from functools import lru_cache
5
4
  from pathlib import Path
6
5
 
7
- from bioregistry import NormalizedNamableReference
6
+ from curies.preprocessing import PreprocessingRules, _load_rules
8
7
 
9
- from .model import Rules
10
8
  from ..resources.goc import load_goc_map
11
9
 
12
10
  __all__ = [
13
- "remap_full",
14
- "remap_prefix",
15
- "str_is_blacklisted",
11
+ "get_rules",
16
12
  ]
17
13
 
18
14
  HERE = Path(__file__).parent.resolve()
@@ -20,42 +16,12 @@ RULES_PATH = HERE.joinpath("preprocessing.json")
20
16
 
21
17
 
22
18
  @lru_cache(1)
23
- def get_rules() -> Rules:
19
+ def get_rules() -> PreprocessingRules:
24
20
  """Get the CURIE/URI string preprocessing rules."""
25
- rules = Rules.model_validate_json(RULES_PATH.read_text())
21
+ rules = _load_rules(RULES_PATH)
26
22
  rules.rewrites.full.update(load_goc_map())
27
23
  return rules
28
24
 
29
25
 
30
- def remap_full(
31
- str_or_curie_or_uri: str, *, ontology_prefix: str | None = None
32
- ) -> NormalizedNamableReference | None:
33
- """Remap the string if possible otherwise return it."""
34
- return get_rules().remap_full(
35
- str_or_curie_or_uri, cls=NormalizedNamableReference, ontology_prefix=ontology_prefix
36
- )
37
-
38
-
39
- def remap_prefix(str_or_curie_or_uri: str, ontology_prefix: str | None = None) -> str:
40
- """Remap a prefix."""
41
- return get_rules().remap_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
42
-
43
-
44
- def _lint() -> None:
45
- rules = Rules.model_validate_json(RULES_PATH.read_text())
46
- rules.blacklists._sort()
47
- RULES_PATH.write_text(json.dumps(rules.model_dump(), sort_keys=True, indent=2))
48
-
49
-
50
- def str_is_blacklisted(str_or_curie_or_uri: str, *, ontology_prefix: str | None = None) -> bool:
51
- """Check if the full CURIE string is blacklisted."""
52
- rules = get_rules()
53
- return (
54
- rules.str_is_blacklisted_full(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
55
- or rules.str_has_blacklisted_prefix(str_or_curie_or_uri, ontology_prefix=ontology_prefix)
56
- or rules.str_has_blacklisted_suffix(str_or_curie_or_uri)
57
- )
58
-
59
-
60
26
  if __name__ == "__main__":
61
- _lint()
27
+ PreprocessingRules.lint_file(RULES_PATH)
pyobo/obographs.py CHANGED
@@ -41,7 +41,11 @@ def graph_from_obo(obo: Obo, use_tqdm: bool = True) -> Graph:
41
41
  nodes: list[Node] = []
42
42
  edges: list[Edge] = []
43
43
  for term in tqdm(
44
- obo, disable=not use_tqdm, unit="term", unit_scale=True, desc=f"[{obo.ontology}] to JSON"
44
+ obo,
45
+ disable=not use_tqdm,
46
+ unit="term",
47
+ unit_scale=True,
48
+ desc=f"[{obo._prefix_version}] to OBO Graph JSON",
45
49
  ):
46
50
  nodes.append(_get_class_node(term))
47
51
  edges.extend(_iter_edges(term))
pyobo/reader.py CHANGED
@@ -15,20 +15,19 @@ from typing import Any
15
15
  import bioregistry
16
16
  import networkx as nx
17
17
  from curies import ReferenceTuple
18
+ from curies.preprocessing import BlocklistError
18
19
  from curies.vocabulary import SynonymScope
19
20
  from more_itertools import pairwise
20
21
  from tqdm.auto import tqdm
21
22
 
22
23
  from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
23
24
  from .identifier_utils import (
24
- BlacklistedError,
25
25
  NotCURIEError,
26
26
  ParseError,
27
27
  UnparsableIRIError,
28
28
  _is_valid_identifier,
29
29
  _parse_str_or_curie_or_uri_helper,
30
- remap_prefix,
31
- str_is_blacklisted,
30
+ get_rules,
32
31
  )
33
32
  from .reader_utils import (
34
33
  _chomp_axioms,
@@ -53,6 +52,7 @@ from .struct.struct_utils import Annotation, Stanza
53
52
  from .struct.typedef import comment as has_comment
54
53
  from .struct.typedef import default_typedefs, has_ontology_root_term
55
54
  from .utils.cache import write_gzipped_graph
55
+ from .utils.io import safe_open
56
56
  from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version
57
57
 
58
58
  __all__ = [
@@ -76,13 +76,7 @@ def from_obo_path(
76
76
  ) -> Obo:
77
77
  """Get the OBO graph from a path."""
78
78
  path = Path(path).expanduser().resolve()
79
- if path.suffix.endswith(".gz"):
80
- import gzip
81
-
82
- logger.info("[%s] parsing gzipped OBO with obonet from %s", prefix or "<unknown>", path)
83
- with gzip.open(path, "rt") as file:
84
- graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
85
- elif path.suffix.endswith(".zip"):
79
+ if path.suffix.endswith(".zip"):
86
80
  import io
87
81
  import zipfile
88
82
 
@@ -95,7 +89,7 @@ def from_obo_path(
95
89
  )
96
90
  else:
97
91
  logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
98
- with open(path) as file:
92
+ with safe_open(path, read=True) as file:
99
93
  graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
100
94
 
101
95
  if prefix:
@@ -1262,7 +1256,7 @@ def _handle_prop(
1262
1256
  ):
1263
1257
  case Reference() as datatype_:
1264
1258
  datatype = datatype_
1265
- case BlacklistedError():
1259
+ case BlocklistError():
1266
1260
  return None
1267
1261
  case ParseError() as exc:
1268
1262
  if strict:
@@ -1304,7 +1298,7 @@ def _handle_prop(
1304
1298
  ):
1305
1299
  case Reference() as obj_reference:
1306
1300
  return Annotation(prop_reference, obj_reference)
1307
- case BlacklistedError():
1301
+ case BlocklistError():
1308
1302
  return None
1309
1303
  case UnparsableIRIError():
1310
1304
  return Annotation(prop_reference, OBOLiteral.uri(value))
@@ -1330,7 +1324,7 @@ def _handle_prop(
1330
1324
  ):
1331
1325
  case Reference() as obj_reference:
1332
1326
  return Annotation(prop_reference, obj_reference)
1333
- case BlacklistedError():
1327
+ case BlocklistError():
1334
1328
  return None
1335
1329
  case ParseError():
1336
1330
  if datatype:
@@ -1535,10 +1529,12 @@ def _parse_xref_line(
1535
1529
  ) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
1536
1530
  xref, _, rest = line.partition(" [")
1537
1531
 
1538
- if str_is_blacklisted(xref, ontology_prefix=ontology_prefix) or ":" not in xref:
1532
+ rules = get_rules()
1533
+
1534
+ if rules.str_is_blocked(xref, context=ontology_prefix) or ":" not in xref:
1539
1535
  return None # sometimes xref to self... weird
1540
1536
 
1541
- xref = remap_prefix(xref, ontology_prefix=ontology_prefix)
1537
+ xref = rules.remap_prefix(xref, context=ontology_prefix)
1542
1538
 
1543
1539
  split_space = " " in xref
1544
1540
  if split_space:
@@ -1552,7 +1548,7 @@ def _parse_xref_line(
1552
1548
  xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
1553
1549
  )
1554
1550
  match xref_ref:
1555
- case BlacklistedError():
1551
+ case BlocklistError():
1556
1552
  return None
1557
1553
  case ParseError() as exc:
1558
1554
  if strict:
pyobo/sources/cgnc.py CHANGED
@@ -45,7 +45,15 @@ HEADER = [
45
45
 
46
46
  def get_terms(force: bool = False) -> Iterable[Term]:
47
47
  """Get CGNC terms."""
48
- df = ensure_df(PREFIX, url=URL, name=f"{PREFIX}.tsv", force=force, header=0, names=HEADER)
48
+ df = ensure_df(
49
+ PREFIX,
50
+ url=URL,
51
+ name=f"{PREFIX}.tsv",
52
+ force=force,
53
+ header=0,
54
+ names=HEADER,
55
+ on_bad_lines="skip",
56
+ )
49
57
  for i, (cgnc_id, entrez_id, ensembl_id, name, synonym_1, synoynm_2, _, _) in enumerate(
50
58
  df.values
51
59
  ):
pyobo/sources/flybase.py CHANGED
@@ -18,7 +18,7 @@ __all__ = [
18
18
 
19
19
  logger = logging.getLogger(__name__)
20
20
 
21
- BASE_URL = "http://ftp.flybase.net/releases"
21
+ BASE_URL = "https://s3ftp.flybase.org/releases"
22
22
  PREFIX = "flybase"
23
23
  NAME = "FlyBase"
24
24
 
@@ -51,7 +51,7 @@ def _get_names(version: str, force: bool = False) -> pd.DataFrame:
51
51
 
52
52
  def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:
53
53
  """Get mapping from abbreviation column to NCBI taxonomy ID column."""
54
- url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/species/organism_list_fb_{version}.tsv.gz"
54
+ url = f"{BASE_URL}/FB{version}/precomputed_files/species/organism_list_fb_{version}.tsv.gz"
55
55
  df = ensure_df(
56
56
  PREFIX, url=url, force=force, version=version, skiprows=4, header=None, usecols=[2, 4]
57
57
  )
@@ -60,7 +60,7 @@ def _get_organisms(version: str, force: bool = False) -> Mapping[str, str]:
60
60
 
61
61
 
62
62
  def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
63
- url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/genes/automated_gene_summaries.tsv.gz"
63
+ url = f"{BASE_URL}/FB{version}/precomputed_files/genes/automated_gene_summaries.tsv.gz"
64
64
  df = ensure_df(
65
65
  PREFIX, url=url, force=force, version=version, skiprows=2, header=None, usecols=[0, 1]
66
66
  )
@@ -69,7 +69,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
69
69
 
70
70
  def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
71
71
  url = (
72
- f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
72
+ f"{BASE_URL}/FB{version}/precomputed_files/"
73
73
  f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
74
74
  )
75
75
  df = ensure_df(
@@ -86,7 +86,7 @@ def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[
86
86
 
87
87
 
88
88
  def _get_synonyms(version, force):
89
- url = f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/synonyms/fb_synonym_fb_{version}.tsv.gz"
89
+ url = f"{BASE_URL}/FB{version}/precomputed_files/synonyms/fb_synonym_fb_{version}.tsv.gz"
90
90
  df = ensure_df(PREFIX, url=url, force=force, version=version, skiprows=4, usecols=[0, 2])
91
91
  return df # TODO use this
92
92
 
pyobo/sources/omim_ps.py CHANGED
@@ -13,7 +13,7 @@ __all__ = [
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
  PREFIX = "omim.ps"
16
- URL = "https://omim.org/phenotypicSeriesTitles/all"
16
+ URL = "https://omim.org/phenotypicSeriesTitles/"
17
17
 
18
18
 
19
19
  class OMIMPSGetter(Obo):
@@ -26,13 +26,13 @@ class OMIMPSGetter(Obo):
26
26
  soup = get_soup(URL, user_agent="Mozilla/5.0")
27
27
  content = soup.find(id="mimContent")
28
28
  if content is None:
29
- raise ValueError
29
+ raise ValueError("omim.ps failed - scraper could not find id='mimContent' in HTML")
30
30
  table = content.find("table") # type:ignore[attr-defined]
31
31
  if table is None:
32
- raise ValueError
32
+ raise ValueError("omim.ps failed - scraper could not find table in HTML")
33
33
  tbody = table.find("tbody")
34
34
  if tbody is None:
35
- raise ValueError
35
+ raise ValueError("omim.ps failed - scraper could not find table body in HTML")
36
36
  for row in tbody.find_all("tr"):
37
37
  anchor = row.find("td").find("a")
38
38
  name = anchor.text.strip()
@@ -105,4 +105,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
105
105
 
106
106
 
107
107
  if __name__ == "__main__":
108
- PharmGKBGeneGetter().write_default(force=True, write_obo=True, use_tqdm=True)
108
+ PharmGKBGeneGetter.cli()
@@ -16,6 +16,7 @@ from pyobo.struct.functional.utils import (
16
16
  FunctionalOWLSerializable,
17
17
  list_to_funowl,
18
18
  )
19
+ from pyobo.utils.io import safe_open
19
20
 
20
21
  __all__ = [
21
22
  "Document",
@@ -109,7 +110,8 @@ class Document:
109
110
  def write_funowl(self, path: str | Path) -> None:
110
111
  """Write functional OWL to a file.."""
111
112
  path = Path(path).expanduser().resolve()
112
- path.write_text(self.to_funowl())
113
+ with safe_open(path, read=False) as file:
114
+ file.write(self.to_funowl())
113
115
 
114
116
  def to_funowl(self) -> str:
115
117
  """Get the document as a functional OWL string."""
pyobo/struct/reference.py CHANGED
@@ -14,9 +14,9 @@ import dateutil.parser
14
14
  import pytz
15
15
  from bioregistry import NormalizedNamableReference as Reference
16
16
  from curies import ReferenceTuple
17
+ from curies.preprocessing import BlocklistError
17
18
 
18
19
  from ..identifier_utils import (
19
- BlacklistedError,
20
20
  NotCURIEError,
21
21
  ParseError,
22
22
  UnparsableIRIError,
@@ -62,7 +62,7 @@ def _parse_str_or_curie_or_uri(
62
62
  match reference:
63
63
  case Reference():
64
64
  return reference
65
- case BlacklistedError():
65
+ case BlocklistError():
66
66
  return None
67
67
  case ParseError():
68
68
  if strict:
@@ -224,7 +224,7 @@ def _obo_parse_identifier(
224
224
  ):
225
225
  case Reference() as reference:
226
226
  return reference
227
- case BlacklistedError():
227
+ case BlocklistError():
228
228
  return None
229
229
  case NotCURIEError() as exc:
230
230
  # this means there's no colon `:`
@@ -272,7 +272,7 @@ def _parse_reference_or_uri_literal(
272
272
  ):
273
273
  case Reference() as reference:
274
274
  return reference
275
- case BlacklistedError():
275
+ case BlocklistError():
276
276
  return None
277
277
  case UnparsableIRIError():
278
278
  # this means that it's defininitely a URI,