pyobo 0.12.7__tar.gz → 0.12.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobo-0.12.7 → pyobo-0.12.9}/PKG-INFO +5 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/pyproject.toml +11 -2
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/__init__.py +12 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/__init__.py +2 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/embedding.py +36 -6
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/hierarchy.py +6 -4
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/properties.py +12 -3
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/relations.py +9 -5
- pyobo-0.12.9/src/pyobo/ner/__init__.py +17 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ner/normalizer.py +2 -2
- pyobo-0.12.9/src/pyobo/ner/scispacy_utils.py +241 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/__init__.py +2 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/chembl_target.py +1 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/expasy.py +4 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/go.py +6 -3
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gtdb.py +1 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/iana_media_type.py +3 -1
- pyobo-0.12.9/src/pyobo/sources/iconclass.py +55 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/macros.py +12 -12
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/reference.py +3 -1
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/struct.py +22 -14
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/version.py +1 -1
- pyobo-0.12.7/src/pyobo/ner/__init__.py +0 -9
- {pyobo-0.12.7 → pyobo-0.12.9}/LICENSE +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/README.md +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/.DS_Store +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/__main__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/alts.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/combine.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/edges.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/metadata.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/names.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/species.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/typedefs.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/api/xrefs.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/cli.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/database.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/database_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/lookup.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/cli/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/constants.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/getters.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/gilda_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/api.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/api.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/data.json +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/data_owl.json +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/data_rdf.json +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/identifier_utils/relations/data_rdfs.json +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/mocks.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ner/api.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/plugins.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/py.typed +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resource_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/ncbitaxon.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/ncbitaxon.tsv.gz +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/ro.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/ro.tsv +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/so.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/resources/so.tsv +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/README.md +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/agrovoc.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/antibodyregistry.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/bigg/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/bigg/bigg_compartment.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/bigg/bigg_metabolite.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/bigg/bigg_model.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/bigg/bigg_reaction.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/biogrid.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/ccle.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/cgnc.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chebi.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/chembl_cell.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/chembl_compound.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/chembl_mechanism.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/chembl/chembl_tissue.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/civic_gene.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/clinicaltrials.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/complexportal.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/conso.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/cpt.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/credit.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/cvx.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/depmap.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/dictybase_gene.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/drugbank/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/drugbank/drugbank.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/drugbank/drugbank_salt.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/drugcentral.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/famplex.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/flybase.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gard.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/geonames/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/geonames/features.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/geonames/geonames.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/geonames/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gmt_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gwascentral/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gwascentral/gwascentral_phenotype.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/gwascentral/gwascentral_study.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/hgnc/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/hgnc/hgnc.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/hgnc/hgncgenefamily.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/icd/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/icd/icd10.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/icd/icd11.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/icd/icd_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/intact.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/interpro.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/itis.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/kegg/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/kegg/api.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/kegg/genes.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/kegg/genome.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/kegg/pathway.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mesh.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mgi.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mirbase/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mirbase/mirbase.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mirbase/mirbase_constants.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mirbase/mirbase_family.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/mirbase/mirbase_mature.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/msigdb.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/ncbi/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/ncbi/ncbi_gc.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/ncbi/ncbigene.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/nih_reporter.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/nlm/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/nlm/nlm_catalog.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/nlm/nlm_publisher.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/nlm/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/npass.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/omim_ps.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pathbank.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pfam/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pfam/pfam.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pfam/pfam_clan.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/pharmgkb_chemical.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/pharmgkb_disease.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/pharmgkb_gene.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/pharmgkb_pathway.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/pharmgkb_variant.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pharmgkb/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pid.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pombase.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/pubchem.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/reactome.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/rgd.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/rhea.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/ror.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/selventa/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/selventa/schem.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/selventa/scomp.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/selventa/sdis.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/selventa/sfam.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/sgd.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/signor/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/signor/download.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/signor/signor_complexes.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/slm.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/spdx.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/__main__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/get_synonym_types.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/sty.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/synonym_types.tsv +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/umls/umls.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/unimod.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/uniprot/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/uniprot/uniprot.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/uniprot/uniprot_ptm.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/wikipathways.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/sources/zfin.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ssg/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ssg/base.html +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ssg/index.html +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ssg/term.html +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/ssg/typedef.html +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/dsl.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/obo_to_functional.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/ontology.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/functional/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obo/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obo/reader.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obo/reader_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obograph/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obograph/export.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obograph/reader.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/obograph/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/struct_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/typedef.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/struct/vocabulary.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/__init__.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/cache.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/io.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/iter.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/misc.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/ndex_utils.py +0 -0
- {pyobo-0.12.7 → pyobo-0.12.9}/src/pyobo/utils/path.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobo
|
|
3
|
-
Version: 0.12.7
|
|
3
|
+
Version: 0.12.9
|
|
4
4
|
Summary: A python package for handling and generating OBO
|
|
5
5
|
Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -59,6 +59,9 @@ Requires-Dist: psycopg2-binary ; extra == 'drugcentral'
|
|
|
59
59
|
Requires-Dist: ssslm[gilda] ; extra == 'gilda'
|
|
60
60
|
Requires-Dist: ssslm[gilda-slim] ; extra == 'gilda-slim'
|
|
61
61
|
Requires-Dist: protmapper ; extra == 'pid'
|
|
62
|
+
Requires-Dist: scispacy ; python_full_version < '3.13' and extra == 'scispacy'
|
|
63
|
+
Requires-Dist: spacy ; python_full_version < '3.13' and extra == 'scispacy'
|
|
64
|
+
Requires-Dist: scipy<1.11 ; python_full_version < '3.13' and extra == 'scispacy'
|
|
62
65
|
Requires-Dist: pyobo[drugcentral,pid] ; extra == 'sources'
|
|
63
66
|
Maintainer: Charles Tapley Hoyt
|
|
64
67
|
Maintainer-email: Charles Tapley Hoyt <cthoyt@gmail.com>
|
|
@@ -72,6 +75,7 @@ Provides-Extra: drugcentral
|
|
|
72
75
|
Provides-Extra: gilda
|
|
73
76
|
Provides-Extra: gilda-slim
|
|
74
77
|
Provides-Extra: pid
|
|
78
|
+
Provides-Extra: scispacy
|
|
75
79
|
Provides-Extra: sources
|
|
76
80
|
Description-Content-Type: text/markdown
|
|
77
81
|
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pyobo"
|
|
7
|
-
version = "0.12.7"
|
|
7
|
+
version = "0.12.9"
|
|
8
8
|
description = "A python package for handling and generating OBO"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
@@ -165,6 +165,15 @@ drugcentral =[
|
|
|
165
165
|
pid = [
|
|
166
166
|
"protmapper",
|
|
167
167
|
]
|
|
168
|
+
# note that scispacy currently only works on Python <= 3.12
|
|
169
|
+
# because of its transitive dependencies. It also has
|
|
170
|
+
# a hard cutoff on scipy because it relied on (private/unstable?)
|
|
171
|
+
# functionality that no longer exists in newer versions
|
|
172
|
+
scispacy = [
|
|
173
|
+
"scispacy; python_version <= '3.12'",
|
|
174
|
+
"spacy; python_version <= '3.12'",
|
|
175
|
+
"scipy<1.11; python_version <= '3.12'",
|
|
176
|
+
]
|
|
168
177
|
|
|
169
178
|
# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls
|
|
170
179
|
# and also https://packaging.python.org/en/latest/specifications/well-known-project-urls/
|
|
@@ -296,7 +305,7 @@ known-first-party = [
|
|
|
296
305
|
docstring-code-format = true
|
|
297
306
|
|
|
298
307
|
[tool.bumpversion]
|
|
299
|
-
current_version = "0.12.7"
|
|
308
|
+
current_version = "0.12.9"
|
|
300
309
|
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P<build>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?"
|
|
301
310
|
serialize = [
|
|
302
311
|
"{major}.{minor}.{patch}-{release}+{build}",
|
|
@@ -50,6 +50,7 @@ from .api import (
|
|
|
50
50
|
get_synonyms,
|
|
51
51
|
get_text_embedding,
|
|
52
52
|
get_text_embedding_similarity,
|
|
53
|
+
get_text_embeddings_df,
|
|
53
54
|
get_typedef_df,
|
|
54
55
|
get_xref,
|
|
55
56
|
get_xrefs,
|
|
@@ -58,7 +59,13 @@ from .api import (
|
|
|
58
59
|
is_descendent,
|
|
59
60
|
)
|
|
60
61
|
from .getters import get_ontology
|
|
61
|
-
from .ner import get_grounder, ground
|
|
62
|
+
from .ner import (
|
|
63
|
+
get_grounder,
|
|
64
|
+
get_scispacy_entities,
|
|
65
|
+
get_scispacy_entity_linker,
|
|
66
|
+
get_scispacy_knowledgebase,
|
|
67
|
+
ground,
|
|
68
|
+
)
|
|
62
69
|
from .plugins import (
|
|
63
70
|
has_nomenclature_plugin,
|
|
64
71
|
iter_nomenclature_plugins,
|
|
@@ -139,12 +146,16 @@ __all__ = [
|
|
|
139
146
|
"get_relation",
|
|
140
147
|
"get_relation_mapping",
|
|
141
148
|
"get_relations_df",
|
|
149
|
+
"get_scispacy_entities",
|
|
150
|
+
"get_scispacy_entity_linker",
|
|
151
|
+
"get_scispacy_knowledgebase",
|
|
142
152
|
"get_species",
|
|
143
153
|
"get_sssom_df",
|
|
144
154
|
"get_subhierarchy",
|
|
145
155
|
"get_synonyms",
|
|
146
156
|
"get_text_embedding",
|
|
147
157
|
"get_text_embedding_similarity",
|
|
158
|
+
"get_text_embeddings_df",
|
|
148
159
|
"get_typedef_df",
|
|
149
160
|
"get_version",
|
|
150
161
|
"get_xref",
|
|
@@ -8,7 +8,7 @@ from .alts import (
|
|
|
8
8
|
)
|
|
9
9
|
from .combine import get_literal_mappings_subset
|
|
10
10
|
from .edges import get_edges, get_edges_df, get_graph
|
|
11
|
-
from .embedding import get_text_embedding, get_text_embedding_similarity
|
|
11
|
+
from .embedding import get_text_embedding, get_text_embedding_similarity, get_text_embeddings_df
|
|
12
12
|
from .hierarchy import (
|
|
13
13
|
get_ancestors,
|
|
14
14
|
get_children,
|
|
@@ -119,6 +119,7 @@ __all__ = [
|
|
|
119
119
|
"get_synonyms",
|
|
120
120
|
"get_text_embedding",
|
|
121
121
|
"get_text_embedding_similarity",
|
|
122
|
+
"get_text_embeddings_df",
|
|
122
123
|
"get_typedef_df",
|
|
123
124
|
"get_version",
|
|
124
125
|
"get_xref",
|
|
@@ -6,8 +6,9 @@ from typing import TYPE_CHECKING
|
|
|
6
6
|
|
|
7
7
|
import curies
|
|
8
8
|
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
9
10
|
|
|
10
|
-
from pyobo.api.names import get_definition, get_name
|
|
11
|
+
from pyobo.api.names import get_definition, get_name, get_references
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
14
|
import sentence_transformers
|
|
@@ -16,6 +17,7 @@ __all__ = [
|
|
|
16
17
|
"get_text_embedding",
|
|
17
18
|
"get_text_embedding_model",
|
|
18
19
|
"get_text_embedding_similarity",
|
|
20
|
+
"get_text_embeddings_df",
|
|
19
21
|
]
|
|
20
22
|
|
|
21
23
|
|
|
@@ -39,6 +41,30 @@ def _get_text(
|
|
|
39
41
|
return name
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
def get_text_embeddings_df(
|
|
45
|
+
prefix: str,
|
|
46
|
+
*,
|
|
47
|
+
model: sentence_transformers.SentenceTransformer | None = None,
|
|
48
|
+
) -> pd.DataFrame:
|
|
49
|
+
"""Get embeddings for all entities in the resource.
|
|
50
|
+
|
|
51
|
+
:param prefix: The prefix of the resource whose entities should be embedded
|
|
52
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
53
|
+
given.
|
|
54
|
+
"""
|
|
55
|
+
luids, texts = [], []
|
|
56
|
+
for reference in get_references(prefix):
|
|
57
|
+
text = _get_text(reference)
|
|
58
|
+
if text is None:
|
|
59
|
+
continue
|
|
60
|
+
luids.append(reference.identifier)
|
|
61
|
+
texts.append(text)
|
|
62
|
+
if model is None:
|
|
63
|
+
model = get_text_embedding_model()
|
|
64
|
+
res = model.encode(texts)
|
|
65
|
+
return pd.DataFrame(res, index=luids)
|
|
66
|
+
|
|
67
|
+
|
|
42
68
|
def get_text_embedding(
|
|
43
69
|
reference: str | curies.Reference | curies.ReferenceTuple,
|
|
44
70
|
*,
|
|
@@ -47,8 +73,10 @@ def get_text_embedding(
|
|
|
47
73
|
"""Get a text embedding for an entity, or return none if no text is available.
|
|
48
74
|
|
|
49
75
|
:param reference: A reference, either as a string or Reference object
|
|
50
|
-
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
51
|
-
|
|
76
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
77
|
+
given.
|
|
78
|
+
|
|
79
|
+
:returns: A 1D numpy float array of embeddings from :class:`sentence_transformers`
|
|
52
80
|
|
|
53
81
|
.. code-block:: python
|
|
54
82
|
|
|
@@ -87,9 +115,11 @@ def get_text_embedding_similarity(
|
|
|
87
115
|
|
|
88
116
|
:param reference_1: A reference, given as a string or Reference object
|
|
89
117
|
:param reference_2: A second reference
|
|
90
|
-
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
91
|
-
|
|
92
|
-
|
|
118
|
+
:param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not
|
|
119
|
+
given.
|
|
120
|
+
|
|
121
|
+
:returns: A floating point similarity, if text is available for both references,
|
|
122
|
+
otherwise none
|
|
93
123
|
|
|
94
124
|
.. code-block:: python
|
|
95
125
|
|
|
@@ -163,7 +163,8 @@ def is_descendent(
|
|
|
163
163
|
:param ancestor_prefix: The prefix for the ancestor
|
|
164
164
|
:param ancestor_identifier: The local unique identifier for the ancestor
|
|
165
165
|
:param kwargs: Keyword arguments for :func:`get_hierarchy`
|
|
166
|
-
|
|
166
|
+
|
|
167
|
+
:returns: If the descendant has the given ancestor
|
|
167
168
|
|
|
168
169
|
Check that ``GO:0070246`` (natural killer cell apoptotic process) is a descendant of
|
|
169
170
|
``GO:0006915`` (apoptotic process)
|
|
@@ -254,10 +255,11 @@ def has_ancestor(
|
|
|
254
255
|
:param ancestor_prefix: The prefix for the ancestor
|
|
255
256
|
:param ancestor_identifier: The local unique identifier for the ancestor
|
|
256
257
|
:param kwargs: Keyword arguments for :func:`get_hierarchy`
|
|
257
|
-
:return: If the decendant has the given ancestor
|
|
258
258
|
|
|
259
|
-
|
|
260
|
-
|
|
259
|
+
:returns: If the descendant has the given ancestor
|
|
260
|
+
|
|
261
|
+
Check that ``GO:0008219`` (cell death) is an ancestor of ``GO:0006915`` (apoptotic
|
|
262
|
+
process):
|
|
261
263
|
|
|
262
264
|
>>> apoptosis = Reference.from_curie("GO:0006915", name="apoptotic process")
|
|
263
265
|
>>> cell_death = Reference.from_curie("GO:0008219", name="cell death")
|
|
@@ -111,6 +111,7 @@ def get_properties_df(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> pd.Da
|
|
|
111
111
|
"""Extract properties.
|
|
112
112
|
|
|
113
113
|
:param prefix: the resource to load
|
|
114
|
+
|
|
114
115
|
:returns: A dataframe with the properties
|
|
115
116
|
"""
|
|
116
117
|
df1 = get_literal_properties_df(prefix, **kwargs)
|
|
@@ -131,6 +132,7 @@ def get_filtered_properties_mapping(
|
|
|
131
132
|
|
|
132
133
|
:param prefix: the resource to load
|
|
133
134
|
:param prop: the property to extract
|
|
135
|
+
|
|
134
136
|
:returns: A mapping from identifier to property value
|
|
135
137
|
"""
|
|
136
138
|
df = get_filtered_properties_df(prefix, prop, **kwargs)
|
|
@@ -145,6 +147,7 @@ def get_filtered_properties_multimapping(
|
|
|
145
147
|
|
|
146
148
|
:param prefix: the resource to load
|
|
147
149
|
:param prop: the property to extract
|
|
150
|
+
|
|
148
151
|
:returns: A mapping from identifier to property values
|
|
149
152
|
"""
|
|
150
153
|
df = get_filtered_properties_df(prefix, prop, **kwargs)
|
|
@@ -159,7 +162,9 @@ def get_property(
|
|
|
159
162
|
:param prefix: the resource to load
|
|
160
163
|
:param identifier: the identifier within the resource
|
|
161
164
|
:param prop: the property to extract
|
|
162
|
-
|
|
165
|
+
|
|
166
|
+
:returns: The single value for the property. If multiple are expected, use
|
|
167
|
+
:func:`get_properties`
|
|
163
168
|
|
|
164
169
|
>>> import pyobo
|
|
165
170
|
>>> pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles")
|
|
@@ -182,7 +187,9 @@ def get_properties(
|
|
|
182
187
|
:param prefix: the resource to load
|
|
183
188
|
:param identifier: the identifier within the resource
|
|
184
189
|
:param prop: the property to extract
|
|
185
|
-
|
|
190
|
+
|
|
191
|
+
:returns: Multiple values for the property. If only one is expected, use
|
|
192
|
+
:func:`get_property`
|
|
186
193
|
"""
|
|
187
194
|
filtered_properties_multimapping = get_filtered_properties_multimapping(
|
|
188
195
|
prefix=prefix, prop=prop, **kwargs
|
|
@@ -198,7 +205,9 @@ def get_filtered_properties_df(
|
|
|
198
205
|
|
|
199
206
|
:param prefix: the resource to load
|
|
200
207
|
:param prop: the property to extract
|
|
201
|
-
|
|
208
|
+
|
|
209
|
+
:returns: A dataframe from identifier to property value. Columns are [<prefix>_id,
|
|
210
|
+
value].
|
|
202
211
|
"""
|
|
203
212
|
prop = _ensure_ref(prop, ontology_prefix=prefix)
|
|
204
213
|
df = get_properties_df(prefix, **kwargs)
|
|
@@ -135,9 +135,11 @@ def get_relation_mapping(
|
|
|
135
135
|
) -> Mapping[str, str]:
|
|
136
136
|
"""Get relations from identifiers in the source prefix to target prefix with the given relation.
|
|
137
137
|
|
|
138
|
-
.. warning::
|
|
138
|
+
.. warning::
|
|
139
139
|
|
|
140
|
-
|
|
140
|
+
Assumes there's only one version of the property for each term.
|
|
141
|
+
|
|
142
|
+
Example usage: get homology between HGNC and MGI:
|
|
141
143
|
|
|
142
144
|
>>> import pyobo
|
|
143
145
|
>>> human_mapt_hgnc_id = "6893"
|
|
@@ -161,16 +163,18 @@ def get_relation(
|
|
|
161
163
|
) -> str | None:
|
|
162
164
|
"""Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
|
|
163
165
|
|
|
164
|
-
.. warning::
|
|
166
|
+
.. warning::
|
|
167
|
+
|
|
168
|
+
Assumes there's only one version of the property for each term.
|
|
165
169
|
|
|
166
|
-
|
|
170
|
+
Example usage: get homology between MAPT in HGNC and MGI:
|
|
167
171
|
|
|
168
172
|
>>> import pyobo
|
|
169
173
|
>>> human_mapt_hgnc_id = "6893"
|
|
170
174
|
>>> mouse_mapt_mgi_id = "97180"
|
|
171
175
|
>>> assert mouse_mapt_mgi_id == pyobo.get_relation(
|
|
172
176
|
... "hgnc", human_mapt_hgnc_id, "ro:HOM0000017", "mgi"
|
|
173
|
-
|
|
177
|
+
>>> )
|
|
174
178
|
"""
|
|
175
179
|
relation_mapping = get_relation_mapping(
|
|
176
180
|
prefix=prefix,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Wrapper around NER functionalities."""
|
|
2
|
+
|
|
3
|
+
from .api import get_grounder
|
|
4
|
+
from .normalizer import ground
|
|
5
|
+
from .scispacy_utils import (
|
|
6
|
+
get_scispacy_entities,
|
|
7
|
+
get_scispacy_entity_linker,
|
|
8
|
+
get_scispacy_knowledgebase,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"get_grounder",
|
|
13
|
+
"get_scispacy_entities",
|
|
14
|
+
"get_scispacy_entity_linker",
|
|
15
|
+
"get_scispacy_knowledgebase",
|
|
16
|
+
"ground",
|
|
17
|
+
]
|
|
@@ -20,8 +20,8 @@ def ground(
|
|
|
20
20
|
) -> Reference | None:
|
|
21
21
|
"""Normalize a string given the prefix's labels and synonyms.
|
|
22
22
|
|
|
23
|
-
:param prefix: If a string, only grounds against that namespace. If a list, will try
|
|
24
|
-
against all in that order
|
|
23
|
+
:param prefix: If a string, only grounds against that namespace. If a list, will try
|
|
24
|
+
grounding against all in that order
|
|
25
25
|
:param query: The string to try grounding
|
|
26
26
|
"""
|
|
27
27
|
grounder = get_grounder(prefix, **kwargs)
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""A bridge between PyOBO and :mod:`scispacy`.
|
|
2
|
+
|
|
3
|
+
:mod:`scispacy` implements a lexical index in
|
|
4
|
+
:class:`scispacy.linking_utils.KnowledgeBase` which keeps track of labels, synonyms, and
|
|
5
|
+
definitions for entities. These are used to construct a TF-IDF index and implement
|
|
6
|
+
entity linking (also called named entity normalization (NEN) or grounding) in
|
|
7
|
+
:class:`scispacy.linking.EntityLinker`.
|
|
8
|
+
|
|
9
|
+
Constructing a Lexical Index
|
|
10
|
+
============================
|
|
11
|
+
|
|
12
|
+
An *ad hoc* SciSpacy lexical index can be constructed on-the-fly by passing a
|
|
13
|
+
Bioregistry prefix to :func:`pyobo.get_scispacy_knowledgebase`. In the following
|
|
14
|
+
example, the prefix ``to`` is used to construct a lexical index for the `Plant Trait
|
|
15
|
+
Ontology <https://bioregistry.io/to>`_.
|
|
16
|
+
|
|
17
|
+
.. code-block:: python
|
|
18
|
+
|
|
19
|
+
import pyobo
|
|
20
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
21
|
+
|
|
22
|
+
kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("to")
|
|
23
|
+
|
|
24
|
+
The high-level PyOBO interface abstracts the differences between external ontologies
|
|
25
|
+
like the Plant Trait Ontology and databases that are converted to ontologies in
|
|
26
|
+
:mod:`pyobo.sources` like the `HUGO Gene Nomenclature Committee
|
|
27
|
+
<https://bioregistry.io/hgnc>`_. Therefore, you can also do
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
import pyobo
|
|
32
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
33
|
+
|
|
34
|
+
kb: KnowledgeBase = pyobo.get_scispacy_knowledgebase("hgnc")
|
|
35
|
+
|
|
36
|
+
Alternatively, a reusable class can be defined like in the following:
|
|
37
|
+
|
|
38
|
+
.. code-block:: python
|
|
39
|
+
|
|
40
|
+
import pyobo
|
|
41
|
+
from scispacy.linking_utils import KnowledgeBase
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class HGNCKnowledgeBase(KnowledgeBase):
|
|
45
|
+
def __init__(self) -> None:
|
|
46
|
+
super().__init__(pyobo.get_scispacy_entities("hgnc"))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
kb = HGNCKnowledgeBase()
|
|
50
|
+
|
|
51
|
+
Constructing an Entity Linker
|
|
52
|
+
=============================
|
|
53
|
+
|
|
54
|
+
An entity linker can be constructed from a :class:`scispacy.linking_utils.KnowledgeBase`
|
|
55
|
+
like in:
|
|
56
|
+
|
|
57
|
+
.. code-block:: python
|
|
58
|
+
|
|
59
|
+
import pyobo
|
|
60
|
+
from scispacy.linking import EntityLinker
|
|
61
|
+
|
|
62
|
+
kb = pyobo.get_scispacy_knowledgebase("hgnc")
|
|
63
|
+
linker = EntityLinker.from_kb(kb, filter_for_definitions=False)
|
|
64
|
+
|
|
65
|
+
Here, ``filter_for_definitions`` is set to ``False`` to retain entities that don't have
|
|
66
|
+
a definition.
|
|
67
|
+
|
|
68
|
+
PyOBO provides a convenience function :func:`pyobo.get_scispacy_entity_linker` that
|
|
69
|
+
wraps this workflow and also automatically caches the TF-IDF index constructed in the
|
|
70
|
+
process in the correctly versioned folder in the PyOBO cache.
|
|
71
|
+
|
|
72
|
+
.. code-block:: python
|
|
73
|
+
|
|
74
|
+
import pyobo
|
|
75
|
+
from scispacy.linking import EntityLinker
|
|
76
|
+
|
|
77
|
+
linker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
|
|
78
|
+
|
|
79
|
+
Full Workflow
|
|
80
|
+
=============
|
|
81
|
+
|
|
82
|
+
Once an entity linker has been constructed, it can be used in series with a
|
|
83
|
+
:class:`spacy.language.Language` object instantiated with :func:`spacy.load` to ground named
|
|
84
|
+
entities that were recognized by a model like ``en_core_web_sm``:
|
|
85
|
+
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
import pyobo
|
|
89
|
+
import spacy
|
|
90
|
+
from scispacy.linking import EntityLinker
|
|
91
|
+
from tabulate import tabulate
|
|
92
|
+
|
|
93
|
+
linker: EntityLinker = pyobo.get_scispacy_entity_linker("hgnc", filter_for_definitions=False)
|
|
94
|
+
|
|
95
|
+
# now, put it all together with a NER model
|
|
96
|
+
nlp = spacy.load("en_core_web_sm")
|
|
97
|
+
|
|
98
|
+
text = (
|
|
99
|
+
"RAC(Rho family)-alpha serine/threonine-protein kinase "
|
|
100
|
+
"is an enzyme that in humans is encoded by the AKT1 gene."
|
|
101
|
+
)
|
|
102
|
+
doc = linker(nlp(text))
|
|
103
|
+
|
|
104
|
+
rows = [
|
|
105
|
+
(
|
|
106
|
+
span,
|
|
107
|
+
span.start_char,
|
|
108
|
+
span.end_char,
|
|
109
|
+
f"`{curie} <https://bioregistry.io/{curie}>`_",
|
|
110
|
+
score,
|
|
111
|
+
)
|
|
112
|
+
for span in doc.ents
|
|
113
|
+
for curie, score in span._.kb_ents
|
|
114
|
+
]
|
|
115
|
+
print(tabulate(rows, headers=["text", "start", "end", "curie", "score"], tablefmt="rst"))
|
|
116
|
+
|
|
117
|
+
==== ===== === ============================================= ========
|
|
118
|
+
text start end curie score
|
|
119
|
+
==== ===== === ============================================= ========
|
|
120
|
+
AKT1 100 104 `hgnc:391 <https://bioregistry.io/hgnc:391>`_ 1
|
|
121
|
+
AKT1 100 104 `hgnc:392 <https://bioregistry.io/hgnc:392>`_ 0.776504
|
|
122
|
+
AKT1 100 104 `hgnc:393 <https://bioregistry.io/hgnc:393>`_ 0.764049
|
|
123
|
+
==== ===== === ============================================= ========
|
|
124
|
+
|
|
125
|
+
This example recognizes the AKT serine/threonine kinase 1 (AKT1) gene and provides three
|
|
126
|
+
highly scored groundings, the best of which, `hgnc:391
|
|
127
|
+
<https://bioregistry.io/hgnc:391>`_, is correct.
|
|
128
|
+
|
|
129
|
+
.. note::
|
|
130
|
+
|
|
131
|
+
The groundings and scores are stored by SciSpacy in the hidden attribute
|
|
132
|
+
``span._.kb_ents``.
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
from __future__ import annotations
|
|
136
|
+
|
|
137
|
+
from collections.abc import Iterable
|
|
138
|
+
from typing import TYPE_CHECKING, Any
|
|
139
|
+
|
|
140
|
+
from typing_extensions import Unpack
|
|
141
|
+
|
|
142
|
+
from ..api.utils import get_version_from_kwargs
|
|
143
|
+
from ..constants import GetOntologyKwargs
|
|
144
|
+
from ..getters import get_ontology
|
|
145
|
+
from ..utils.path import prefix_directory_join
|
|
146
|
+
|
|
147
|
+
if TYPE_CHECKING:
|
|
148
|
+
from scispacy.linking import EntityLinker
|
|
149
|
+
from scispacy.linking_utils import Entity, KnowledgeBase
|
|
150
|
+
|
|
151
|
+
__all__ = [
|
|
152
|
+
"get_scispacy_entities",
|
|
153
|
+
"get_scispacy_entity_linker",
|
|
154
|
+
"get_scispacy_knowledgebase",
|
|
155
|
+
]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def get_scispacy_entity_linker(
    prefix: str,
    *,
    ontology_kwargs: GetOntologyKwargs | None = None,
    candidate_generator_kwargs: dict[str, Any] | None = None,
    **entity_linker_kwargs: Any,
) -> EntityLinker:
    """Get an entity linker object for usage with :mod:`scispacy`.

    :param prefix:
        The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the
        Disease Ontology, or more.
    :param ontology_kwargs: keyword arguments to pass to :func:`pyobo.get_ontology`,
        such as ``version``.
    :param candidate_generator_kwargs: keyword arguments to pass to
        :class:`scispacy.candidate_generation.CandidateGenerator`, such as ``ef_search``
    :param entity_linker_kwargs: keyword arguments to pass to
        :class:`scispacy.linking.EntityLinker`, such as ``filter_for_definitions``

    :returns: An object that can be applied in a :mod:`spacy` natural language
        processing workflow, namely to apply grounding/named entity normalization to
        recognized named entities.
    """
    # Imported lazily: at module level scispacy is only imported under TYPE_CHECKING.
    from scispacy.linking import EntityLinker

    if ontology_kwargs is None:
        ontology_kwargs = {}

    # The index is written to a directory keyed by both the prefix and the
    # resolved version of the resource.
    version = get_version_from_kwargs(prefix, ontology_kwargs)
    scispacy_cache_directory = prefix_directory_join(prefix, "scispacy", version=version)

    # TODO see if we can skip loading the KB
    kb = get_scispacy_knowledgebase(prefix, **ontology_kwargs)
    # ``entity_linker_kwargs`` is collected via ``**`` and is therefore always a
    # dict, so it can be splatted directly without a ``or {}`` fallback.
    return EntityLinker.from_kb(
        kb,
        ann_index_out_dir=scispacy_cache_directory.as_posix(),
        candidate_generator_kwargs=candidate_generator_kwargs,
        **entity_linker_kwargs,
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def get_scispacy_knowledgebase(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> KnowledgeBase:
    """Build a :mod:`scispacy` knowledgebase from the terms of an ontology.

    :param prefix:
        The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the
        Disease Ontology, or more.
    :param kwargs:
        keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.

    :returns: An object that represents a lexical index over name, synonym, and
        definition strings from the ontology.
    """
    # Imported lazily: at module level scispacy is only imported under TYPE_CHECKING.
    from scispacy.linking_utils import KnowledgeBase

    entities = get_scispacy_entities(prefix, **kwargs)
    return KnowledgeBase(entities)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def get_scispacy_entities(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Iterable[Entity]:
    """Yield one :mod:`scispacy` entity per term of an ontology loaded via :mod:`pyobo`.

    :param prefix:
        The ontology's prefix, such as ``go`` for Gene Ontology, ``doid`` for the
        Disease Ontology, or more.
    :param kwargs:
        keyword arguments to pass to :func:`pyobo.get_ontology`, such as ``version``.

    :yields: Entity objects for all terms in the ontology, carrying each term's
        CURIE, name, synonym names, and definition.
    """
    # Imported lazily: at module level scispacy is only imported under TYPE_CHECKING.
    from scispacy.linking_utils import Entity

    # TODO reuse labels, synonyms, and definitions cache
    for term in get_ontology(prefix, **kwargs):
        yield Entity(
            concept_id=term.curie,
            canonical_name=term.name,
            aliases=[synonym.name for synonym in term.synonyms],
            definition=term.definition,
        )
|
|
@@ -34,6 +34,7 @@ from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
|
|
|
34
34
|
from .hgnc import HGNCGetter, HGNCGroupGetter
|
|
35
35
|
from .iana_media_type import IANAGetter
|
|
36
36
|
from .icd import ICD10Getter, ICD11Getter
|
|
37
|
+
from .iconclass import IconclassGetter
|
|
37
38
|
from .intact import IntactGetter
|
|
38
39
|
from .interpro import InterProGetter
|
|
39
40
|
from .itis import ITISGetter
|
|
@@ -115,6 +116,7 @@ __all__ = [
|
|
|
115
116
|
"ICD10Getter",
|
|
116
117
|
"ICD11Getter",
|
|
117
118
|
"ITISGetter",
|
|
119
|
+
"IconclassGetter",
|
|
118
120
|
"IntactGetter",
|
|
119
121
|
"InterProGetter",
|
|
120
122
|
"KEGGGeneGetter",
|
|
@@ -138,7 +138,7 @@ def iter_terms(version: str) -> Iterable[Term]:
|
|
|
138
138
|
def get_chembl_protein_equivalences(version: str | None = None) -> dict[str, list[str]]:
|
|
139
139
|
"""Get ChEMBL protein equivalences."""
|
|
140
140
|
if version is None:
|
|
141
|
-
version = chembl_downloader.latest()
|
|
141
|
+
version = chembl_downloader.latest(full=False)
|
|
142
142
|
url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
|
|
143
143
|
df = ensure_df(
|
|
144
144
|
PREFIX,
|
|
@@ -160,7 +160,8 @@ def normalize_expasy_id(expasy_id: str) -> str:
|
|
|
160
160
|
"""Return a standardized ExPASy identifier string.
|
|
161
161
|
|
|
162
162
|
:param expasy_id: A possibly non-normalized ExPASy identifier
|
|
163
|
-
|
|
163
|
+
|
|
164
|
+
:returns: A normalized string.
|
|
164
165
|
"""
|
|
165
166
|
return expasy_id.replace(" ", "")
|
|
166
167
|
|
|
@@ -211,6 +212,7 @@ def get_database(lines: Iterable[str]) -> Mapping[str, dict[str, Any]]:
|
|
|
211
212
|
"""Parse the ExPASy database file and returns a list of enzyme entry dictionaries.
|
|
212
213
|
|
|
213
214
|
:param lines: An iterator over the ExPASy database file or file-like
|
|
215
|
+
|
|
214
216
|
:returns: A mapping from EC code to data
|
|
215
217
|
"""
|
|
216
218
|
rv = {}
|
|
@@ -285,6 +287,7 @@ def _parse_transfer(value: str) -> list[str]:
|
|
|
285
287
|
"""Parse transferred entry string.
|
|
286
288
|
|
|
287
289
|
:param value: A string for a transferred entry
|
|
290
|
+
|
|
288
291
|
:returns: A list of EC codes that it got transferred to
|
|
289
292
|
|
|
290
293
|
>>> _parse_transfer("Transferred entry: 1.1.1.198, 1.1.1.227 and 1.1.1.228.")
|
|
@@ -13,7 +13,8 @@ def is_biological_process(identifier: str) -> bool:
|
|
|
13
13
|
"""Return if the given GO identifier is a biological process.
|
|
14
14
|
|
|
15
15
|
:param identifier: A local unique identifier from GO
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
:returns: If the identifier is a biological process
|
|
17
18
|
|
|
18
19
|
>>> is_biological_process("0006915")
|
|
19
20
|
True
|
|
@@ -27,7 +28,8 @@ def is_molecular_function(identifier: str) -> bool:
|
|
|
27
28
|
"""Return if the given GO identifier is a molecular function.
|
|
28
29
|
|
|
29
30
|
:param identifier: A local unique identifier from GO
|
|
30
|
-
|
|
31
|
+
|
|
32
|
+
:returns: If the identifier is a molecular function
|
|
31
33
|
|
|
32
34
|
>>> is_molecular_function("0006915")
|
|
33
35
|
False
|
|
@@ -39,7 +41,8 @@ def is_cellular_component(identifier: str) -> bool:
|
|
|
39
41
|
"""Return if the given GO identifier is a cellular component.
|
|
40
42
|
|
|
41
43
|
:param identifier: A local unique identifier from GO
|
|
42
|
-
|
|
44
|
+
|
|
45
|
+
:returns: If the identifier is a cellular component
|
|
43
46
|
|
|
44
47
|
>>> is_cellular_component("0006915")
|
|
45
48
|
False
|