pyobo 0.12.6__py3-none-any.whl → 0.12.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +4 -0
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +2 -1
- pyobo/api/alts.py +0 -0
- pyobo/api/combine.py +0 -0
- pyobo/api/edges.py +0 -0
- pyobo/api/embedding.py +26 -1
- pyobo/api/hierarchy.py +0 -0
- pyobo/api/metadata.py +0 -0
- pyobo/api/names.py +0 -0
- pyobo/api/properties.py +0 -0
- pyobo/api/relations.py +0 -0
- pyobo/api/species.py +0 -0
- pyobo/api/typedefs.py +0 -0
- pyobo/api/utils.py +0 -0
- pyobo/api/xrefs.py +0 -0
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +0 -0
- pyobo/cli/database.py +0 -0
- pyobo/cli/database_utils.py +0 -0
- pyobo/cli/lookup.py +0 -0
- pyobo/cli/utils.py +0 -0
- pyobo/constants.py +0 -0
- pyobo/getters.py +0 -0
- pyobo/gilda_utils.py +0 -0
- pyobo/identifier_utils/__init__.py +0 -0
- pyobo/identifier_utils/api.py +0 -0
- pyobo/identifier_utils/relations/__init__.py +0 -0
- pyobo/identifier_utils/relations/api.py +0 -0
- pyobo/identifier_utils/relations/data.json +0 -0
- pyobo/identifier_utils/relations/data_owl.json +0 -0
- pyobo/identifier_utils/relations/data_rdf.json +0 -0
- pyobo/identifier_utils/relations/data_rdfs.json +0 -0
- pyobo/mocks.py +0 -0
- pyobo/ner/__init__.py +0 -0
- pyobo/ner/api.py +0 -0
- pyobo/ner/normalizer.py +0 -0
- pyobo/plugins.py +0 -0
- pyobo/py.typed +0 -0
- pyobo/resource_utils.py +0 -0
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/ncbitaxon.py +0 -0
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +0 -0
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +0 -0
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +0 -0
- pyobo/sources/bigg/__init__.py +0 -0
- pyobo/sources/bigg/bigg_compartment.py +0 -0
- pyobo/sources/bigg/bigg_metabolite.py +0 -0
- pyobo/sources/bigg/bigg_model.py +0 -0
- pyobo/sources/bigg/bigg_reaction.py +0 -0
- pyobo/sources/biogrid.py +0 -0
- pyobo/sources/ccle.py +0 -0
- pyobo/sources/cgnc.py +0 -0
- pyobo/sources/chebi.py +0 -0
- pyobo/sources/chembl/__init__.py +0 -0
- pyobo/sources/chembl/chembl_cell.py +0 -0
- pyobo/sources/chembl/chembl_compound.py +0 -0
- pyobo/sources/chembl/chembl_mechanism.py +0 -0
- pyobo/sources/chembl/chembl_target.py +1 -1
- pyobo/sources/chembl/chembl_tissue.py +0 -0
- pyobo/sources/civic_gene.py +0 -0
- pyobo/sources/clinicaltrials.py +0 -0
- pyobo/sources/complexportal.py +0 -0
- pyobo/sources/conso.py +0 -0
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +0 -0
- pyobo/sources/cvx.py +0 -0
- pyobo/sources/depmap.py +0 -0
- pyobo/sources/dictybase_gene.py +0 -0
- pyobo/sources/drugbank/__init__.py +0 -0
- pyobo/sources/drugbank/drugbank.py +0 -0
- pyobo/sources/drugbank/drugbank_salt.py +0 -0
- pyobo/sources/drugcentral.py +0 -0
- pyobo/sources/expasy.py +0 -0
- pyobo/sources/famplex.py +0 -0
- pyobo/sources/flybase.py +0 -0
- pyobo/sources/gard.py +0 -0
- pyobo/sources/geonames/__init__.py +0 -0
- pyobo/sources/geonames/features.py +0 -0
- pyobo/sources/geonames/geonames.py +0 -0
- pyobo/sources/geonames/utils.py +0 -0
- pyobo/sources/gmt_utils.py +0 -0
- pyobo/sources/go.py +0 -0
- pyobo/sources/gtdb.py +0 -0
- pyobo/sources/gwascentral/__init__.py +0 -0
- pyobo/sources/gwascentral/gwascentral_phenotype.py +0 -0
- pyobo/sources/gwascentral/gwascentral_study.py +0 -0
- pyobo/sources/hgnc/__init__.py +0 -0
- pyobo/sources/hgnc/hgnc.py +0 -0
- pyobo/sources/hgnc/hgncgenefamily.py +0 -0
- pyobo/sources/iana_media_type.py +65 -9
- pyobo/sources/icd/__init__.py +0 -0
- pyobo/sources/icd/icd10.py +0 -0
- pyobo/sources/icd/icd11.py +0 -0
- pyobo/sources/icd/icd_utils.py +0 -0
- pyobo/sources/iconclass.py +55 -0
- pyobo/sources/intact.py +0 -0
- pyobo/sources/interpro.py +0 -0
- pyobo/sources/itis.py +0 -0
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +0 -0
- pyobo/sources/kegg/genes.py +0 -0
- pyobo/sources/kegg/genome.py +0 -0
- pyobo/sources/kegg/pathway.py +0 -0
- pyobo/sources/mesh.py +0 -0
- pyobo/sources/mgi.py +0 -0
- pyobo/sources/mirbase/__init__.py +0 -0
- pyobo/sources/mirbase/mirbase.py +0 -0
- pyobo/sources/mirbase/mirbase_constants.py +0 -0
- pyobo/sources/mirbase/mirbase_family.py +0 -0
- pyobo/sources/mirbase/mirbase_mature.py +0 -0
- pyobo/sources/msigdb.py +0 -0
- pyobo/sources/ncbi/__init__.py +0 -0
- pyobo/sources/ncbi/ncbi_gc.py +0 -0
- pyobo/sources/ncbi/ncbigene.py +0 -0
- pyobo/sources/nih_reporter.py +0 -0
- pyobo/sources/nlm/__init__.py +0 -0
- pyobo/sources/nlm/nlm_catalog.py +0 -0
- pyobo/sources/nlm/nlm_publisher.py +0 -0
- pyobo/sources/nlm/utils.py +0 -0
- pyobo/sources/npass.py +0 -0
- pyobo/sources/omim_ps.py +0 -0
- pyobo/sources/pathbank.py +0 -0
- pyobo/sources/pfam/__init__.py +0 -0
- pyobo/sources/pfam/pfam.py +0 -0
- pyobo/sources/pfam/pfam_clan.py +0 -0
- pyobo/sources/pharmgkb/__init__.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +0 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +0 -0
- pyobo/sources/pharmgkb/utils.py +0 -0
- pyobo/sources/pid.py +0 -0
- pyobo/sources/pombase.py +0 -0
- pyobo/sources/pubchem.py +0 -0
- pyobo/sources/reactome.py +0 -0
- pyobo/sources/rgd.py +0 -0
- pyobo/sources/rhea.py +0 -0
- pyobo/sources/ror.py +0 -0
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +0 -0
- pyobo/sources/selventa/scomp.py +0 -0
- pyobo/sources/selventa/sdis.py +0 -0
- pyobo/sources/selventa/sfam.py +0 -0
- pyobo/sources/sgd.py +0 -0
- pyobo/sources/signor/__init__.py +0 -0
- pyobo/sources/signor/download.py +0 -0
- pyobo/sources/signor/signor_complexes.py +0 -0
- pyobo/sources/slm.py +0 -0
- pyobo/sources/spdx.py +0 -0
- pyobo/sources/umls/__init__.py +0 -0
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +0 -0
- pyobo/sources/umls/sty.py +0 -0
- pyobo/sources/umls/synonym_types.tsv +0 -0
- pyobo/sources/umls/umls.py +0 -0
- pyobo/sources/unimod.py +0 -0
- pyobo/sources/uniprot/__init__.py +0 -0
- pyobo/sources/uniprot/uniprot.py +0 -0
- pyobo/sources/uniprot/uniprot_ptm.py +0 -0
- pyobo/sources/utils.py +0 -0
- pyobo/sources/wikipathways.py +0 -0
- pyobo/sources/zfin.py +0 -0
- pyobo/ssg/__init__.py +0 -0
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +0 -0
- pyobo/ssg/term.html +0 -0
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +0 -0
- pyobo/struct/functional/__init__.py +0 -0
- pyobo/struct/functional/dsl.py +0 -0
- pyobo/struct/functional/macros.py +0 -0
- pyobo/struct/functional/obo_to_functional.py +9 -3
- pyobo/struct/functional/ontology.py +0 -0
- pyobo/struct/functional/utils.py +0 -0
- pyobo/struct/obo/__init__.py +0 -0
- pyobo/struct/obo/reader.py +0 -0
- pyobo/struct/obo/reader_utils.py +0 -0
- pyobo/struct/obograph/__init__.py +0 -0
- pyobo/struct/obograph/export.py +8 -2
- pyobo/struct/obograph/reader.py +0 -0
- pyobo/struct/obograph/utils.py +0 -0
- pyobo/struct/reference.py +0 -0
- pyobo/struct/struct.py +12 -0
- pyobo/struct/struct_utils.py +17 -3
- pyobo/struct/typedef.py +1 -0
- pyobo/struct/utils.py +0 -0
- pyobo/struct/vocabulary.py +0 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +0 -0
- pyobo/utils/io.py +0 -0
- pyobo/utils/iter.py +0 -0
- pyobo/utils/misc.py +0 -0
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +0 -0
- pyobo/version.py +1 -1
- {pyobo-0.12.6.dist-info → pyobo-0.12.8.dist-info}/METADATA +1 -1
- pyobo-0.12.8.dist-info/RECORD +209 -0
- {pyobo-0.12.6.dist-info → pyobo-0.12.8.dist-info}/WHEEL +1 -1
- {pyobo-0.12.6.dist-info → pyobo-0.12.8.dist-info}/licenses/LICENSE +0 -0
- pyobo-0.12.6.dist-info/RECORD +0 -208
- {pyobo-0.12.6.dist-info → pyobo-0.12.8.dist-info}/entry_points.txt +0 -0
pyobo/.DS_Store
CHANGED
|
File without changes
|
pyobo/__init__.py
CHANGED
|
@@ -50,6 +50,7 @@ from .api import (
|
|
|
50
50
|
get_synonyms,
|
|
51
51
|
get_text_embedding,
|
|
52
52
|
get_text_embedding_similarity,
|
|
53
|
+
get_text_embeddings_df,
|
|
53
54
|
get_typedef_df,
|
|
54
55
|
get_xref,
|
|
55
56
|
get_xrefs,
|
|
@@ -65,6 +66,7 @@ from .plugins import (
|
|
|
65
66
|
run_nomenclature_plugin,
|
|
66
67
|
)
|
|
67
68
|
from .struct import (
|
|
69
|
+
Annotation,
|
|
68
70
|
Obo,
|
|
69
71
|
Reference,
|
|
70
72
|
StanzaType,
|
|
@@ -80,6 +82,7 @@ from .utils.path import ensure_path
|
|
|
80
82
|
from .version import get_version
|
|
81
83
|
|
|
82
84
|
__all__ = [
|
|
85
|
+
"Annotation",
|
|
83
86
|
"Obo",
|
|
84
87
|
"Reference",
|
|
85
88
|
"StanzaType",
|
|
@@ -143,6 +146,7 @@ __all__ = [
|
|
|
143
146
|
"get_synonyms",
|
|
144
147
|
"get_text_embedding",
|
|
145
148
|
"get_text_embedding_similarity",
|
|
149
|
+
"get_text_embeddings_df",
|
|
146
150
|
"get_typedef_df",
|
|
147
151
|
"get_version",
|
|
148
152
|
"get_xref",
|
pyobo/__main__.py
CHANGED
|
File without changes
|
pyobo/api/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ from .alts import (
|
|
|
8
8
|
)
|
|
9
9
|
from .combine import get_literal_mappings_subset
|
|
10
10
|
from .edges import get_edges, get_edges_df, get_graph
|
|
11
|
-
from .embedding import get_text_embedding, get_text_embedding_similarity
|
|
11
|
+
from .embedding import get_text_embedding, get_text_embedding_similarity, get_text_embeddings_df
|
|
12
12
|
from .hierarchy import (
|
|
13
13
|
get_ancestors,
|
|
14
14
|
get_children,
|
|
@@ -119,6 +119,7 @@ __all__ = [
|
|
|
119
119
|
"get_synonyms",
|
|
120
120
|
"get_text_embedding",
|
|
121
121
|
"get_text_embedding_similarity",
|
|
122
|
+
"get_text_embeddings_df",
|
|
122
123
|
"get_typedef_df",
|
|
123
124
|
"get_version",
|
|
124
125
|
"get_xref",
|
pyobo/api/alts.py
CHANGED
|
File without changes
|
pyobo/api/combine.py
CHANGED
|
File without changes
|
pyobo/api/edges.py
CHANGED
|
File without changes
|
pyobo/api/embedding.py
CHANGED
|
@@ -6,8 +6,9 @@ from typing import TYPE_CHECKING
|
|
|
6
6
|
|
|
7
7
|
import curies
|
|
8
8
|
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
9
10
|
|
|
10
|
-
from pyobo.api.names import get_definition, get_name
|
|
11
|
+
from pyobo.api.names import get_definition, get_name, get_references
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
14
|
import sentence_transformers
|
|
@@ -16,6 +17,7 @@ __all__ = [
|
|
|
16
17
|
"get_text_embedding",
|
|
17
18
|
"get_text_embedding_model",
|
|
18
19
|
"get_text_embedding_similarity",
|
|
20
|
+
"get_text_embeddings_df",
|
|
19
21
|
]
|
|
20
22
|
|
|
21
23
|
|
|
@@ -39,6 +41,29 @@ def _get_text(
|
|
|
39
41
|
return name
|
|
40
42
|
|
|
41
43
|
|
|
44
|
+
def get_text_embeddings_df(
    prefix: str,
    *,
    model: sentence_transformers.SentenceTransformer | None = None,
) -> pd.DataFrame:
    """Get a dataframe of text embeddings for the entities in a resource.

    :param prefix: The prefix of the resource whose references are embedded
    :param model: A sentence transformer model. Defaults to ``all-MiniLM-L6-v2`` if not given.
    :returns: A dataframe built from the output of ``model.encode``, indexed by
        the local unique identifier of each reference. References for which no
        text is available are left out.
    """
    # Pair every identifier with its text up front, dropping references without text
    pairs = [
        (reference.identifier, text)
        for reference in get_references(prefix)
        if (text := _get_text(reference)) is not None
    ]
    luids = [luid for luid, _ in pairs]
    texts = [text for _, text in pairs]
    # Only fall back to the default model when the caller did not supply one
    encoder = get_text_embedding_model() if model is None else model
    return pd.DataFrame(encoder.encode(texts), index=luids)
|
|
65
|
+
|
|
66
|
+
|
|
42
67
|
def get_text_embedding(
|
|
43
68
|
reference: str | curies.Reference | curies.ReferenceTuple,
|
|
44
69
|
*,
|
pyobo/api/hierarchy.py
CHANGED
|
File without changes
|
pyobo/api/metadata.py
CHANGED
|
File without changes
|
pyobo/api/names.py
CHANGED
|
File without changes
|
pyobo/api/properties.py
CHANGED
|
File without changes
|
pyobo/api/relations.py
CHANGED
|
File without changes
|
pyobo/api/species.py
CHANGED
|
File without changes
|
pyobo/api/typedefs.py
CHANGED
|
File without changes
|
pyobo/api/utils.py
CHANGED
|
File without changes
|
pyobo/api/xrefs.py
CHANGED
|
File without changes
|
pyobo/cli/__init__.py
CHANGED
|
File without changes
|
pyobo/cli/cli.py
CHANGED
|
File without changes
|
pyobo/cli/database.py
CHANGED
|
File without changes
|
pyobo/cli/database_utils.py
CHANGED
|
File without changes
|
pyobo/cli/lookup.py
CHANGED
|
File without changes
|
pyobo/cli/utils.py
CHANGED
|
File without changes
|
pyobo/constants.py
CHANGED
|
File without changes
|
pyobo/getters.py
CHANGED
|
File without changes
|
pyobo/gilda_utils.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/identifier_utils/api.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/mocks.py
CHANGED
|
File without changes
|
pyobo/ner/__init__.py
CHANGED
|
File without changes
|
pyobo/ner/api.py
CHANGED
|
File without changes
|
pyobo/ner/normalizer.py
CHANGED
|
File without changes
|
pyobo/plugins.py
CHANGED
|
File without changes
|
pyobo/py.typed
CHANGED
|
File without changes
|
pyobo/resource_utils.py
CHANGED
|
File without changes
|
pyobo/resources/__init__.py
CHANGED
|
File without changes
|
pyobo/resources/ncbitaxon.py
CHANGED
|
File without changes
|
pyobo/resources/ncbitaxon.tsv.gz
CHANGED
|
File without changes
|
pyobo/resources/ro.py
CHANGED
|
File without changes
|
pyobo/resources/ro.tsv
CHANGED
|
File without changes
|
pyobo/resources/so.py
CHANGED
|
File without changes
|
pyobo/resources/so.tsv
CHANGED
|
File without changes
|
pyobo/sources/README.md
CHANGED
|
File without changes
|
pyobo/sources/__init__.py
CHANGED
|
@@ -34,6 +34,7 @@ from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
|
|
|
34
34
|
from .hgnc import HGNCGetter, HGNCGroupGetter
|
|
35
35
|
from .iana_media_type import IANAGetter
|
|
36
36
|
from .icd import ICD10Getter, ICD11Getter
|
|
37
|
+
from .iconclass import IconclassGetter
|
|
37
38
|
from .intact import IntactGetter
|
|
38
39
|
from .interpro import InterProGetter
|
|
39
40
|
from .itis import ITISGetter
|
|
@@ -115,6 +116,7 @@ __all__ = [
|
|
|
115
116
|
"ICD10Getter",
|
|
116
117
|
"ICD11Getter",
|
|
117
118
|
"ITISGetter",
|
|
119
|
+
"IconclassGetter",
|
|
118
120
|
"IntactGetter",
|
|
119
121
|
"InterProGetter",
|
|
120
122
|
"KEGGGeneGetter",
|
pyobo/sources/agrovoc.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/bigg/__init__.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/bigg/bigg_model.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/biogrid.py
CHANGED
|
File without changes
|
pyobo/sources/ccle.py
CHANGED
|
File without changes
|
pyobo/sources/cgnc.py
CHANGED
|
File without changes
|
pyobo/sources/chebi.py
CHANGED
|
File without changes
|
pyobo/sources/chembl/__init__.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/chembl/chembl_target.py
CHANGED
|
@@ -138,7 +138,7 @@ def iter_terms(version: str) -> Iterable[Term]:
|
|
|
138
138
|
def get_chembl_protein_equivalences(version: str | None = None) -> dict[str, list[str]]:
|
|
139
139
|
"""Get ChEMBL protein equivalences."""
|
|
140
140
|
if version is None:
|
|
141
|
-
version = chembl_downloader.latest()
|
|
141
|
+
version = chembl_downloader.latest(full=False)
|
|
142
142
|
url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
|
|
143
143
|
df = ensure_df(
|
|
144
144
|
PREFIX,
|
|
File without changes
|
pyobo/sources/civic_gene.py
CHANGED
|
File without changes
|
pyobo/sources/clinicaltrials.py
CHANGED
|
File without changes
|
pyobo/sources/complexportal.py
CHANGED
|
File without changes
|
pyobo/sources/conso.py
CHANGED
|
File without changes
|
pyobo/sources/cpt.py
CHANGED
|
File without changes
|
pyobo/sources/credit.py
CHANGED
|
File without changes
|
pyobo/sources/cvx.py
CHANGED
|
File without changes
|
pyobo/sources/depmap.py
CHANGED
|
File without changes
|
pyobo/sources/dictybase_gene.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/drugcentral.py
CHANGED
|
File without changes
|
pyobo/sources/expasy.py
CHANGED
|
File without changes
|
pyobo/sources/famplex.py
CHANGED
|
File without changes
|
pyobo/sources/flybase.py
CHANGED
|
File without changes
|
pyobo/sources/gard.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/geonames/utils.py
CHANGED
|
File without changes
|
pyobo/sources/gmt_utils.py
CHANGED
|
File without changes
|
pyobo/sources/go.py
CHANGED
|
File without changes
|
pyobo/sources/gtdb.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/hgnc/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/hgnc/hgnc.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/iana_media_type.py
CHANGED
|
@@ -5,14 +5,18 @@
|
|
|
5
5
|
|
|
6
6
|
from collections.abc import Iterable
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from pyobo import Obo, Reference, Term, TypeDef, default_reference
|
|
11
|
+
from pyobo.struct import Annotation
|
|
12
|
+
from pyobo.struct.typedef import has_source, term_replaced_by
|
|
10
13
|
from pyobo.utils.path import ensure_df
|
|
11
14
|
|
|
12
15
|
__all__ = ["IANAGetter"]
|
|
13
16
|
|
|
14
17
|
PREFIX = "iana.mediatype"
|
|
15
|
-
|
|
18
|
+
ROOT_MEDIA_TYPE = Term.from_triple(prefix="dcterms", identifier="MediaType", name="media type")
|
|
19
|
+
ROOT_FILE_FORMAT = Term.from_triple(prefix="dcterms", identifier="FileFormat", name="file format")
|
|
16
20
|
|
|
17
21
|
#: The top-level types listed on https://www.iana.org/assignments/media-types/media-types.xhtml
|
|
18
22
|
MEDIA_TYPE_GROUPS = [
|
|
@@ -31,13 +35,37 @@ MEDIA_TYPE_GROUPS = [
|
|
|
31
35
|
GROUP_TO_CSV = {
|
|
32
36
|
media_type_group: (
|
|
33
37
|
f"https://www.iana.org/assignments/media-types/{media_type_group}.csv",
|
|
34
|
-
Term(
|
|
35
|
-
|
|
36
|
-
),
|
|
38
|
+
Term(
|
|
39
|
+
reference=Reference(prefix=PREFIX, identifier=media_type_group, name=media_type_group)
|
|
40
|
+
).append_parent(ROOT_MEDIA_TYPE),
|
|
37
41
|
)
|
|
38
42
|
for media_type_group in MEDIA_TYPE_GROUPS
|
|
39
43
|
}
|
|
40
44
|
|
|
45
|
+
MIMETYPE_IO_URL = (
|
|
46
|
+
"https://github.com/patrickmccallum/mimetype-io/raw/refs/heads/master/src/mimeData.json"
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _get_mimetypes() -> dict[str, dict]:
    """Download the mimetype.io records and index them by name.

    :returns: A dictionary from each record's ``name`` value to the remainder
        of that record (the ``name`` key is removed from the record itself).
    :raises requests.HTTPError: If the server answers with an error status
    """
    res = requests.get(MIMETYPE_IO_URL, timeout=5)
    # Fail loudly on a 4xx/5xx response instead of trying to parse an error page as JSON
    res.raise_for_status()
    rv = {}
    for record in res.json():
        # pop so the name is the key only and is not duplicated inside the record
        name = record.pop("name")
        rv[name] = record
    return rv
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
PREDICATE = TypeDef(
|
|
60
|
+
reference=default_reference(
|
|
61
|
+
prefix=PREFIX, identifier="extension", name="appears with file extension"
|
|
62
|
+
),
|
|
63
|
+
domain=ROOT_MEDIA_TYPE.reference,
|
|
64
|
+
range=ROOT_FILE_FORMAT.reference,
|
|
65
|
+
definition="Connects a media type with a file format that has been observed to encode it",
|
|
66
|
+
is_metadata_tag=True,
|
|
67
|
+
)
|
|
68
|
+
|
|
41
69
|
|
|
42
70
|
class IANAGetter(Obo):
|
|
43
71
|
"""An ontology representation of IANA media types (i.e. MIME types)."""
|
|
@@ -45,9 +73,10 @@ class IANAGetter(Obo):
|
|
|
45
73
|
ontology = bioregistry_key = PREFIX
|
|
46
74
|
name = "IANA Media Types"
|
|
47
75
|
dynamic_version = True
|
|
48
|
-
root_terms = [
|
|
76
|
+
root_terms = [ROOT_MEDIA_TYPE.reference, ROOT_FILE_FORMAT.reference]
|
|
49
77
|
typedefs = [
|
|
50
78
|
term_replaced_by,
|
|
79
|
+
PREDICATE,
|
|
51
80
|
]
|
|
52
81
|
|
|
53
82
|
def iter_terms(self, force: bool = False) -> Iterable[Term]:
|
|
@@ -55,26 +84,53 @@ class IANAGetter(Obo):
|
|
|
55
84
|
return get_terms()
|
|
56
85
|
|
|
57
86
|
|
|
58
|
-
def get_terms() ->
|
|
87
|
+
def get_terms() -> Iterable[Term]:
|
|
59
88
|
"""Get IANA Media Type terms."""
|
|
89
|
+
mimetypes_data = _get_mimetypes()
|
|
90
|
+
|
|
91
|
+
filetype_to_term = {}
|
|
92
|
+
for record in mimetypes_data.values():
|
|
93
|
+
for filetype_str in record.get("fileTypes", []):
|
|
94
|
+
filetype_id = filetype_str.removeprefix(".")
|
|
95
|
+
filetype_term = Term(
|
|
96
|
+
reference=default_reference(PREFIX, identifier=filetype_id, name=filetype_str),
|
|
97
|
+
type="Instance",
|
|
98
|
+
)
|
|
99
|
+
filetype_term.append_parent(ROOT_FILE_FORMAT)
|
|
100
|
+
filetype_term.annotate_string(has_source, MIMETYPE_IO_URL)
|
|
101
|
+
filetype_to_term[filetype_str] = filetype_term
|
|
102
|
+
yield filetype_term
|
|
103
|
+
|
|
60
104
|
terms: dict[str, Term] = {}
|
|
61
105
|
forwards: dict[Term, str] = {}
|
|
62
106
|
for key, (url, parent) in GROUP_TO_CSV.items():
|
|
63
107
|
df = ensure_df(PREFIX, url=url, sep=",")
|
|
64
108
|
terms[key] = parent
|
|
65
109
|
for name, identifier, references in df.values:
|
|
110
|
+
mimetypes_record = mimetypes_data.get(identifier, {})
|
|
111
|
+
|
|
66
112
|
if "OBSOLE" in name or "DEPRECATED" in name:
|
|
67
113
|
is_obsolete = True
|
|
68
114
|
else:
|
|
69
115
|
is_obsolete = None
|
|
116
|
+
|
|
70
117
|
term = Term(
|
|
71
118
|
reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
|
|
72
119
|
is_obsolete=is_obsolete,
|
|
120
|
+
# TODO how to add definition source?
|
|
121
|
+
definition=mimetypes_record.get("description"),
|
|
73
122
|
).append_parent(parent)
|
|
74
123
|
for reference in _process_references(references):
|
|
75
124
|
term.append_see_also_uri(reference)
|
|
76
125
|
terms[identifier.casefold()] = term
|
|
77
126
|
|
|
127
|
+
for filetype_str in mimetypes_record.get("fileTypes", []):
|
|
128
|
+
term.annotate_object(
|
|
129
|
+
PREDICATE,
|
|
130
|
+
filetype_to_term[filetype_str],
|
|
131
|
+
annotations=[Annotation.uri(has_source, MIMETYPE_IO_URL)],
|
|
132
|
+
)
|
|
133
|
+
|
|
78
134
|
if "in favor of" in name:
|
|
79
135
|
_, _, new = name.partition("in favor of ")
|
|
80
136
|
forwards[term] = new.casefold().strip().rstrip(")").strip()
|
|
@@ -84,7 +140,7 @@ def get_terms() -> list[Term]:
|
|
|
84
140
|
new = "application/vnd.afpc.afplinedata"
|
|
85
141
|
old.append_replaced_by(terms[new].reference)
|
|
86
142
|
|
|
87
|
-
|
|
143
|
+
yield from terms.values()
|
|
88
144
|
|
|
89
145
|
|
|
90
146
|
def _process_references(cell: str) -> list[str]:
|
pyobo/sources/icd/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/icd/icd10.py
CHANGED
|
File without changes
|
pyobo/sources/icd/icd11.py
CHANGED
|
File without changes
|
pyobo/sources/icd/icd_utils.py
CHANGED
|
File without changes
|
pyobo/sources/iconclass.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Get ICONCLASS as OBO."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from urllib.parse import quote
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from pyobo.struct import Obo, Term
|
|
9
|
+
from pyobo.utils.path import ensure_df
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"IconclassGetter",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
PREFIX = "iconclass"
|
|
16
|
+
BASE_URL = "https://github.com/iconclass/data/raw/refs/heads/main/txt/en/txt_en_{}.txt"
|
|
17
|
+
URLS = [
|
|
18
|
+
BASE_URL.format("0_1"),
|
|
19
|
+
BASE_URL.format("2_3"),
|
|
20
|
+
BASE_URL.format("4"),
|
|
21
|
+
BASE_URL.format("5_6_7_8"),
|
|
22
|
+
BASE_URL.format("9"),
|
|
23
|
+
BASE_URL.format("keys"),
|
|
24
|
+
# shakespeare
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_df() -> pd.DataFrame:
    """Load every ICONCLASS text file and stack them into one dataframe.

    Each file listed in ``URLS`` is read as pipe-delimited text with the two
    columns ``luid`` and ``name``; the per-file frames are concatenated in the
    order of ``URLS``.
    """
    frames = [
        ensure_df(prefix=PREFIX, url=source_url, sep="|", names=["luid", "name"])
        for source_url in URLS
    ]
    return pd.concat(frames)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def iter_terms() -> Iterable[Term]:
    """Yield one term per row of the ICONCLASS dataframe.

    The local unique identifier is URL-quoted before it is used as the term's
    identifier; the name is passed through unchanged.
    """
    yield from (
        Term.from_triple(prefix=PREFIX, identifier=quote(luid), name=name)
        for luid, name in get_df().values
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class IconclassGetter(Obo):
    """Expose ICONCLASS through the :class:`Obo` interface."""

    # No fixed version string is declared for this resource
    dynamic_version = True
    ontology = PREFIX

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Iterate over the ICONCLASS terms.

        :param force: Accepted for compatibility with the base class signature;
            it is not used by this implementation.
        :returns: The iterable produced by the module-level ``iter_terms``
        """
        return iter_terms()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
IconclassGetter.cli()
|
pyobo/sources/intact.py
CHANGED
|
File without changes
|
pyobo/sources/interpro.py
CHANGED
|
File without changes
|
pyobo/sources/itis.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/api.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/genes.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/genome.py
CHANGED
|
File without changes
|
pyobo/sources/kegg/pathway.py
CHANGED
|
File without changes
|
pyobo/sources/mesh.py
CHANGED
|
File without changes
|
pyobo/sources/mgi.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/mirbase/mirbase.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
pyobo/sources/msigdb.py
CHANGED
|
File without changes
|
pyobo/sources/ncbi/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/ncbi/ncbi_gc.py
CHANGED
|
File without changes
|
pyobo/sources/ncbi/ncbigene.py
CHANGED
|
File without changes
|
pyobo/sources/nih_reporter.py
CHANGED
|
File without changes
|
pyobo/sources/nlm/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/nlm/nlm_catalog.py
CHANGED
|
File without changes
|
|
File without changes
|
pyobo/sources/nlm/utils.py
CHANGED
|
File without changes
|
pyobo/sources/npass.py
CHANGED
|
File without changes
|
pyobo/sources/omim_ps.py
CHANGED
|
File without changes
|
pyobo/sources/pathbank.py
CHANGED
|
File without changes
|
pyobo/sources/pfam/__init__.py
CHANGED
|
File without changes
|
pyobo/sources/pfam/pfam.py
CHANGED
|
File without changes
|