PyPI - bioregistry - Versions diffs - 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl - Mend

bioregistry 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

bioregistry/.DS_Store +0 -0
bioregistry/__init__.py +1 -1
bioregistry/analysis/paper_ranking.py +6 -2
bioregistry/app/.DS_Store +0 -0
bioregistry/app/api.py +1 -2
bioregistry/app/templates/.DS_Store +0 -0
bioregistry/app/ui.py +3 -3
bioregistry/bibliometrics.py +1 -1
bioregistry/cli.py +1 -1
bioregistry/curation/.DS_Store +0 -0
bioregistry/curation/add_cessda.py +1 -1
bioregistry/curation/add_provider_status_curations.py +1 -1
bioregistry/curation/add_sweet.py +146 -0
bioregistry/curation/clean_publications.py +1 -1
bioregistry/curation/enrich_publications.py +1 -1
bioregistry/data/bioregistry.json +6849 -619
bioregistry/data/collections.json +24 -1
bioregistry/data/curated_papers.tsv +9 -0
bioregistry/data/metaregistry.json +1 -1
bioregistry/export/rdf_export.py +1 -1
bioregistry/export/tables_export.py +1 -1
bioregistry/external/.DS_Store +0 -0
bioregistry/external/aberowl/processed.json +23 -19
bioregistry/external/bartoc/processed.json +5 -5
bioregistry/external/biolink/processed.json +3 -0
bioregistry/external/bioportal/agroportal.json +3 -3
bioregistry/external/bioportal/bioportal.json +45 -17
bioregistry/external/cellosaurus/processed.json +3 -3
bioregistry/external/fairsharing/processed.json +6 -5
bioregistry/external/integbio/processed.json +56 -55
bioregistry/external/lov/processed.json +59 -0
bioregistry/external/miriam/.DS_Store +0 -0
bioregistry/external/obofoundry/processed.json +4 -4
bioregistry/external/ols/__init__.py +13 -5
bioregistry/external/ols/processed.json +6 -6
bioregistry/external/ols/tib-processed.json +0 -1
bioregistry/external/ols/tib.py +1 -0
bioregistry/external/re3data/processed.json +24 -2
bioregistry/record_accumulator.py +1 -1
bioregistry/resolve.py +1 -2
bioregistry/resource_manager.py +1 -1
bioregistry/schema/.DS_Store +0 -0
bioregistry/schema/__init__.py +24 -0
bioregistry/schema/struct.py +10 -3
bioregistry/version.py +1 -1
{bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/METADATA +1 -1
{bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/RECORD +49 -41
{bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/WHEEL +1 -1
{bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/entry_points.txt +0 -0

bioregistry/.DS_Store ADDED Viewed

Binary file

bioregistry/__init__.py CHANGED Viewed

@@ -127,7 +127,7 @@ from .resolve_identifier import (
     standardize_identifier,
 )
 from .resource_manager import Manager, manager
-from .schema.struct import (
+from .schema import (
     Author,
     Collection,
     Context,

bioregistry/analysis/paper_ranking.py CHANGED Viewed

@@ -400,7 +400,7 @@ def predict_and_save(
     :param path: Path to save the predictions.
     """
     x_meta = pd.DataFrame()
-    x_transformed = vectorizer.transform(df["title"] + " " + df["abstract"])
+    x_transformed = vectorizer.transform(_concat(df))
     for name, clf in classifiers:
         x_meta[name] = _predict(clf, x_transformed)
@@ -524,7 +524,7 @@ def train(
     df = pd.concat(curated_dfs)[["pubmed", "title", "abstract", "relevant"]]
     df["abstract"] = df["abstract"].fillna("")
-    df["title_abstract"] = df["title"] + " " + df["abstract"]
+    df["title_abstract"] = _concat(df)
     df = df[df.title_abstract.notna()]
     df = df.drop_duplicates()
     _echo_stats(df, "combine curated publications")
@@ -582,5 +582,9 @@ def train(
     return TrainingResult(curated_pubmed_ids, vectorizer, classifiers, meta_clf)
+def _concat(df: pd.DataFrame) -> pd.Series[str]:
+    """Return the ``title`` and ``abstract`` columns joined row-wise by one space."""
+    titles = cast("pd.Series[str]", df["title"])
+    abstracts = cast("pd.Series[str]", df["abstract"])
+    return titles + " " + abstracts
 if __name__ == "__main__":
     main()

bioregistry/app/.DS_Store ADDED Viewed

Binary file

bioregistry/app/api.py CHANGED Viewed

@@ -19,8 +19,7 @@ from ..export.rdf_export import (
     resource_to_rdf_str,
 )
 from ..resource_manager import Manager
-from ..schema import Attributable, sanitize_mapping
-from ..schema.struct import Collection, Context, Registry, Resource
+from ..schema import Attributable, Collection, Context, Registry, Resource, sanitize_mapping
 from ..schema_utils import (
     read_collections_contributions,
     read_prefix_contacts,

bioregistry/app/templates/.DS_Store ADDED Viewed

Binary file

bioregistry/app/ui.py CHANGED Viewed

@@ -38,9 +38,8 @@ from ..export.rdf_export import (
     metaresource_to_rdf_str,
     resource_to_rdf_str,
 )
-from ..schema import Context
-from ..schema.constants import SCHEMA_TERMS
-from ..schema.struct import (
+from ..schema import (
+    Context,
     Registry,
     RegistryGovernance,
     RegistryQualities,
@@ -49,6 +48,7 @@ from ..schema.struct import (
     get_json_schema,
     schema_status_map,
 )
+from ..schema.constants import SCHEMA_TERMS
 from ..schema_utils import (
     read_collections_contributions,
     read_context_contributions,

bioregistry/bibliometrics.py CHANGED Viewed

@@ -8,7 +8,7 @@ from collections.abc import Iterable
 from typing import TYPE_CHECKING
 from .resource_manager import manager
-from .schema.struct import Publication, deduplicate_publications
+from .schema import Publication, deduplicate_publications
 if TYPE_CHECKING:
     import pandas

bioregistry/cli.py CHANGED Viewed

@@ -8,7 +8,7 @@ from .app.cli import web
 from .compare import compare
 from .export.cli import export
 from .lint import lint
-from .schema.struct import generate_schema
+from .schema import generate_schema
 from .utils import get_hexdigests, secho
 from .validate.cli import validate
 from .version import VERSION

bioregistry/curation/.DS_Store ADDED Viewed

Binary file

bioregistry/curation/add_cessda.py CHANGED Viewed

@@ -13,7 +13,7 @@ from tabulate import tabulate
 from tqdm import tqdm
 import bioregistry
-from bioregistry.schema.struct import Author, Organization
+from bioregistry.schema import Author, Organization
 BASE = "https://vocabularies.cessda.eu"
 MODULE = pystow.module("cessda")

bioregistry/curation/add_provider_status_curations.py CHANGED Viewed

@@ -8,7 +8,7 @@ import pandas as pd
 from tqdm import tqdm
 from bioregistry import manager
-from bioregistry.schema.struct import StatusCheck
+from bioregistry.schema import StatusCheck
 URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC8RAMlNGauLHJb1RGwFuvC2LBJBjeeICRtq596npE6G4ZjZwX8W_Fz031hAfqsbu6f9Ruxl2PTsFx/pub?gid=1207894592&single=true&output=tsv"

bioregistry/curation/add_sweet.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""Add SWEET ontologies."""
+from typing import cast
+import click
+import pystow
+import bioregistry
+MODULE = pystow.module("bioregistry", "sweet")
+ALL_PREFIXES_URL = "https://github.com/ESIPFed/sweet/raw/refs/heads/master/sweetPrefixes.ttl"
+MANUAL = {
+    "sosto": "Acute",
+    "sostri": "Catastrophic",
+    "sostsp": "Big",
+    "sorel": "hasPhenomena",
+    "sorelch": "atomicMass",
+    "sorelh": "hasAttribute",
+    "soreaer": "AbyssopelagicZone",
+    "sorelcl": "hasAverageAnnualPrecipitation",
+    "sorelm": "averageOver",
+    "sorelph": "colderThan",
+    "sorelsc": "causedBy",
+    "sorelt": "dayOfYear",
+    "sorelsp": "adjacentTo",
+    "sorepsd": "Counterclockwise",
+    "sorelpr": "fillValue",
+    "sostss": "Continental",
+    "sostrt": "Accurate",
+    "sostsl": "CaK",
+    "sosttf": "Annual",
+    "sosttg": "0MYA",
+    "sostv": "Clear",
+}
+@click.command()
+def main() -> None:
+    """Add each SWEET sub-ontology to the Bioregistry as a ``sweet.*`` prefix.
+
+    Loads the SWEET prefix listing via ``MODULE.ensure_rdf``, then for every
+    listed namespace loads the matching ontology file, looks up its label and
+    an example term, derives keywords from the label, and registers a new
+    resource. The registry is written once, after all resources were added.
+
+    :raises ValueError: If a namespace yields no internal key, if no example
+        term can be found (neither in the ontology nor in ``MANUAL``), or if
+        the SWEET-internal prefix does not start with ``so``.
+    """
+    graph = MODULE.ensure_rdf(url=ALL_PREFIXES_URL)
+    sparql = """
+        SELECT ?prefix ?namespace
+        WHERE {
+            ?x sh:prefix ?prefix;
+               sh:namespace ?namespace .
+        }
+    """
+    for sweet_internal_prefix, uri_prefix in graph.query(sparql):  # type:ignore
+        sweet_internal_prefix = str(sweet_internal_prefix)
+        uri_prefix = str(uri_prefix)
+        if sweet_internal_prefix in {"soall", "sweet"}:
+            continue  # this is the combine one, not its own prefix
+        # The path below the SWEET base URI doubles as the file name in the repository
+        sweet_internal_key = uri_prefix.removeprefix("http://sweetontology.net/").rstrip("/")
+        if not sweet_internal_key:
+            raise ValueError(f"no internal key found for {sweet_internal_prefix}")
+        download_rdf = (
+            f"https://github.com/ESIPFed/sweet/raw/refs/heads/master/src/{sweet_internal_key}.ttl"
+        )
+        inner_graph = MODULE.ensure_rdf(url=download_rdf)
+        ontology_name_query = """
+            SELECT ?name
+            WHERE { owl:Ontology ^rdf:type/rdfs:label ?name }
+            LIMIT 1
+        """
+        name = str(next(iter(inner_graph.query(ontology_name_query)))[0])  # type:ignore
+        name_short = name.removeprefix("SWEET Ontology ")
+        example_query = f"""
+            SELECT ?term
+            WHERE {{
+                ?term rdf:type owl:Class;
+                      rdfs:label ?name ;
+                FILTER STRSTARTS(str(?term), "{uri_prefix}")
+            }}
+            LIMIT 1
+        """
+        example_records = list(inner_graph.query(example_query))
+        if example_records:
+            example_uri = cast(str, example_records[0][0])  # type:ignore[index]
+            example = example_uri.removeprefix(uri_prefix)
+        elif sweet_internal_prefix in MANUAL:
+            # Fall back to a hand-picked example when the ontology has no labeled class
+            example = MANUAL[sweet_internal_prefix]
+        else:
+            raise ValueError(
+                f"[{sweet_internal_prefix}] missing example in {name_short} ({uri_prefix})"
+            )
+        if not sweet_internal_prefix.startswith("so"):
+            raise ValueError(
+                f"[{sweet_internal_prefix}] expected SWEET prefix to start with 'so'"
+            )
+        # Derive keywords from the lower-cased label. Branch order matters: the more
+        # specific prefixes ("property relationships ", "realm land ") must be tested
+        # before their shorter counterparts ("property ", "realm ").
+        nsl = name_short.lower()
+        if nsl.startswith("human "):
+            keywords = [nsl.removeprefix("human ")]
+        elif nsl.startswith("material "):
+            keywords = ["materials", nsl.removeprefix("material ")]
+        elif nsl.startswith("phenomena "):
+            keywords = ["phenomena", nsl.removeprefix("phenomena ")]
+        elif nsl.startswith("property relationships "):
+            keywords = [nsl.removeprefix("property relationships ")]
+        elif nsl.startswith("property "):
+            keywords = [nsl.removeprefix("property ")]
+        elif nsl.startswith("process "):
+            keywords = [nsl.removeprefix("process ")]
+        elif nsl.startswith("realm land "):
+            # Strip the full prefix (including its trailing space) and re-attach
+            # "land" with a separator, e.g. "realm land aeolian" -> "aeolian land"
+            keywords = [nsl.removeprefix("realm land ") + " land"]
+        elif nsl.startswith("realm "):
+            keywords = ["realm", nsl.removeprefix("realm ")]
+        elif nsl.startswith("representation "):
+            keywords = [nsl.removeprefix("representation ")]
+        elif nsl.startswith("state "):
+            keywords = [nsl.removeprefix("state ")]
+        elif nsl.startswith("relationships "):
+            keywords = [nsl.removeprefix("relationships ")]
+        else:
+            keywords = [nsl]
+        # Bioregistry prefix swaps the SWEET-internal "so" lead for a "sweet." namespace
+        prefix = f"sweet.{sweet_internal_prefix.removeprefix('so')}"
+        resource = bioregistry.Resource(
+            prefix=prefix,
+            synonyms=[sweet_internal_prefix],
+            name=name,
+            keywords=sorted(keywords),
+            homepage=str(uri_prefix),
+            uri_format=f"{uri_prefix}$1",
+            description=f"The Semantic Web for Earth and Environmental Terminology (SWEET) ontology for {name_short}",
+            example=example,
+            download_rdf=download_rdf,
+            part_of="sweet",
+            license="CC0-1.0",
+            repository="https://github.com/ESIPFed/sweet",
+            contributor=bioregistry.Author.get_charlie(),
+            github_request_issue=1772,
+        )
+        bioregistry.add_resource(resource)
+    bioregistry.manager.write_registry()
+if __name__ == "__main__":
+    main()

bioregistry/curation/clean_publications.py CHANGED Viewed

@@ -7,7 +7,7 @@
 import click
 import bioregistry
-from bioregistry.schema.struct import deduplicate_publications
+from bioregistry.schema import deduplicate_publications
 @click.command()

bioregistry/curation/enrich_publications.py CHANGED Viewed

@@ -13,7 +13,7 @@ from manubot.cite.pubmed import get_pmid_for_doi, get_pubmed_csl_item
 from tqdm import tqdm
 from bioregistry import manager
-from bioregistry.schema.struct import Publication, deduplicate_publications
+from bioregistry.schema import Publication, deduplicate_publications
 from bioregistry.utils import removeprefix

bioregistry 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl

bioregistry 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl