bioregistry 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioregistry/.DS_Store +0 -0
- bioregistry/__init__.py +1 -1
- bioregistry/analysis/paper_ranking.py +6 -2
- bioregistry/app/.DS_Store +0 -0
- bioregistry/app/api.py +1 -2
- bioregistry/app/templates/.DS_Store +0 -0
- bioregistry/app/ui.py +3 -3
- bioregistry/bibliometrics.py +1 -1
- bioregistry/cli.py +1 -1
- bioregistry/curation/.DS_Store +0 -0
- bioregistry/curation/add_cessda.py +1 -1
- bioregistry/curation/add_provider_status_curations.py +1 -1
- bioregistry/curation/add_sweet.py +146 -0
- bioregistry/curation/clean_publications.py +1 -1
- bioregistry/curation/enrich_publications.py +1 -1
- bioregistry/data/bioregistry.json +6849 -619
- bioregistry/data/collections.json +24 -1
- bioregistry/data/curated_papers.tsv +9 -0
- bioregistry/data/metaregistry.json +1 -1
- bioregistry/export/rdf_export.py +1 -1
- bioregistry/export/tables_export.py +1 -1
- bioregistry/external/.DS_Store +0 -0
- bioregistry/external/aberowl/processed.json +23 -19
- bioregistry/external/bartoc/processed.json +5 -5
- bioregistry/external/biolink/processed.json +3 -0
- bioregistry/external/bioportal/agroportal.json +3 -3
- bioregistry/external/bioportal/bioportal.json +45 -17
- bioregistry/external/cellosaurus/processed.json +3 -3
- bioregistry/external/fairsharing/processed.json +6 -5
- bioregistry/external/integbio/processed.json +56 -55
- bioregistry/external/lov/processed.json +59 -0
- bioregistry/external/miriam/.DS_Store +0 -0
- bioregistry/external/obofoundry/processed.json +4 -4
- bioregistry/external/ols/__init__.py +13 -5
- bioregistry/external/ols/processed.json +6 -6
- bioregistry/external/ols/tib-processed.json +0 -1
- bioregistry/external/ols/tib.py +1 -0
- bioregistry/external/re3data/processed.json +24 -2
- bioregistry/record_accumulator.py +1 -1
- bioregistry/resolve.py +1 -2
- bioregistry/resource_manager.py +1 -1
- bioregistry/schema/.DS_Store +0 -0
- bioregistry/schema/__init__.py +24 -0
- bioregistry/schema/struct.py +10 -3
- bioregistry/version.py +1 -1
- {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/METADATA +1 -1
- {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/RECORD +49 -41
- {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/WHEEL +1 -1
- {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/entry_points.txt +0 -0
bioregistry/.DS_Store
ADDED
|
Binary file
|
bioregistry/__init__.py
CHANGED
|
@@ -400,7 +400,7 @@ def predict_and_save(
|
|
|
400
400
|
:param path: Path to save the predictions.
|
|
401
401
|
"""
|
|
402
402
|
x_meta = pd.DataFrame()
|
|
403
|
-
x_transformed = vectorizer.transform(df
|
|
403
|
+
x_transformed = vectorizer.transform(_concat(df))
|
|
404
404
|
for name, clf in classifiers:
|
|
405
405
|
x_meta[name] = _predict(clf, x_transformed)
|
|
406
406
|
|
|
@@ -524,7 +524,7 @@ def train(
|
|
|
524
524
|
df = pd.concat(curated_dfs)[["pubmed", "title", "abstract", "relevant"]]
|
|
525
525
|
|
|
526
526
|
df["abstract"] = df["abstract"].fillna("")
|
|
527
|
-
df["title_abstract"] = df
|
|
527
|
+
df["title_abstract"] = _concat(df)
|
|
528
528
|
df = df[df.title_abstract.notna()]
|
|
529
529
|
df = df.drop_duplicates()
|
|
530
530
|
_echo_stats(df, "combine curated publications")
|
|
@@ -582,5 +582,9 @@ def train(
|
|
|
582
582
|
return TrainingResult(curated_pubmed_ids, vectorizer, classifiers, meta_clf)
|
|
583
583
|
|
|
584
584
|
|
|
585
|
+
def _concat(df: pd.DataFrame) -> pd.Series[str]:
|
|
586
|
+
return cast("pd.Series[str]", df["title"]) + " " + cast("pd.Series[str]", df["abstract"])
|
|
587
|
+
|
|
588
|
+
|
|
585
589
|
if __name__ == "__main__":
|
|
586
590
|
main()
|
|
Binary file
|
bioregistry/app/api.py
CHANGED
|
@@ -19,8 +19,7 @@ from ..export.rdf_export import (
|
|
|
19
19
|
resource_to_rdf_str,
|
|
20
20
|
)
|
|
21
21
|
from ..resource_manager import Manager
|
|
22
|
-
from ..schema import Attributable, sanitize_mapping
|
|
23
|
-
from ..schema.struct import Collection, Context, Registry, Resource
|
|
22
|
+
from ..schema import Attributable, Collection, Context, Registry, Resource, sanitize_mapping
|
|
24
23
|
from ..schema_utils import (
|
|
25
24
|
read_collections_contributions,
|
|
26
25
|
read_prefix_contacts,
|
|
Binary file
|
bioregistry/app/ui.py
CHANGED
|
@@ -38,9 +38,8 @@ from ..export.rdf_export import (
|
|
|
38
38
|
metaresource_to_rdf_str,
|
|
39
39
|
resource_to_rdf_str,
|
|
40
40
|
)
|
|
41
|
-
from ..schema import
|
|
42
|
-
|
|
43
|
-
from ..schema.struct import (
|
|
41
|
+
from ..schema import (
|
|
42
|
+
Context,
|
|
44
43
|
Registry,
|
|
45
44
|
RegistryGovernance,
|
|
46
45
|
RegistryQualities,
|
|
@@ -49,6 +48,7 @@ from ..schema.struct import (
|
|
|
49
48
|
get_json_schema,
|
|
50
49
|
schema_status_map,
|
|
51
50
|
)
|
|
51
|
+
from ..schema.constants import SCHEMA_TERMS
|
|
52
52
|
from ..schema_utils import (
|
|
53
53
|
read_collections_contributions,
|
|
54
54
|
read_context_contributions,
|
bioregistry/bibliometrics.py
CHANGED
|
@@ -8,7 +8,7 @@ from collections.abc import Iterable
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
10
|
from .resource_manager import manager
|
|
11
|
-
from .schema
|
|
11
|
+
from .schema import Publication, deduplicate_publications
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
14
|
import pandas
|
bioregistry/cli.py
CHANGED
|
@@ -8,7 +8,7 @@ from .app.cli import web
|
|
|
8
8
|
from .compare import compare
|
|
9
9
|
from .export.cli import export
|
|
10
10
|
from .lint import lint
|
|
11
|
-
from .schema
|
|
11
|
+
from .schema import generate_schema
|
|
12
12
|
from .utils import get_hexdigests, secho
|
|
13
13
|
from .validate.cli import validate
|
|
14
14
|
from .version import VERSION
|
|
Binary file
|
|
@@ -13,7 +13,7 @@ from tabulate import tabulate
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
15
|
import bioregistry
|
|
16
|
-
from bioregistry.schema
|
|
16
|
+
from bioregistry.schema import Author, Organization
|
|
17
17
|
|
|
18
18
|
BASE = "https://vocabularies.cessda.eu"
|
|
19
19
|
MODULE = pystow.module("cessda")
|
|
@@ -8,7 +8,7 @@ import pandas as pd
|
|
|
8
8
|
from tqdm import tqdm
|
|
9
9
|
|
|
10
10
|
from bioregistry import manager
|
|
11
|
-
from bioregistry.schema
|
|
11
|
+
from bioregistry.schema import StatusCheck
|
|
12
12
|
|
|
13
13
|
URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC8RAMlNGauLHJb1RGwFuvC2LBJBjeeICRtq596npE6G4ZjZwX8W_Fz031hAfqsbu6f9Ruxl2PTsFx/pub?gid=1207894592&single=true&output=tsv"
|
|
14
14
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Add SWEET ontologies."""
|
|
2
|
+
|
|
3
|
+
from typing import cast
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
import pystow
|
|
7
|
+
|
|
8
|
+
import bioregistry
|
|
9
|
+
|
|
10
|
+
MODULE = pystow.module("bioregistry", "sweet")
|
|
11
|
+
|
|
12
|
+
ALL_PREFIXES_URL = "https://github.com/ESIPFed/sweet/raw/refs/heads/master/sweetPrefixes.ttl"
|
|
13
|
+
|
|
14
|
+
MANUAL = {
|
|
15
|
+
"sosto": "Acute",
|
|
16
|
+
"sostri": "Catastrophic",
|
|
17
|
+
"sostsp": "Big",
|
|
18
|
+
"sorel": "hasPhenomena",
|
|
19
|
+
"sorelch": "atomicMass",
|
|
20
|
+
"sorelh": "hasAttribute",
|
|
21
|
+
"soreaer": "AbyssopelagicZone",
|
|
22
|
+
"sorelcl": "hasAverageAnnualPrecipitation",
|
|
23
|
+
"sorelm": "averageOver",
|
|
24
|
+
"sorelph": "colderThan",
|
|
25
|
+
"sorelsc": "causedBy",
|
|
26
|
+
"sorelt": "dayOfYear",
|
|
27
|
+
"sorelsp": "adjacentTo",
|
|
28
|
+
"sorepsd": "Counterclockwise",
|
|
29
|
+
"sorelpr": "fillValue",
|
|
30
|
+
"sostss": "Continental",
|
|
31
|
+
"sostrt": "Accurate",
|
|
32
|
+
"sostsl": "CaK",
|
|
33
|
+
"sosttf": "Annual",
|
|
34
|
+
"sosttg": "0MYA",
|
|
35
|
+
"sostv": "Clear",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@click.command()
|
|
40
|
+
def main() -> None:
|
|
41
|
+
"""Add SWEET ontologies."""
|
|
42
|
+
graph = MODULE.ensure_rdf(url=ALL_PREFIXES_URL)
|
|
43
|
+
sparql = """
|
|
44
|
+
SELECT ?prefix ?namespace
|
|
45
|
+
WHERE {
|
|
46
|
+
?x sh:prefix ?prefix;
|
|
47
|
+
sh:namespace ?namespace .
|
|
48
|
+
}
|
|
49
|
+
"""
|
|
50
|
+
for sweet_internal_prefix, uri_prefix in graph.query(sparql): # type:ignore
|
|
51
|
+
sweet_internal_prefix = str(sweet_internal_prefix)
|
|
52
|
+
uri_prefix = str(uri_prefix)
|
|
53
|
+
|
|
54
|
+
if sweet_internal_prefix in {"soall", "sweet"}:
|
|
55
|
+
continue # this is the combine one, not its own prefix
|
|
56
|
+
|
|
57
|
+
sweet_internal_key = uri_prefix.removeprefix("http://sweetontology.net/").rstrip("/")
|
|
58
|
+
if not sweet_internal_key:
|
|
59
|
+
raise ValueError(f"no internal key found for {sweet_internal_prefix}")
|
|
60
|
+
|
|
61
|
+
download_rdf = (
|
|
62
|
+
f"https://github.com/ESIPFed/sweet/raw/refs/heads/master/src/{sweet_internal_key}.ttl"
|
|
63
|
+
)
|
|
64
|
+
inner_graph = MODULE.ensure_rdf(url=download_rdf)
|
|
65
|
+
|
|
66
|
+
ontology_name_query = """
|
|
67
|
+
SELECT ?name
|
|
68
|
+
WHERE { owl:Ontology ^rdf:type/rdfs:label ?name }
|
|
69
|
+
LIMIT 1
|
|
70
|
+
"""
|
|
71
|
+
name = str(next(iter(inner_graph.query(ontology_name_query)))[0]) # type:ignore
|
|
72
|
+
name_short = name.removeprefix("SWEET Ontology ")
|
|
73
|
+
|
|
74
|
+
example_query = f"""
|
|
75
|
+
SELECT ?term
|
|
76
|
+
WHERE {{
|
|
77
|
+
?term rdf:type owl:Class;
|
|
78
|
+
rdfs:label ?name ;
|
|
79
|
+
FILTER STRSTARTS(str(?term), "{uri_prefix}")
|
|
80
|
+
}}
|
|
81
|
+
LIMIT 1
|
|
82
|
+
"""
|
|
83
|
+
example_records = list(inner_graph.query(example_query))
|
|
84
|
+
if example_records:
|
|
85
|
+
example_uri = cast(str, example_records[0][0]) # type:ignore[index]
|
|
86
|
+
example = example_uri.removeprefix(uri_prefix)
|
|
87
|
+
elif sweet_internal_prefix in MANUAL:
|
|
88
|
+
example = MANUAL[sweet_internal_prefix]
|
|
89
|
+
else:
|
|
90
|
+
raise ValueError(
|
|
91
|
+
f"[{sweet_internal_prefix}] missing example in {name_short} ({uri_prefix})"
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
if not sweet_internal_prefix.startswith("so"):
|
|
95
|
+
raise ValueError
|
|
96
|
+
|
|
97
|
+
nsl = name_short.lower()
|
|
98
|
+
if nsl.startswith("human "):
|
|
99
|
+
keywords = [nsl.removeprefix("human ")]
|
|
100
|
+
elif nsl.startswith("material "):
|
|
101
|
+
keywords = ["materials", nsl.removeprefix("material ")]
|
|
102
|
+
elif nsl.startswith("phenomena "):
|
|
103
|
+
keywords = ["phenomena", nsl.removeprefix("phenomena ")]
|
|
104
|
+
elif nsl.startswith("property relationships "):
|
|
105
|
+
keywords = [nsl.removeprefix("property relationships ")]
|
|
106
|
+
elif nsl.startswith("property "):
|
|
107
|
+
keywords = [nsl.removeprefix("property ")]
|
|
108
|
+
elif nsl.startswith("process "):
|
|
109
|
+
keywords = [nsl.removeprefix("process ")]
|
|
110
|
+
elif nsl.startswith("realm land "):
|
|
111
|
+
keywords = [nsl.removeprefix("realm land") + "land"]
|
|
112
|
+
elif nsl.startswith("realm "):
|
|
113
|
+
keywords = ["realm", nsl.removeprefix("realm ")]
|
|
114
|
+
elif nsl.startswith("representation "):
|
|
115
|
+
keywords = [nsl.removeprefix("realm ")]
|
|
116
|
+
elif nsl.startswith("state "):
|
|
117
|
+
keywords = [nsl.removeprefix("realm ")]
|
|
118
|
+
elif nsl.startswith("relationships "):
|
|
119
|
+
keywords = [nsl.removeprefix("relationships ")]
|
|
120
|
+
else:
|
|
121
|
+
keywords = [nsl.lower()]
|
|
122
|
+
|
|
123
|
+
prefix = f"sweet.{sweet_internal_prefix.removeprefix('so')}"
|
|
124
|
+
resource = bioregistry.Resource(
|
|
125
|
+
prefix=prefix,
|
|
126
|
+
synonyms=[sweet_internal_prefix],
|
|
127
|
+
name=name,
|
|
128
|
+
keywords=sorted(keywords),
|
|
129
|
+
homepage=str(uri_prefix),
|
|
130
|
+
uri_format=f"{uri_prefix}$1",
|
|
131
|
+
description=f"The Semantic Web for Earth and Environmental Terminology (SWEET) ontology for {name_short}",
|
|
132
|
+
example=example,
|
|
133
|
+
download_rdf=download_rdf,
|
|
134
|
+
part_of="sweet",
|
|
135
|
+
license="CC0-1.0",
|
|
136
|
+
repository="https://github.com/ESIPFed/sweet",
|
|
137
|
+
contributor=bioregistry.Author.get_charlie(),
|
|
138
|
+
github_request_issue=1772,
|
|
139
|
+
)
|
|
140
|
+
bioregistry.add_resource(resource)
|
|
141
|
+
|
|
142
|
+
bioregistry.manager.write_registry()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
if __name__ == "__main__":
|
|
146
|
+
main()
|
|
@@ -13,7 +13,7 @@ from manubot.cite.pubmed import get_pmid_for_doi, get_pubmed_csl_item
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
15
|
from bioregistry import manager
|
|
16
|
-
from bioregistry.schema
|
|
16
|
+
from bioregistry.schema import Publication, deduplicate_publications
|
|
17
17
|
from bioregistry.utils import removeprefix
|
|
18
18
|
|
|
19
19
|
|