bioregistry 0.13.9__py3-none-any.whl → 0.13.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioregistry/__init__.py +1 -1
- bioregistry/analysis/paper_ranking.py +6 -2
- bioregistry/app/api.py +1 -2
- bioregistry/app/ui.py +3 -3
- bioregistry/bibliometrics.py +1 -1
- bioregistry/cli.py +1 -1
- bioregistry/curation/add_cessda.py +1 -1
- bioregistry/curation/add_provider_status_curations.py +1 -1
- bioregistry/curation/clean_publications.py +1 -1
- bioregistry/curation/enrich_publications.py +1 -1
- bioregistry/data/bioregistry.json +312 -39
- bioregistry/data/collections.json +26 -2
- bioregistry/export/rdf_export.py +1 -1
- bioregistry/export/tables_export.py +1 -1
- bioregistry/external/aberowl/processed.json +49 -20
- bioregistry/external/bartoc/processed.json +133 -11
- bioregistry/external/bioportal/agroportal.json +11 -1
- bioregistry/external/fairsharing/processed.json +96 -4
- bioregistry/external/integbio/processed.json +6 -4
- bioregistry/external/lov/processed.json +10 -0
- bioregistry/external/obofoundry/processed.json +4 -0
- bioregistry/external/ols/processed.json +39 -27
- bioregistry/external/re3data/processed.json +22 -8
- bioregistry/record_accumulator.py +1 -1
- bioregistry/resolve.py +1 -2
- bioregistry/resource_manager.py +1 -1
- bioregistry/schema/__init__.py +24 -0
- bioregistry/schema/struct.py +3 -0
- bioregistry/version.py +1 -1
- {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/METADATA +1 -1
- {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/RECORD +33 -33
- {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/WHEEL +1 -1
- {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/entry_points.txt +0 -0
bioregistry/analysis/paper_ranking.py
CHANGED
|
@@ -400,7 +400,7 @@ def predict_and_save(
|
|
|
400
400
|
:param path: Path to save the predictions.
|
|
401
401
|
"""
|
|
402
402
|
x_meta = pd.DataFrame()
|
|
403
|
-
x_transformed = vectorizer.transform(df
|
|
403
|
+
x_transformed = vectorizer.transform(_concat(df))
|
|
404
404
|
for name, clf in classifiers:
|
|
405
405
|
x_meta[name] = _predict(clf, x_transformed)
|
|
406
406
|
|
|
@@ -524,7 +524,7 @@ def train(
|
|
|
524
524
|
df = pd.concat(curated_dfs)[["pubmed", "title", "abstract", "relevant"]]
|
|
525
525
|
|
|
526
526
|
df["abstract"] = df["abstract"].fillna("")
|
|
527
|
-
df["title_abstract"] = df
|
|
527
|
+
df["title_abstract"] = _concat(df)
|
|
528
528
|
df = df[df.title_abstract.notna()]
|
|
529
529
|
df = df.drop_duplicates()
|
|
530
530
|
_echo_stats(df, "combine curated publications")
|
|
@@ -582,5 +582,9 @@ def train(
|
|
|
582
582
|
return TrainingResult(curated_pubmed_ids, vectorizer, classifiers, meta_clf)
|
|
583
583
|
|
|
584
584
|
|
|
585
|
+
def _concat(df: pd.DataFrame) -> pd.Series[str]:
|
|
586
|
+
return cast("pd.Series[str]", df["title"]) + " " + cast("pd.Series[str]", df["abstract"])
|
|
587
|
+
|
|
588
|
+
|
|
585
589
|
if __name__ == "__main__":
|
|
586
590
|
main()
|
bioregistry/app/api.py
CHANGED
|
@@ -19,8 +19,7 @@ from ..export.rdf_export import (
|
|
|
19
19
|
resource_to_rdf_str,
|
|
20
20
|
)
|
|
21
21
|
from ..resource_manager import Manager
|
|
22
|
-
from ..schema import Attributable, sanitize_mapping
|
|
23
|
-
from ..schema.struct import Collection, Context, Registry, Resource
|
|
22
|
+
from ..schema import Attributable, Collection, Context, Registry, Resource, sanitize_mapping
|
|
24
23
|
from ..schema_utils import (
|
|
25
24
|
read_collections_contributions,
|
|
26
25
|
read_prefix_contacts,
|
bioregistry/app/ui.py
CHANGED
|
@@ -38,9 +38,8 @@ from ..export.rdf_export import (
|
|
|
38
38
|
metaresource_to_rdf_str,
|
|
39
39
|
resource_to_rdf_str,
|
|
40
40
|
)
|
|
41
|
-
from ..schema import
|
|
42
|
-
|
|
43
|
-
from ..schema.struct import (
|
|
41
|
+
from ..schema import (
|
|
42
|
+
Context,
|
|
44
43
|
Registry,
|
|
45
44
|
RegistryGovernance,
|
|
46
45
|
RegistryQualities,
|
|
@@ -49,6 +48,7 @@ from ..schema.struct import (
|
|
|
49
48
|
get_json_schema,
|
|
50
49
|
schema_status_map,
|
|
51
50
|
)
|
|
51
|
+
from ..schema.constants import SCHEMA_TERMS
|
|
52
52
|
from ..schema_utils import (
|
|
53
53
|
read_collections_contributions,
|
|
54
54
|
read_context_contributions,
|
bioregistry/bibliometrics.py
CHANGED
|
@@ -8,7 +8,7 @@ from collections.abc import Iterable
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
10
|
from .resource_manager import manager
|
|
11
|
-
from .schema
|
|
11
|
+
from .schema import Publication, deduplicate_publications
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
14
|
import pandas
|
bioregistry/cli.py
CHANGED
|
@@ -8,7 +8,7 @@ from .app.cli import web
|
|
|
8
8
|
from .compare import compare
|
|
9
9
|
from .export.cli import export
|
|
10
10
|
from .lint import lint
|
|
11
|
-
from .schema
|
|
11
|
+
from .schema import generate_schema
|
|
12
12
|
from .utils import get_hexdigests, secho
|
|
13
13
|
from .validate.cli import validate
|
|
14
14
|
from .version import VERSION
|
bioregistry/curation/add_cessda.py
CHANGED
|
@@ -13,7 +13,7 @@ from tabulate import tabulate
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
15
|
import bioregistry
|
|
16
|
-
from bioregistry.schema
|
|
16
|
+
from bioregistry.schema import Author, Organization
|
|
17
17
|
|
|
18
18
|
BASE = "https://vocabularies.cessda.eu"
|
|
19
19
|
MODULE = pystow.module("cessda")
|
bioregistry/curation/add_provider_status_curations.py
CHANGED
|
@@ -8,7 +8,7 @@ import pandas as pd
|
|
|
8
8
|
from tqdm import tqdm
|
|
9
9
|
|
|
10
10
|
from bioregistry import manager
|
|
11
|
-
from bioregistry.schema
|
|
11
|
+
from bioregistry.schema import StatusCheck
|
|
12
12
|
|
|
13
13
|
URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC8RAMlNGauLHJb1RGwFuvC2LBJBjeeICRtq596npE6G4ZjZwX8W_Fz031hAfqsbu6f9Ruxl2PTsFx/pub?gid=1207894592&single=true&output=tsv"
|
|
14
14
|
|
|
@@ -13,7 +13,7 @@ from manubot.cite.pubmed import get_pmid_for_doi, get_pubmed_csl_item
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
15
|
from bioregistry import manager
|
|
16
|
-
from bioregistry.schema
|
|
16
|
+
from bioregistry.schema import Publication, deduplicate_publications
|
|
17
17
|
from bioregistry.utils import removeprefix
|
|
18
18
|
|
|
19
19
|
|