bioregistry 0.13.9__py3-none-any.whl → 0.13.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. bioregistry/__init__.py +1 -1
  2. bioregistry/analysis/paper_ranking.py +6 -2
  3. bioregistry/app/api.py +1 -2
  4. bioregistry/app/ui.py +3 -3
  5. bioregistry/bibliometrics.py +1 -1
  6. bioregistry/cli.py +1 -1
  7. bioregistry/curation/add_cessda.py +1 -1
  8. bioregistry/curation/add_provider_status_curations.py +1 -1
  9. bioregistry/curation/clean_publications.py +1 -1
  10. bioregistry/curation/enrich_publications.py +1 -1
  11. bioregistry/data/bioregistry.json +312 -39
  12. bioregistry/data/collections.json +26 -2
  13. bioregistry/export/rdf_export.py +1 -1
  14. bioregistry/export/tables_export.py +1 -1
  15. bioregistry/external/aberowl/processed.json +49 -20
  16. bioregistry/external/bartoc/processed.json +133 -11
  17. bioregistry/external/bioportal/agroportal.json +11 -1
  18. bioregistry/external/fairsharing/processed.json +96 -4
  19. bioregistry/external/integbio/processed.json +6 -4
  20. bioregistry/external/lov/processed.json +10 -0
  21. bioregistry/external/obofoundry/processed.json +4 -0
  22. bioregistry/external/ols/processed.json +39 -27
  23. bioregistry/external/re3data/processed.json +22 -8
  24. bioregistry/record_accumulator.py +1 -1
  25. bioregistry/resolve.py +1 -2
  26. bioregistry/resource_manager.py +1 -1
  27. bioregistry/schema/__init__.py +24 -0
  28. bioregistry/schema/struct.py +3 -0
  29. bioregistry/version.py +1 -1
  30. {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/METADATA +1 -1
  31. {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/RECORD +33 -33
  32. {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/WHEEL +1 -1
  33. {bioregistry-0.13.9.dist-info → bioregistry-0.13.11.dist-info}/entry_points.txt +0 -0
bioregistry/__init__.py CHANGED
@@ -127,7 +127,7 @@ from .resolve_identifier import (
127
127
  standardize_identifier,
128
128
  )
129
129
  from .resource_manager import Manager, manager
130
- from .schema.struct import (
130
+ from .schema import (
131
131
  Author,
132
132
  Collection,
133
133
  Context,
bioregistry/analysis/paper_ranking.py CHANGED
@@ -400,7 +400,7 @@ def predict_and_save(
400
400
  :param path: Path to save the predictions.
401
401
  """
402
402
  x_meta = pd.DataFrame()
403
- x_transformed = vectorizer.transform(df["title"] + " " + df["abstract"])
403
+ x_transformed = vectorizer.transform(_concat(df))
404
404
  for name, clf in classifiers:
405
405
  x_meta[name] = _predict(clf, x_transformed)
406
406
 
@@ -524,7 +524,7 @@ def train(
524
524
  df = pd.concat(curated_dfs)[["pubmed", "title", "abstract", "relevant"]]
525
525
 
526
526
  df["abstract"] = df["abstract"].fillna("")
527
- df["title_abstract"] = df["title"] + " " + df["abstract"]
527
+ df["title_abstract"] = _concat(df)
528
528
  df = df[df.title_abstract.notna()]
529
529
  df = df.drop_duplicates()
530
530
  _echo_stats(df, "combine curated publications")
@@ -582,5 +582,9 @@ def train(
582
582
  return TrainingResult(curated_pubmed_ids, vectorizer, classifiers, meta_clf)
583
583
 
584
584
 
585
+ def _concat(df: pd.DataFrame) -> pd.Series[str]:
586
+ return cast("pd.Series[str]", df["title"]) + " " + cast("pd.Series[str]", df["abstract"])
587
+
588
+
585
589
  if __name__ == "__main__":
586
590
  main()
bioregistry/app/api.py CHANGED
@@ -19,8 +19,7 @@ from ..export.rdf_export import (
19
19
  resource_to_rdf_str,
20
20
  )
21
21
  from ..resource_manager import Manager
22
- from ..schema import Attributable, sanitize_mapping
23
- from ..schema.struct import Collection, Context, Registry, Resource
22
+ from ..schema import Attributable, Collection, Context, Registry, Resource, sanitize_mapping
24
23
  from ..schema_utils import (
25
24
  read_collections_contributions,
26
25
  read_prefix_contacts,
bioregistry/app/ui.py CHANGED
@@ -38,9 +38,8 @@ from ..export.rdf_export import (
38
38
  metaresource_to_rdf_str,
39
39
  resource_to_rdf_str,
40
40
  )
41
- from ..schema import Context
42
- from ..schema.constants import SCHEMA_TERMS
43
- from ..schema.struct import (
41
+ from ..schema import (
42
+ Context,
44
43
  Registry,
45
44
  RegistryGovernance,
46
45
  RegistryQualities,
@@ -49,6 +48,7 @@ from ..schema.struct import (
49
48
  get_json_schema,
50
49
  schema_status_map,
51
50
  )
51
+ from ..schema.constants import SCHEMA_TERMS
52
52
  from ..schema_utils import (
53
53
  read_collections_contributions,
54
54
  read_context_contributions,
bioregistry/bibliometrics.py CHANGED
@@ -8,7 +8,7 @@ from collections.abc import Iterable
8
8
  from typing import TYPE_CHECKING
9
9
 
10
10
  from .resource_manager import manager
11
- from .schema.struct import Publication, deduplicate_publications
11
+ from .schema import Publication, deduplicate_publications
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  import pandas
bioregistry/cli.py CHANGED
@@ -8,7 +8,7 @@ from .app.cli import web
8
8
  from .compare import compare
9
9
  from .export.cli import export
10
10
  from .lint import lint
11
- from .schema.struct import generate_schema
11
+ from .schema import generate_schema
12
12
  from .utils import get_hexdigests, secho
13
13
  from .validate.cli import validate
14
14
  from .version import VERSION
bioregistry/curation/add_cessda.py CHANGED
@@ -13,7 +13,7 @@ from tabulate import tabulate
13
13
  from tqdm import tqdm
14
14
 
15
15
  import bioregistry
16
- from bioregistry.schema.struct import Author, Organization
16
+ from bioregistry.schema import Author, Organization
17
17
 
18
18
  BASE = "https://vocabularies.cessda.eu"
19
19
  MODULE = pystow.module("cessda")
bioregistry/curation/add_provider_status_curations.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from tqdm import tqdm
9
9
 
10
10
  from bioregistry import manager
11
- from bioregistry.schema.struct import StatusCheck
11
+ from bioregistry.schema import StatusCheck
12
12
 
13
13
  URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC8RAMlNGauLHJb1RGwFuvC2LBJBjeeICRtq596npE6G4ZjZwX8W_Fz031hAfqsbu6f9Ruxl2PTsFx/pub?gid=1207894592&single=true&output=tsv"
14
14
 
bioregistry/curation/clean_publications.py CHANGED
@@ -7,7 +7,7 @@
7
7
  import click
8
8
 
9
9
  import bioregistry
10
- from bioregistry.schema.struct import deduplicate_publications
10
+ from bioregistry.schema import deduplicate_publications
11
11
 
12
12
 
13
13
  @click.command()
bioregistry/curation/enrich_publications.py CHANGED
@@ -13,7 +13,7 @@ from manubot.cite.pubmed import get_pmid_for_doi, get_pubmed_csl_item
13
13
  from tqdm import tqdm
14
14
 
15
15
  from bioregistry import manager
16
- from bioregistry.schema.struct import Publication, deduplicate_publications
16
+ from bioregistry.schema import Publication, deduplicate_publications
17
17
  from bioregistry.utils import removeprefix
18
18
 
19
19