bioregistry 0.13.8__py3-none-any.whl → 0.13.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. bioregistry/.DS_Store +0 -0
  2. bioregistry/__init__.py +1 -1
  3. bioregistry/analysis/paper_ranking.py +6 -2
  4. bioregistry/app/.DS_Store +0 -0
  5. bioregistry/app/api.py +1 -2
  6. bioregistry/app/templates/.DS_Store +0 -0
  7. bioregistry/app/ui.py +3 -3
  8. bioregistry/bibliometrics.py +1 -1
  9. bioregistry/cli.py +1 -1
  10. bioregistry/curation/.DS_Store +0 -0
  11. bioregistry/curation/add_cessda.py +1 -1
  12. bioregistry/curation/add_provider_status_curations.py +1 -1
  13. bioregistry/curation/add_sweet.py +146 -0
  14. bioregistry/curation/clean_publications.py +1 -1
  15. bioregistry/curation/enrich_publications.py +1 -1
  16. bioregistry/data/bioregistry.json +6849 -619
  17. bioregistry/data/collections.json +24 -1
  18. bioregistry/data/curated_papers.tsv +9 -0
  19. bioregistry/data/metaregistry.json +1 -1
  20. bioregistry/export/rdf_export.py +1 -1
  21. bioregistry/export/tables_export.py +1 -1
  22. bioregistry/external/.DS_Store +0 -0
  23. bioregistry/external/aberowl/processed.json +23 -19
  24. bioregistry/external/bartoc/processed.json +5 -5
  25. bioregistry/external/biolink/processed.json +3 -0
  26. bioregistry/external/bioportal/agroportal.json +3 -3
  27. bioregistry/external/bioportal/bioportal.json +45 -17
  28. bioregistry/external/cellosaurus/processed.json +3 -3
  29. bioregistry/external/fairsharing/processed.json +6 -5
  30. bioregistry/external/integbio/processed.json +56 -55
  31. bioregistry/external/lov/processed.json +59 -0
  32. bioregistry/external/miriam/.DS_Store +0 -0
  33. bioregistry/external/obofoundry/processed.json +4 -4
  34. bioregistry/external/ols/__init__.py +13 -5
  35. bioregistry/external/ols/processed.json +6 -6
  36. bioregistry/external/ols/tib-processed.json +0 -1
  37. bioregistry/external/ols/tib.py +1 -0
  38. bioregistry/external/re3data/processed.json +24 -2
  39. bioregistry/record_accumulator.py +1 -1
  40. bioregistry/resolve.py +1 -2
  41. bioregistry/resource_manager.py +1 -1
  42. bioregistry/schema/.DS_Store +0 -0
  43. bioregistry/schema/__init__.py +24 -0
  44. bioregistry/schema/struct.py +10 -3
  45. bioregistry/version.py +1 -1
  46. {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/METADATA +1 -1
  47. {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/RECORD +49 -41
  48. {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/WHEEL +1 -1
  49. {bioregistry-0.13.8.dist-info → bioregistry-0.13.10.dist-info}/entry_points.txt +0 -0
bioregistry/.DS_Store ADDED
Binary file
bioregistry/__init__.py CHANGED
@@ -127,7 +127,7 @@ from .resolve_identifier import (
127
127
  standardize_identifier,
128
128
  )
129
129
  from .resource_manager import Manager, manager
130
- from .schema.struct import (
130
+ from .schema import (
131
131
  Author,
132
132
  Collection,
133
133
  Context,
bioregistry/analysis/paper_ranking.py CHANGED
@@ -400,7 +400,7 @@ def predict_and_save(
400
400
  :param path: Path to save the predictions.
401
401
  """
402
402
  x_meta = pd.DataFrame()
403
- x_transformed = vectorizer.transform(df["title"] + " " + df["abstract"])
403
+ x_transformed = vectorizer.transform(_concat(df))
404
404
  for name, clf in classifiers:
405
405
  x_meta[name] = _predict(clf, x_transformed)
406
406
 
@@ -524,7 +524,7 @@ def train(
524
524
  df = pd.concat(curated_dfs)[["pubmed", "title", "abstract", "relevant"]]
525
525
 
526
526
  df["abstract"] = df["abstract"].fillna("")
527
- df["title_abstract"] = df["title"] + " " + df["abstract"]
527
+ df["title_abstract"] = _concat(df)
528
528
  df = df[df.title_abstract.notna()]
529
529
  df = df.drop_duplicates()
530
530
  _echo_stats(df, "combine curated publications")
@@ -582,5 +582,9 @@ def train(
582
582
  return TrainingResult(curated_pubmed_ids, vectorizer, classifiers, meta_clf)
583
583
 
584
584
 
585
+ def _concat(df: pd.DataFrame) -> pd.Series[str]:
586
+ return cast("pd.Series[str]", df["title"]) + " " + cast("pd.Series[str]", df["abstract"])
587
+
588
+
585
589
  if __name__ == "__main__":
586
590
  main()
bioregistry/app/.DS_Store ADDED
Binary file
bioregistry/app/api.py CHANGED
@@ -19,8 +19,7 @@ from ..export.rdf_export import (
19
19
  resource_to_rdf_str,
20
20
  )
21
21
  from ..resource_manager import Manager
22
- from ..schema import Attributable, sanitize_mapping
23
- from ..schema.struct import Collection, Context, Registry, Resource
22
+ from ..schema import Attributable, Collection, Context, Registry, Resource, sanitize_mapping
24
23
  from ..schema_utils import (
25
24
  read_collections_contributions,
26
25
  read_prefix_contacts,
bioregistry/app/templates/.DS_Store ADDED
Binary file
bioregistry/app/ui.py CHANGED
@@ -38,9 +38,8 @@ from ..export.rdf_export import (
38
38
  metaresource_to_rdf_str,
39
39
  resource_to_rdf_str,
40
40
  )
41
- from ..schema import Context
42
- from ..schema.constants import SCHEMA_TERMS
43
- from ..schema.struct import (
41
+ from ..schema import (
42
+ Context,
44
43
  Registry,
45
44
  RegistryGovernance,
46
45
  RegistryQualities,
@@ -49,6 +48,7 @@ from ..schema.struct import (
49
48
  get_json_schema,
50
49
  schema_status_map,
51
50
  )
51
+ from ..schema.constants import SCHEMA_TERMS
52
52
  from ..schema_utils import (
53
53
  read_collections_contributions,
54
54
  read_context_contributions,
bioregistry/bibliometrics.py CHANGED
@@ -8,7 +8,7 @@ from collections.abc import Iterable
8
8
  from typing import TYPE_CHECKING
9
9
 
10
10
  from .resource_manager import manager
11
- from .schema.struct import Publication, deduplicate_publications
11
+ from .schema import Publication, deduplicate_publications
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  import pandas
bioregistry/cli.py CHANGED
@@ -8,7 +8,7 @@ from .app.cli import web
8
8
  from .compare import compare
9
9
  from .export.cli import export
10
10
  from .lint import lint
11
- from .schema.struct import generate_schema
11
+ from .schema import generate_schema
12
12
  from .utils import get_hexdigests, secho
13
13
  from .validate.cli import validate
14
14
  from .version import VERSION
bioregistry/curation/.DS_Store ADDED
Binary file
bioregistry/curation/add_cessda.py CHANGED
@@ -13,7 +13,7 @@ from tabulate import tabulate
13
13
  from tqdm import tqdm
14
14
 
15
15
  import bioregistry
16
- from bioregistry.schema.struct import Author, Organization
16
+ from bioregistry.schema import Author, Organization
17
17
 
18
18
  BASE = "https://vocabularies.cessda.eu"
19
19
  MODULE = pystow.module("cessda")
bioregistry/curation/add_provider_status_curations.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from tqdm import tqdm
9
9
 
10
10
  from bioregistry import manager
11
- from bioregistry.schema.struct import StatusCheck
11
+ from bioregistry.schema import StatusCheck
12
12
 
13
13
  URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC8RAMlNGauLHJb1RGwFuvC2LBJBjeeICRtq596npE6G4ZjZwX8W_Fz031hAfqsbu6f9Ruxl2PTsFx/pub?gid=1207894592&single=true&output=tsv"
14
14
 
bioregistry/curation/add_sweet.py ADDED
@@ -0,0 +1,146 @@
1
+ """Add SWEET ontologies."""
2
+
3
+ from typing import cast
4
+
5
+ import click
6
+ import pystow
7
+
8
+ import bioregistry
9
+
10
+ MODULE = pystow.module("bioregistry", "sweet")
11
+
12
+ ALL_PREFIXES_URL = "https://github.com/ESIPFed/sweet/raw/refs/heads/master/sweetPrefixes.ttl"
13
+
14
+ MANUAL = {
15
+ "sosto": "Acute",
16
+ "sostri": "Catastrophic",
17
+ "sostsp": "Big",
18
+ "sorel": "hasPhenomena",
19
+ "sorelch": "atomicMass",
20
+ "sorelh": "hasAttribute",
21
+ "soreaer": "AbyssopelagicZone",
22
+ "sorelcl": "hasAverageAnnualPrecipitation",
23
+ "sorelm": "averageOver",
24
+ "sorelph": "colderThan",
25
+ "sorelsc": "causedBy",
26
+ "sorelt": "dayOfYear",
27
+ "sorelsp": "adjacentTo",
28
+ "sorepsd": "Counterclockwise",
29
+ "sorelpr": "fillValue",
30
+ "sostss": "Continental",
31
+ "sostrt": "Accurate",
32
+ "sostsl": "CaK",
33
+ "sosttf": "Annual",
34
+ "sosttg": "0MYA",
35
+ "sostv": "Clear",
36
+ }
37
+
38
+
39
+ @click.command()
40
+ def main() -> None:
41
+ """Add SWEET ontologies."""
42
+ graph = MODULE.ensure_rdf(url=ALL_PREFIXES_URL)
43
+ sparql = """
44
+ SELECT ?prefix ?namespace
45
+ WHERE {
46
+ ?x sh:prefix ?prefix;
47
+ sh:namespace ?namespace .
48
+ }
49
+ """
50
+ for sweet_internal_prefix, uri_prefix in graph.query(sparql): # type:ignore
51
+ sweet_internal_prefix = str(sweet_internal_prefix)
52
+ uri_prefix = str(uri_prefix)
53
+
54
+ if sweet_internal_prefix in {"soall", "sweet"}:
55
+ continue # this is the combine one, not its own prefix
56
+
57
+ sweet_internal_key = uri_prefix.removeprefix("http://sweetontology.net/").rstrip("/")
58
+ if not sweet_internal_key:
59
+ raise ValueError(f"no internal key found for {sweet_internal_prefix}")
60
+
61
+ download_rdf = (
62
+ f"https://github.com/ESIPFed/sweet/raw/refs/heads/master/src/{sweet_internal_key}.ttl"
63
+ )
64
+ inner_graph = MODULE.ensure_rdf(url=download_rdf)
65
+
66
+ ontology_name_query = """
67
+ SELECT ?name
68
+ WHERE { owl:Ontology ^rdf:type/rdfs:label ?name }
69
+ LIMIT 1
70
+ """
71
+ name = str(next(iter(inner_graph.query(ontology_name_query)))[0]) # type:ignore
72
+ name_short = name.removeprefix("SWEET Ontology ")
73
+
74
+ example_query = f"""
75
+ SELECT ?term
76
+ WHERE {{
77
+ ?term rdf:type owl:Class;
78
+ rdfs:label ?name ;
79
+ FILTER STRSTARTS(str(?term), "{uri_prefix}")
80
+ }}
81
+ LIMIT 1
82
+ """
83
+ example_records = list(inner_graph.query(example_query))
84
+ if example_records:
85
+ example_uri = cast(str, example_records[0][0]) # type:ignore[index]
86
+ example = example_uri.removeprefix(uri_prefix)
87
+ elif sweet_internal_prefix in MANUAL:
88
+ example = MANUAL[sweet_internal_prefix]
89
+ else:
90
+ raise ValueError(
91
+ f"[{sweet_internal_prefix}] missing example in {name_short} ({uri_prefix})"
92
+ )
93
+
94
+ if not sweet_internal_prefix.startswith("so"):
95
+ raise ValueError
96
+
97
+ nsl = name_short.lower()
98
+ if nsl.startswith("human "):
99
+ keywords = [nsl.removeprefix("human ")]
100
+ elif nsl.startswith("material "):
101
+ keywords = ["materials", nsl.removeprefix("material ")]
102
+ elif nsl.startswith("phenomena "):
103
+ keywords = ["phenomena", nsl.removeprefix("phenomena ")]
104
+ elif nsl.startswith("property relationships "):
105
+ keywords = [nsl.removeprefix("property relationships ")]
106
+ elif nsl.startswith("property "):
107
+ keywords = [nsl.removeprefix("property ")]
108
+ elif nsl.startswith("process "):
109
+ keywords = [nsl.removeprefix("process ")]
110
+ elif nsl.startswith("realm land "):
111
+ keywords = [nsl.removeprefix("realm land") + "land"]
112
+ elif nsl.startswith("realm "):
113
+ keywords = ["realm", nsl.removeprefix("realm ")]
114
+ elif nsl.startswith("representation "):
115
+ keywords = [nsl.removeprefix("realm ")]
116
+ elif nsl.startswith("state "):
117
+ keywords = [nsl.removeprefix("realm ")]
118
+ elif nsl.startswith("relationships "):
119
+ keywords = [nsl.removeprefix("relationships ")]
120
+ else:
121
+ keywords = [nsl.lower()]
122
+
123
+ prefix = f"sweet.{sweet_internal_prefix.removeprefix('so')}"
124
+ resource = bioregistry.Resource(
125
+ prefix=prefix,
126
+ synonyms=[sweet_internal_prefix],
127
+ name=name,
128
+ keywords=sorted(keywords),
129
+ homepage=str(uri_prefix),
130
+ uri_format=f"{uri_prefix}$1",
131
+ description=f"The Semantic Web for Earth and Environmental Terminology (SWEET) ontology for {name_short}",
132
+ example=example,
133
+ download_rdf=download_rdf,
134
+ part_of="sweet",
135
+ license="CC0-1.0",
136
+ repository="https://github.com/ESIPFed/sweet",
137
+ contributor=bioregistry.Author.get_charlie(),
138
+ github_request_issue=1772,
139
+ )
140
+ bioregistry.add_resource(resource)
141
+
142
+ bioregistry.manager.write_registry()
143
+
144
+
145
+ if __name__ == "__main__":
146
+ main()
bioregistry/curation/clean_publications.py CHANGED
@@ -7,7 +7,7 @@
7
7
  import click
8
8
 
9
9
  import bioregistry
10
- from bioregistry.schema.struct import deduplicate_publications
10
+ from bioregistry.schema import deduplicate_publications
11
11
 
12
12
 
13
13
  @click.command()
bioregistry/curation/enrich_publications.py CHANGED
@@ -13,7 +13,7 @@ from manubot.cite.pubmed import get_pmid_for_doi, get_pubmed_csl_item
13
13
  from tqdm import tqdm
14
14
 
15
15
  from bioregistry import manager
16
- from bioregistry.schema.struct import Publication, deduplicate_publications
16
+ from bioregistry.schema import Publication, deduplicate_publications
17
17
  from bioregistry.utils import removeprefix
18
18
 
19
19