PyPI - commonmeta-py - Versions diffs - 0.104__py3-none-any.whl → 0.106__py3-none-any.whl - Mend

commonmeta-py 0.104__py3-none-any.whl → 0.106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

commonmeta/__init__.py +1 -1
commonmeta/api_utils.py +7 -6
commonmeta/author_utils.py +10 -10
commonmeta/base_utils.py +3 -2
commonmeta/cli.py +3 -0
commonmeta/date_utils.py +3 -2
commonmeta/doi_utils.py +4 -14
commonmeta/metadata.py +5 -1
commonmeta/readers/cff_reader.py +9 -8
commonmeta/readers/codemeta_reader.py +14 -13
commonmeta/readers/crossref_reader.py +6 -5
commonmeta/readers/crossref_xml_reader.py +20 -19
commonmeta/readers/datacite_reader.py +21 -19
commonmeta/readers/datacite_xml_reader.py +7 -6
commonmeta/readers/inveniordm_reader.py +15 -16
commonmeta/readers/json_feed_reader.py +25 -22
commonmeta/readers/openalex_reader.py +31 -19
commonmeta/readers/ris_reader.py +4 -4
commonmeta/readers/schema_org_reader.py +31 -29
commonmeta/schema_utils.py +1 -0
commonmeta/translators.py +2 -1
commonmeta/utils.py +81 -7
{commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/METADATA +16 -15
{commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/RECORD +27 -29
commonmeta/resources/ietf-bcp-47.json +0 -3025
commonmeta/resources/iso-8601.json +0 -3182
{commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/WHEEL +0 -0
{commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/entry_points.txt +0 -0
{commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/licenses/LICENSE +0 -0

commonmeta/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
 """
 __title__ = "commonmeta-py"
-__version__ = "0.104"
+__version__ = "0.106"
 __author__ = "Martin Fenner"
 __license__ = "MIT"

commonmeta/api_utils.py CHANGED Viewed

@@ -1,12 +1,13 @@
 """API Utils module for commonmeta-py"""
-from typing import Optional
 from datetime import datetime as date
-import httpx
-from furl import furl
+from typing import Optional
 import jwt
+import requests
+from furl import furl
-from .doi_utils import validate_doi, doi_as_url
+from .doi_utils import doi_as_url, validate_doi
 from .readers.json_feed_reader import get_json_feed_item_uuid
@@ -52,7 +53,7 @@ def update_ghost_post_via_api(
     f = furl(url)
     slug = f.path.segments[-1]
     ghost_url = f"{api_url}/ghost/api/admin/posts/slug/{slug}/"
-    response = httpx.get(ghost_url, headers=headers, timeout=10)
+    response = requests.get(ghost_url, headers=headers, timeout=10)
     if response.status_code != 200:
         return {"error": "Error fetching post"}
     ghost_post = response.json().get("posts")[0]
@@ -67,7 +68,7 @@ def update_ghost_post_via_api(
     ghost_url = f"{api_url}/ghost/api/admin/posts/{guid}/"
     json = {"posts": [{"canonical_url": doi, "updated_at": updated_at}]}
-    response = httpx.put(
+    response = requests.put(
         ghost_url,
         headers=headers,
         json=json,

commonmeta/author_utils.py CHANGED Viewed

@@ -2,23 +2,23 @@
 import re
 from typing import List
+from furl import furl
 from nameparser import HumanName
 from pydash import py_
-from furl import furl
+from .base_utils import compact, parse_attributes, presence, wrap
+from .constants import (
+    COMMONMETA_CONTRIBUTOR_ROLES,
+)
 from .utils import (
-    normalize_orcid,
+    format_name_identifier,
     normalize_id,
-    normalize_ror,
     normalize_isni,
-    format_name_identifier,
-    validate_ror,
+    normalize_orcid,
+    normalize_ror,
     validate_orcid,
-)
-from .base_utils import parse_attributes, wrap, presence, compact
-from .constants import (
-    COMMONMETA_CONTRIBUTOR_ROLES,
+    validate_ror,
 )

commonmeta/base_utils.py CHANGED Viewed

@@ -1,11 +1,12 @@
 """Base utilities for commonmeta-py"""
 import html
-from os import path
 import re
-import xmltodict
+from os import path
 from typing import Optional, Union
 import nh3
+import xmltodict
 def wrap(item) -> list:

commonmeta/cli.py CHANGED Viewed

@@ -12,6 +12,7 @@ from commonmeta.readers.datacite_reader import get_random_datacite_id
 from commonmeta.readers.json_feed_reader import (
     get_json_feed_item_uuid,
 )
+from commonmeta.readers.openalex_reader import get_random_openalex_id
 @click.group()
@@ -131,6 +132,8 @@ def sample(provider, prefix, type, number, to, style, locale, show_errors):
         )
     elif provider == "datacite":
         string = json.dumps({"items": get_random_datacite_id(number)})
+    elif provider == "openalex":
+        string = json.dumps({"items": get_random_openalex_id(number)})
     else:
         output = "Provider not supported. Use 'crossref' or 'datacite' instead."
         click.echo(output)

commonmeta/date_utils.py CHANGED Viewed

@@ -3,10 +3,11 @@
 import datetime
 from datetime import datetime as dt
 from typing import Optional, Union
 import dateparser
-from edtf import parse_edtf, DateAndTime, Date
-from edtf.parser.edtf_exceptions import EDTFParseException
 import pydash as py_
+from edtf import Date, DateAndTime, parse_edtf
+from edtf.parser.edtf_exceptions import EDTFParseException
 from .base_utils import compact

commonmeta/doi_utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ import re
 from typing import Optional
 import base32_lib as base32
-import httpx
+import requests
 from furl import furl
 from .base_utils import compact
@@ -94,7 +94,7 @@ def short_doi_as_doi(doi: Optional[str]) -> Optional[str]:
     doi_url = doi_as_url(doi)
     if doi_url is None:
         return None
-    response = httpx.head(doi_url, timeout=10)
+    response = requests.head(doi_url, timeout=10)
     if response.status_code != 301:
         return doi_url
     return response.headers.get("Location")
@@ -137,7 +137,7 @@ def get_doi_ra(doi) -> Optional[str]:
     prefix = validate_prefix(doi)
     if prefix is None:
         return None
-    response = httpx.get("https://doi.org/ra/" + prefix, timeout=10)
+    response = requests.get("https://doi.org/ra/" + prefix, timeout=10)
     if response.status_code != 200:
         return None
     return response.json()[0].get("RA", None)
@@ -170,7 +170,7 @@ def decode_doi(doi: str, checksum: bool = True) -> int:
 def get_crossref_member(member_id) -> Optional[dict]:
     """Return the Crossref member for a given member_id"""
-    response = httpx.get("https://api.crossref.org/members/" + member_id, timeout=10)
+    response = requests.get("https://api.crossref.org/members/" + member_id, timeout=10)
     if response.status_code != 200:
         return None
     data = response.json().get("message", None)
@@ -302,16 +302,6 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
     return f"https://api.datacite.org/dois?random=true&page[size]={number}"
-def openalex_api_url(doi: str, **kwargs) -> str:
-    """Return the OpenAlex API URL for a given DOI"""
-    return f"https://api.openalex.org/works/{doi}"
-def openalex_api_sample_url(number: int = 1, **kwargs) -> str:
-    """Return the OpenAlex API URL for a sample of dois"""
-    return f"https://api.openalex.org/works?sample={number}"
 def is_rogue_scholar_doi(doi: str) -> bool:
     """Return True if DOI is from Rogue Scholar"""
     prefix = validate_prefix(doi)

commonmeta/metadata.py CHANGED Viewed

@@ -199,6 +199,7 @@ class Metadata:
                 "codemeta",
                 "kbase",
                 "inveniordm",
+                "openalex",
             ]:
                 return json.loads(string)
             else:
@@ -237,7 +238,7 @@ class Metadata:
         elif via == "kbase":
             return dict(read_kbase(data))
         elif via == "openalex":
-            return read_openalex(data)
+            return dict(read_openalex(data))
         elif via == "ris":
             return dict(read_ris(data["data"] if isinstance(data, dict) else data))
         else:
@@ -397,6 +398,7 @@ class MetadataList:
             "crossref",
             "datacite",
             "schema_org",
+            "openalex",
             "csl",
             "json_feed_item",
         ]:
@@ -425,6 +427,8 @@ class MetadataList:
             raise ValueError("Schema.org not supported for metadata lists")
         elif to == "datacite":
             raise ValueError("Datacite not supported for metadata lists")
+        elif to == "openalex":
+            raise ValueError("OpenAlex not supported for metadata lists")
         elif to == "crossref_xml":
             return write_crossref_xml_list(self)
         else:

commonmeta/readers/cff_reader.py CHANGED Viewed

@@ -2,26 +2,27 @@
 from typing import Optional
 from urllib.parse import urlparse
-import httpx
+import requests
 import yaml
+from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
+from ..constants import Commonmeta
+from ..date_utils import get_iso8601_date
 from ..utils import (
-    normalize_id,
-    name_to_fos,
     dict_to_spdx,
-    normalize_orcid,
     github_as_cff_url,
     github_as_repo_url,
+    name_to_fos,
+    normalize_id,
+    normalize_orcid,
 )
-from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
-from ..date_utils import get_iso8601_date
-from ..constants import Commonmeta
 def get_cff(pid: str, **kwargs) -> dict:
     """get_cff"""
     url = github_as_cff_url(pid)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     text = response.text

commonmeta/readers/codemeta_reader.py CHANGED Viewed

@@ -1,30 +1,31 @@
 """codemeta reader for commonmeta-py"""
-from typing import Optional
 from collections import defaultdict
-import httpx
+from typing import Optional
+import requests
+from ..author_utils import get_authors
+from ..base_utils import compact, presence, sanitize, wrap
+from ..constants import (
+    SO_TO_CM_TRANSLATIONS,
+    Commonmeta,
+)
 from ..utils import (
-    normalize_id,
-    from_schema_org_creators,
-    name_to_fos,
     dict_to_spdx,
+    doi_from_url,
+    from_schema_org_creators,
     github_as_codemeta_url,
     github_as_repo_url,
-    doi_from_url,
-)
-from ..base_utils import wrap, presence, compact, sanitize
-from ..author_utils import get_authors
-from ..constants import (
-    Commonmeta,
-    SO_TO_CM_TRANSLATIONS,
+    name_to_fos,
+    normalize_id,
 )
 def get_codemeta(pid: str, **kwargs) -> dict:
     """get_codemeta"""
     url = str(github_as_codemeta_url(pid))
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     data = response.json()

commonmeta/readers/crossref_reader.py CHANGED Viewed

@@ -2,8 +2,9 @@
 from typing import Optional
-import httpx
+import requests
 from pydash import py_
+from requests.exceptions import ConnectionError, ReadTimeout
 from ..author_utils import get_authors
 from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
@@ -34,7 +35,7 @@ from ..utils import (
 def get_crossref_list(query: dict, **kwargs) -> list[dict]:
     """get_crossref list from Crossref API."""
     url = crossref_api_query_url(query, **kwargs)
-    response = httpx.get(url, timeout=30, **kwargs)
+    response = requests.get(url, timeout=30, **kwargs)
     if response.status_code != 200:
         return []
     return response.json().get("message", {}).get("items", [])
@@ -46,7 +47,7 @@ def get_crossref(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = crossref_api_url(doi)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json().get("message", {}) | {"via": "crossref"}
@@ -402,11 +403,11 @@ def get_random_crossref_id(number: int = 1, **kwargs) -> list:
     number = 20 if number > 20 else number
     url = crossref_api_sample_url(number, **kwargs)
     try:
-        response = httpx.get(url, timeout=10)
+        response = requests.get(url, timeout=10)
         if response.status_code != 200:
             return []
         items = py_.get(response.json(), "message.items")
         return [i.get("DOI") for i in items]
-    except (httpx.ReadTimeout, httpx.ConnectError):
+    except (ReadTimeout, ConnectionError):
         return []

commonmeta/readers/crossref_xml_reader.py CHANGED Viewed

@@ -1,34 +1,35 @@
 """crossref_xml reader for commonmeta-py"""
-from typing import Optional
 from collections import defaultdict
-import httpx
+from typing import Optional
+import requests
 from pydash import py_
-from ..utils import (
-    doi_from_url,
-    dict_to_spdx,
-    from_crossref_xml,
-    normalize_cc_url,
-    normalize_issn,
-    normalize_url,
-)
+from ..author_utils import get_authors
 from ..base_utils import (
     compact,
-    wrap,
-    presence,
-    sanitize,
     parse_attributes,
     parse_xml,
+    presence,
+    sanitize,
+    wrap,
 )
-from ..author_utils import get_authors
-from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
-from ..doi_utils import get_doi_ra, crossref_xml_api_url, normalize_doi
 from ..constants import (
-    Commonmeta,
+    CR_TO_CM_CONTAINER_TRANSLATIONS,
     CR_TO_CM_TRANSLATIONS,
     CROSSREF_CONTAINER_TYPES,
-    CR_TO_CM_CONTAINER_TRANSLATIONS,
+    Commonmeta,
+)
+from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
+from ..doi_utils import crossref_xml_api_url, get_doi_ra, normalize_doi
+from ..utils import (
+    dict_to_spdx,
+    doi_from_url,
+    from_crossref_xml,
+    normalize_cc_url,
+    normalize_issn,
+    normalize_url,
 )
@@ -38,7 +39,7 @@ def get_crossref_xml(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = crossref_xml_api_url(doi)
-    response = httpx.get(
+    response = requests.get(
         url, headers={"Accept": "text/xml;charset=utf-8"}, timeout=10, **kwargs
     )
     if response.status_code != 200:

commonmeta/readers/datacite_reader.py CHANGED Viewed

@@ -2,29 +2,31 @@
 from collections import defaultdict
 from typing import Optional
-import httpx
+import requests
 from pydash import py_
+from requests.exceptions import ReadTimeout
-from ..utils import (
-    normalize_url,
-    normalize_doi,
-    normalize_cc_url,
-    dict_to_spdx,
-    format_name_identifier,
-)
-from ..base_utils import compact, wrap, presence
 from ..author_utils import get_authors
+from ..base_utils import compact, presence, wrap
+from ..constants import (
+    DC_TO_CM_CONTAINER_TRANSLATIONS,
+    DC_TO_CM_TRANSLATIONS,
+    Commonmeta,
+)
 from ..date_utils import normalize_date_dict
 from ..doi_utils import (
+    datacite_api_sample_url,
+    datacite_api_url,
     doi_as_url,
     doi_from_url,
-    datacite_api_url,
-    datacite_api_sample_url,
 )
-from ..constants import (
-    DC_TO_CM_TRANSLATIONS,
-    DC_TO_CM_CONTAINER_TRANSLATIONS,
-    Commonmeta,
+from ..utils import (
+    dict_to_spdx,
+    format_name_identifier,
+    normalize_cc_url,
+    normalize_doi,
+    normalize_url,
 )
@@ -35,11 +37,11 @@ def get_datacite(pid: str, **kwargs) -> dict:
         return {"state": "not_found"}
     url = datacite_api_url(doi)
     try:
-        response = httpx.get(url, timeout=10, **kwargs)
+        response = requests.get(url, timeout=10, **kwargs)
         if response.status_code != 200:
             return {"state": "not_found"}
         return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite"}
-    except httpx.ReadTimeout:
+    except ReadTimeout:
         return {"state": "timeout"}
@@ -380,11 +382,11 @@ def get_random_datacite_id(number: int = 1) -> list:
     number = 20 if number > 20 else number
     url = datacite_api_sample_url(number)
     try:
-        response = httpx.get(url, timeout=60)
+        response = requests.get(url, timeout=60)
         if response.status_code != 200:
             return []
         items = py_.get(response.json(), "data")
         return [i.get("id") for i in items]
-    except httpx.ReadTimeout:
+    except ReadTimeout:
         return []

commonmeta/readers/datacite_xml_reader.py CHANGED Viewed

@@ -1,15 +1,16 @@
 """datacite_xml reader for Commonmeta"""
 from collections import defaultdict
-import httpx
+import requests
 from pydash import py_
-from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
 from ..author_utils import get_authors
-from ..date_utils import strip_milliseconds, normalize_date_dict
-from ..doi_utils import doi_from_url, doi_as_url, datacite_api_url, normalize_doi
-from ..utils import normalize_url, normalize_cc_url, dict_to_spdx
+from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
 from ..constants import DC_TO_CM_TRANSLATIONS, Commonmeta
+from ..date_utils import normalize_date_dict, strip_milliseconds
+from ..doi_utils import datacite_api_url, doi_as_url, doi_from_url, normalize_doi
+from ..utils import dict_to_spdx, normalize_cc_url, normalize_url
 def get_datacite_xml(pid: str, **kwargs) -> dict:
@@ -18,7 +19,7 @@ def get_datacite_xml(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = datacite_api_url(doi)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite_xml"}

commonmeta/readers/inveniordm_reader.py CHANGED Viewed

@@ -1,27 +1,27 @@
 """InvenioRDM reader for Commonmeta"""
-import httpx
-from pydash import py_
+import requests
 from furl import furl
+from pydash import py_
+from ..author_utils import get_authors
+from ..base_utils import compact, presence, sanitize, wrap
+from ..constants import (
+    COMMONMETA_RELATION_TYPES,
+    INVENIORDM_TO_CM_TRANSLATIONS,
+    Commonmeta,
+)
+from ..date_utils import strip_milliseconds
+from ..doi_utils import doi_as_url, doi_from_url
 from ..utils import (
-    normalize_url,
-    normalize_doi,
     dict_to_spdx,
-    name_to_fos,
     from_inveniordm,
     get_language,
+    name_to_fos,
+    normalize_doi,
+    normalize_url,
     validate_ror,
 )
-from ..base_utils import compact, wrap, presence, sanitize
-from ..author_utils import get_authors
-from ..date_utils import strip_milliseconds
-from ..doi_utils import doi_as_url, doi_from_url
-from ..constants import (
-    INVENIORDM_TO_CM_TRANSLATIONS,
-    COMMONMETA_RELATION_TYPES,
-    Commonmeta,
-)
 def get_inveniordm(pid: str, **kwargs) -> dict:
@@ -29,7 +29,7 @@ def get_inveniordm(pid: str, **kwargs) -> dict:
     if pid is None:
         return {"state": "not_found"}
     url = normalize_url(pid)
-    response = httpx.get(url, timeout=10, follow_redirects=True, **kwargs)
+    response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json()
@@ -63,7 +63,6 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
     title = py_.get(meta, "metadata.title")
     titles = [{"title": sanitize(title)}] if title else None
-    additional_titles = py_.get(meta, "metadata.additional_titles")
     # if additional_titles:
     #     titles += [{"title": sanitize("bla")} for i in wrap(additional_titles)]

commonmeta/readers/json_feed_reader.py CHANGED Viewed

@@ -1,33 +1,34 @@
 """JSON Feed reader for commonmeta-py"""
 from typing import Optional
-import httpx
-from pydash import py_
+import requests
 from furl import furl
+from pydash import py_
+from ..author_utils import get_authors
+from ..base_utils import parse_attributes, presence, sanitize
+from ..constants import Commonmeta
+from ..date_utils import get_date_from_unix_timestamp
+from ..doi_utils import (
+    doi_from_url,
+    encode_doi,
+    is_rogue_scholar_doi,
+    normalize_doi,
+    validate_doi,
+    validate_prefix,
+)
 from ..utils import (
     compact,
-    normalize_url,
-    from_json_feed,
-    wrap,
     dict_to_spdx,
+    from_json_feed,
+    issn_as_url,
     name_to_fos,
+    normalize_url,
     validate_ror,
     validate_url,
-    issn_as_url,
-)
-from ..author_utils import get_authors
-from ..base_utils import presence, sanitize, parse_attributes
-from ..date_utils import get_date_from_unix_timestamp
-from ..doi_utils import (
-    normalize_doi,
-    validate_prefix,
-    validate_doi,
-    doi_from_url,
-    is_rogue_scholar_doi,
-    encode_doi,
+    wrap,
 )
-from ..constants import Commonmeta
 def get_json_feed_item(pid: str, **kwargs) -> dict:
@@ -35,7 +36,7 @@ def get_json_feed_item(pid: str, **kwargs) -> dict:
     if pid is None:
         return {"state": "not_found"}
     url = normalize_url(pid)
-    response = httpx.get(url, timeout=10, follow_redirects=True, **kwargs)
+    response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json() | {"via": "json_feed_item"}
@@ -255,7 +256,9 @@ def get_funding_references(meta: Optional[dict]) -> Optional[list]:
         elif len(urls) == 2 and validate_ror(urls[0]):
             f = furl(urls[0])
             _id = f.path.segments[-1]
-            response = httpx.get(f"https://api.ror.org/organizations/{_id}", timeout=10)
+            response = requests.get(
+                f"https://api.ror.org/organizations/{_id}", timeout=10
+            )
             ror = response.json()
             funder_name = ror.get("name", None)
             funder_identifier = urls[0]
@@ -398,7 +401,7 @@ def get_json_feed_item_uuid(id: str):
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"
-    response = httpx.get(url, timeout=10)
+    response = requests.get(url, timeout=10)
     if response.status_code != 200:
         return response.json()
     post = response.json()
@@ -426,7 +429,7 @@ def get_json_feed_blog_slug(id: str):
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"
-    response = httpx.get(url, timeout=10)
+    response = requests.get(url, timeout=10)
     if response.status_code != 200:
         return response.json()
     post = response.json()

commonmeta-py 0.104__py3-none-any.whl → 0.106__py3-none-any.whl

commonmeta-py 0.104__py3-none-any.whl → 0.106__py3-none-any.whl