PyPI - commonmeta-py - Versions diffs - 0.105__py3-none-any.whl → 0.107__py3-none-any.whl - Mend

commonmeta-py 0.105__py3-none-any.whl → 0.107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

commonmeta/__init__.py +13 -1
commonmeta/api_utils.py +7 -6
commonmeta/author_utils.py +10 -10
commonmeta/base_utils.py +3 -2
commonmeta/crossref_utils.py +80 -1
commonmeta/date_utils.py +3 -2
commonmeta/doi_utils.py +4 -4
commonmeta/readers/cff_reader.py +9 -8
commonmeta/readers/codemeta_reader.py +14 -13
commonmeta/readers/crossref_reader.py +6 -5
commonmeta/readers/crossref_xml_reader.py +20 -19
commonmeta/readers/datacite_reader.py +6 -5
commonmeta/readers/datacite_xml_reader.py +7 -6
commonmeta/readers/inveniordm_reader.py +15 -16
commonmeta/readers/json_feed_reader.py +25 -22
commonmeta/readers/openalex_reader.py +10 -9
commonmeta/readers/schema_org_reader.py +31 -29
commonmeta/schema_utils.py +1 -0
commonmeta/translators.py +2 -1
commonmeta/utils.py +70 -11
commonmeta/writers/citation_writer.py +10 -5
commonmeta/writers/commonmeta_writer.py +1 -0
commonmeta/writers/crossref_xml_writer.py +1 -0
commonmeta/writers/csl_writer.py +5 -4
commonmeta/writers/datacite_writer.py +5 -4
commonmeta/writers/inveniordm_writer.py +9 -8
{commonmeta_py-0.105.dist-info → commonmeta_py-0.107.dist-info}/METADATA +17 -15
{commonmeta_py-0.105.dist-info → commonmeta_py-0.107.dist-info}/RECORD +31 -33
commonmeta/resources/ietf-bcp-47.json +0 -3025
commonmeta/resources/iso-8601.json +0 -3182
{commonmeta_py-0.105.dist-info → commonmeta_py-0.107.dist-info}/WHEEL +0 -0
{commonmeta_py-0.105.dist-info → commonmeta_py-0.107.dist-info}/entry_points.txt +0 -0
{commonmeta_py-0.105.dist-info → commonmeta_py-0.107.dist-info}/licenses/LICENSE +0 -0

commonmeta/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
 """
 __title__ = "commonmeta-py"
-__version__ = "0.105"
+__version__ = "0.107"
 __author__ = "Martin Fenner"
 __license__ = "MIT"
@@ -31,6 +31,18 @@ from .base_utils import (
     unwrap,
     wrap,
 )
+from .crossref_utils import (
+    CrossrefBadRequestError,
+    CrossrefError,
+    CrossrefForbiddenError,
+    CrossrefNoContentError,
+    CrossrefNotFoundError,
+    CrossrefRequestError,
+    CrossrefServerError,
+    CrossrefUnauthorizedError,
+    HttpError,
+    generate_crossref_xml,
+)
 from .date_utils import (
     get_date_from_crossref_parts,
     get_date_from_date_parts,

commonmeta/api_utils.py CHANGED Viewed

@@ -1,12 +1,13 @@
 """API Utils module for commonmeta-py"""
-from typing import Optional
 from datetime import datetime as date
-import httpx
-from furl import furl
+from typing import Optional
 import jwt
+import requests
+from furl import furl
-from .doi_utils import validate_doi, doi_as_url
+from .doi_utils import doi_as_url, validate_doi
 from .readers.json_feed_reader import get_json_feed_item_uuid
@@ -52,7 +53,7 @@ def update_ghost_post_via_api(
     f = furl(url)
     slug = f.path.segments[-1]
     ghost_url = f"{api_url}/ghost/api/admin/posts/slug/{slug}/"
-    response = httpx.get(ghost_url, headers=headers, timeout=10)
+    response = requests.get(ghost_url, headers=headers, timeout=10)
     if response.status_code != 200:
         return {"error": "Error fetching post"}
     ghost_post = response.json().get("posts")[0]
@@ -67,7 +68,7 @@ def update_ghost_post_via_api(
     ghost_url = f"{api_url}/ghost/api/admin/posts/{guid}/"
     json = {"posts": [{"canonical_url": doi, "updated_at": updated_at}]}
-    response = httpx.put(
+    response = requests.put(
         ghost_url,
         headers=headers,
         json=json,

commonmeta/author_utils.py CHANGED Viewed

@@ -2,23 +2,23 @@
 import re
 from typing import List
+from furl import furl
 from nameparser import HumanName
 from pydash import py_
-from furl import furl
+from .base_utils import compact, parse_attributes, presence, wrap
+from .constants import (
+    COMMONMETA_CONTRIBUTOR_ROLES,
+)
 from .utils import (
-    normalize_orcid,
+    format_name_identifier,
     normalize_id,
-    normalize_ror,
     normalize_isni,
-    format_name_identifier,
-    validate_ror,
+    normalize_orcid,
+    normalize_ror,
     validate_orcid,
-)
-from .base_utils import parse_attributes, wrap, presence, compact
-from .constants import (
-    COMMONMETA_CONTRIBUTOR_ROLES,
+    validate_ror,
 )

commonmeta/base_utils.py CHANGED Viewed

@@ -1,11 +1,12 @@
 """Base utilities for commonmeta-py"""
 import html
-from os import path
 import re
-import xmltodict
+from os import path
 from typing import Optional, Union
 import nh3
+import xmltodict
 def wrap(item) -> list:

commonmeta/crossref_utils.py CHANGED Viewed

@@ -231,7 +231,6 @@ def insert_citation_list(metadata, xml):
     citation_list = etree.SubElement(xml, "citation_list")
     for i, ref in enumerate(metadata.references):
-        print(i)
         if ref.get("id", None) is None:
             continue
         citation = etree.SubElement(
@@ -581,3 +580,83 @@ def generate_crossref_xml_list(metalist) -> Optional[str]:
         doctype='<?xml version="1.0" encoding="UTF-8"?>',
         pretty_print=True,
     )
+"""Errors for the Crossref XML API.
+Error responses will be converted into an exception from this module.
+"""
+class HttpError(Exception):
+    """Exception raised when a connection problem happens."""
+class CrossrefError(Exception):
+    """Exception raised when the server returns a known HTTP error code.
+    Known HTTP error codes include:
+    * 204 No Content
+    * 400 Bad Request
+    * 401 Unauthorized
+    * 403 Forbidden
+    * 404 Not Found
+    * 410 Gone (deleted)
+    """
+    @staticmethod
+    def factory(err_code, *args):
+        """Create exceptions through a Factory based on the HTTP error code."""
+        if err_code == 204:
+            return CrossrefNoContentError(*args)
+        elif err_code == 400:
+            return CrossrefBadRequestError(*args)
+        elif err_code == 401:
+            return CrossrefUnauthorizedError(*args)
+        elif err_code == 403:
+            return CrossrefForbiddenError(*args)
+        elif err_code == 404:
+            return CrossrefNotFoundError(*args)
+        else:
+            return CrossrefServerError(*args)
+class CrossrefServerError(CrossrefError):
+    """An internal server error happened on the Crossref end. Try later.
+    Base class for all 5XX-related HTTP error codes.
+    """
+class CrossrefRequestError(CrossrefError):
+    """A Crossref request error. You made an invalid request.
+    Base class for all 4XX-related HTTP error codes as well as 204.
+    """
+class CrossrefNoContentError(CrossrefRequestError):
+    """DOI is known to Crossref, but not resolvable.
+    This might be due to handle's latency.
+    """
+class CrossrefBadRequestError(CrossrefRequestError):
+    """Bad request error.
+    Bad requests can include e.g. invalid XML, wrong domain, wrong prefix.
+    """
+class CrossrefUnauthorizedError(CrossrefRequestError):
+    """Bad username or password."""
+class CrossrefForbiddenError(CrossrefRequestError):
+    """Login problem, record belongs to another party or quota exceeded."""
+class CrossrefNotFoundError(CrossrefRequestError):
+    """DOI does not exist in the database."""

commonmeta/date_utils.py CHANGED Viewed

@@ -3,10 +3,11 @@
 import datetime
 from datetime import datetime as dt
 from typing import Optional, Union
 import dateparser
-from edtf import parse_edtf, DateAndTime, Date
-from edtf.parser.edtf_exceptions import EDTFParseException
 import pydash as py_
+from edtf import Date, DateAndTime, parse_edtf
+from edtf.parser.edtf_exceptions import EDTFParseException
 from .base_utils import compact

commonmeta/doi_utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ import re
 from typing import Optional
 import base32_lib as base32
-import httpx
+import requests
 from furl import furl
 from .base_utils import compact
@@ -94,7 +94,7 @@ def short_doi_as_doi(doi: Optional[str]) -> Optional[str]:
     doi_url = doi_as_url(doi)
     if doi_url is None:
         return None
-    response = httpx.head(doi_url, timeout=10)
+    response = requests.head(doi_url, timeout=10)
     if response.status_code != 301:
         return doi_url
     return response.headers.get("Location")
@@ -137,7 +137,7 @@ def get_doi_ra(doi) -> Optional[str]:
     prefix = validate_prefix(doi)
     if prefix is None:
         return None
-    response = httpx.get("https://doi.org/ra/" + prefix, timeout=10)
+    response = requests.get("https://doi.org/ra/" + prefix, timeout=10)
     if response.status_code != 200:
         return None
     return response.json()[0].get("RA", None)
@@ -170,7 +170,7 @@ def decode_doi(doi: str, checksum: bool = True) -> int:
 def get_crossref_member(member_id) -> Optional[dict]:
     """Return the Crossref member for a given member_id"""
-    response = httpx.get("https://api.crossref.org/members/" + member_id, timeout=10)
+    response = requests.get("https://api.crossref.org/members/" + member_id, timeout=10)
     if response.status_code != 200:
         return None
     data = response.json().get("message", None)

commonmeta/readers/cff_reader.py CHANGED Viewed

@@ -2,26 +2,27 @@
 from typing import Optional
 from urllib.parse import urlparse
-import httpx
+import requests
 import yaml
+from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
+from ..constants import Commonmeta
+from ..date_utils import get_iso8601_date
 from ..utils import (
-    normalize_id,
-    name_to_fos,
     dict_to_spdx,
-    normalize_orcid,
     github_as_cff_url,
     github_as_repo_url,
+    name_to_fos,
+    normalize_id,
+    normalize_orcid,
 )
-from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
-from ..date_utils import get_iso8601_date
-from ..constants import Commonmeta
 def get_cff(pid: str, **kwargs) -> dict:
     """get_cff"""
     url = github_as_cff_url(pid)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     text = response.text

commonmeta/readers/codemeta_reader.py CHANGED Viewed

@@ -1,30 +1,31 @@
 """codemeta reader for commonmeta-py"""
-from typing import Optional
 from collections import defaultdict
-import httpx
+from typing import Optional
+import requests
+from ..author_utils import get_authors
+from ..base_utils import compact, presence, sanitize, wrap
+from ..constants import (
+    SO_TO_CM_TRANSLATIONS,
+    Commonmeta,
+)
 from ..utils import (
-    normalize_id,
-    from_schema_org_creators,
-    name_to_fos,
     dict_to_spdx,
+    doi_from_url,
+    from_schema_org_creators,
     github_as_codemeta_url,
     github_as_repo_url,
-    doi_from_url,
-)
-from ..base_utils import wrap, presence, compact, sanitize
-from ..author_utils import get_authors
-from ..constants import (
-    Commonmeta,
-    SO_TO_CM_TRANSLATIONS,
+    name_to_fos,
+    normalize_id,
 )
 def get_codemeta(pid: str, **kwargs) -> dict:
     """get_codemeta"""
     url = str(github_as_codemeta_url(pid))
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     data = response.json()

commonmeta/readers/crossref_reader.py CHANGED Viewed

@@ -2,8 +2,9 @@
 from typing import Optional
-import httpx
+import requests
 from pydash import py_
+from requests.exceptions import ConnectionError, ReadTimeout
 from ..author_utils import get_authors
 from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
@@ -34,7 +35,7 @@ from ..utils import (
 def get_crossref_list(query: dict, **kwargs) -> list[dict]:
     """get_crossref list from Crossref API."""
     url = crossref_api_query_url(query, **kwargs)
-    response = httpx.get(url, timeout=30, **kwargs)
+    response = requests.get(url, timeout=30, **kwargs)
     if response.status_code != 200:
         return []
     return response.json().get("message", {}).get("items", [])
@@ -46,7 +47,7 @@ def get_crossref(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = crossref_api_url(doi)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json().get("message", {}) | {"via": "crossref"}
@@ -402,11 +403,11 @@ def get_random_crossref_id(number: int = 1, **kwargs) -> list:
     number = 20 if number > 20 else number
     url = crossref_api_sample_url(number, **kwargs)
     try:
-        response = httpx.get(url, timeout=10)
+        response = requests.get(url, timeout=10)
         if response.status_code != 200:
             return []
         items = py_.get(response.json(), "message.items")
         return [i.get("DOI") for i in items]
-    except (httpx.ReadTimeout, httpx.ConnectError):
+    except (ReadTimeout, ConnectionError):
         return []

commonmeta/readers/crossref_xml_reader.py CHANGED Viewed

@@ -1,34 +1,35 @@
 """crossref_xml reader for commonmeta-py"""
-from typing import Optional
 from collections import defaultdict
-import httpx
+from typing import Optional
+import requests
 from pydash import py_
-from ..utils import (
-    doi_from_url,
-    dict_to_spdx,
-    from_crossref_xml,
-    normalize_cc_url,
-    normalize_issn,
-    normalize_url,
-)
+from ..author_utils import get_authors
 from ..base_utils import (
     compact,
-    wrap,
-    presence,
-    sanitize,
     parse_attributes,
     parse_xml,
+    presence,
+    sanitize,
+    wrap,
 )
-from ..author_utils import get_authors
-from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
-from ..doi_utils import get_doi_ra, crossref_xml_api_url, normalize_doi
 from ..constants import (
-    Commonmeta,
+    CR_TO_CM_CONTAINER_TRANSLATIONS,
     CR_TO_CM_TRANSLATIONS,
     CROSSREF_CONTAINER_TYPES,
-    CR_TO_CM_CONTAINER_TRANSLATIONS,
+    Commonmeta,
+)
+from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
+from ..doi_utils import crossref_xml_api_url, get_doi_ra, normalize_doi
+from ..utils import (
+    dict_to_spdx,
+    doi_from_url,
+    from_crossref_xml,
+    normalize_cc_url,
+    normalize_issn,
+    normalize_url,
 )
@@ -38,7 +39,7 @@ def get_crossref_xml(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = crossref_xml_api_url(doi)
-    response = httpx.get(
+    response = requests.get(
         url, headers={"Accept": "text/xml;charset=utf-8"}, timeout=10, **kwargs
     )
     if response.status_code != 200:

commonmeta/readers/datacite_reader.py CHANGED Viewed

@@ -3,8 +3,9 @@
 from collections import defaultdict
 from typing import Optional
-import httpx
+import requests
 from pydash import py_
+from requests.exceptions import ReadTimeout
 from ..author_utils import get_authors
 from ..base_utils import compact, presence, wrap
@@ -36,11 +37,11 @@ def get_datacite(pid: str, **kwargs) -> dict:
         return {"state": "not_found"}
     url = datacite_api_url(doi)
     try:
-        response = httpx.get(url, timeout=10, **kwargs)
+        response = requests.get(url, timeout=10, **kwargs)
         if response.status_code != 200:
             return {"state": "not_found"}
         return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite"}
-    except httpx.ReadTimeout:
+    except ReadTimeout:
         return {"state": "timeout"}
@@ -381,11 +382,11 @@ def get_random_datacite_id(number: int = 1) -> list:
     number = 20 if number > 20 else number
     url = datacite_api_sample_url(number)
     try:
-        response = httpx.get(url, timeout=60)
+        response = requests.get(url, timeout=60)
         if response.status_code != 200:
             return []
         items = py_.get(response.json(), "data")
         return [i.get("id") for i in items]
-    except httpx.ReadTimeout:
+    except ReadTimeout:
         return []

commonmeta/readers/datacite_xml_reader.py CHANGED Viewed

@@ -1,15 +1,16 @@
 """datacite_xml reader for Commonmeta"""
 from collections import defaultdict
-import httpx
+import requests
 from pydash import py_
-from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
 from ..author_utils import get_authors
-from ..date_utils import strip_milliseconds, normalize_date_dict
-from ..doi_utils import doi_from_url, doi_as_url, datacite_api_url, normalize_doi
-from ..utils import normalize_url, normalize_cc_url, dict_to_spdx
+from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
 from ..constants import DC_TO_CM_TRANSLATIONS, Commonmeta
+from ..date_utils import normalize_date_dict, strip_milliseconds
+from ..doi_utils import datacite_api_url, doi_as_url, doi_from_url, normalize_doi
+from ..utils import dict_to_spdx, normalize_cc_url, normalize_url
 def get_datacite_xml(pid: str, **kwargs) -> dict:
@@ -18,7 +19,7 @@ def get_datacite_xml(pid: str, **kwargs) -> dict:
     if doi is None:
         return {"state": "not_found"}
     url = datacite_api_url(doi)
-    response = httpx.get(url, timeout=10, **kwargs)
+    response = requests.get(url, timeout=10, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite_xml"}

commonmeta/readers/inveniordm_reader.py CHANGED Viewed

@@ -1,27 +1,27 @@
 """InvenioRDM reader for Commonmeta"""
-import httpx
-from pydash import py_
+import requests
 from furl import furl
+from pydash import py_
+from ..author_utils import get_authors
+from ..base_utils import compact, presence, sanitize, wrap
+from ..constants import (
+    COMMONMETA_RELATION_TYPES,
+    INVENIORDM_TO_CM_TRANSLATIONS,
+    Commonmeta,
+)
+from ..date_utils import strip_milliseconds
+from ..doi_utils import doi_as_url, doi_from_url
 from ..utils import (
-    normalize_url,
-    normalize_doi,
     dict_to_spdx,
-    name_to_fos,
     from_inveniordm,
     get_language,
+    name_to_fos,
+    normalize_doi,
+    normalize_url,
     validate_ror,
 )
-from ..base_utils import compact, wrap, presence, sanitize
-from ..author_utils import get_authors
-from ..date_utils import strip_milliseconds
-from ..doi_utils import doi_as_url, doi_from_url
-from ..constants import (
-    INVENIORDM_TO_CM_TRANSLATIONS,
-    COMMONMETA_RELATION_TYPES,
-    Commonmeta,
-)
 def get_inveniordm(pid: str, **kwargs) -> dict:
@@ -29,7 +29,7 @@ def get_inveniordm(pid: str, **kwargs) -> dict:
     if pid is None:
         return {"state": "not_found"}
     url = normalize_url(pid)
-    response = httpx.get(url, timeout=10, follow_redirects=True, **kwargs)
+    response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json()
@@ -63,7 +63,6 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
     title = py_.get(meta, "metadata.title")
     titles = [{"title": sanitize(title)}] if title else None
-    additional_titles = py_.get(meta, "metadata.additional_titles")
     # if additional_titles:
     #     titles += [{"title": sanitize("bla")} for i in wrap(additional_titles)]

commonmeta/readers/json_feed_reader.py CHANGED Viewed

@@ -1,33 +1,34 @@
 """JSON Feed reader for commonmeta-py"""
 from typing import Optional
-import httpx
-from pydash import py_
+import requests
 from furl import furl
+from pydash import py_
+from ..author_utils import get_authors
+from ..base_utils import parse_attributes, presence, sanitize
+from ..constants import Commonmeta
+from ..date_utils import get_date_from_unix_timestamp
+from ..doi_utils import (
+    doi_from_url,
+    encode_doi,
+    is_rogue_scholar_doi,
+    normalize_doi,
+    validate_doi,
+    validate_prefix,
+)
 from ..utils import (
     compact,
-    normalize_url,
-    from_json_feed,
-    wrap,
     dict_to_spdx,
+    from_json_feed,
+    issn_as_url,
     name_to_fos,
+    normalize_url,
     validate_ror,
     validate_url,
-    issn_as_url,
-)
-from ..author_utils import get_authors
-from ..base_utils import presence, sanitize, parse_attributes
-from ..date_utils import get_date_from_unix_timestamp
-from ..doi_utils import (
-    normalize_doi,
-    validate_prefix,
-    validate_doi,
-    doi_from_url,
-    is_rogue_scholar_doi,
-    encode_doi,
+    wrap,
 )
-from ..constants import Commonmeta
 def get_json_feed_item(pid: str, **kwargs) -> dict:
@@ -35,7 +36,7 @@ def get_json_feed_item(pid: str, **kwargs) -> dict:
     if pid is None:
         return {"state": "not_found"}
     url = normalize_url(pid)
-    response = httpx.get(url, timeout=10, follow_redirects=True, **kwargs)
+    response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
     return response.json() | {"via": "json_feed_item"}
@@ -255,7 +256,9 @@ def get_funding_references(meta: Optional[dict]) -> Optional[list]:
         elif len(urls) == 2 and validate_ror(urls[0]):
             f = furl(urls[0])
             _id = f.path.segments[-1]
-            response = httpx.get(f"https://api.ror.org/organizations/{_id}", timeout=10)
+            response = requests.get(
+                f"https://api.ror.org/organizations/{_id}", timeout=10
+            )
             ror = response.json()
             funder_name = ror.get("name", None)
             funder_identifier = urls[0]
@@ -398,7 +401,7 @@ def get_json_feed_item_uuid(id: str):
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"
-    response = httpx.get(url, timeout=10)
+    response = requests.get(url, timeout=10)
     if response.status_code != 200:
         return response.json()
     post = response.json()
@@ -426,7 +429,7 @@ def get_json_feed_blog_slug(id: str):
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"
-    response = httpx.get(url, timeout=10)
+    response = requests.get(url, timeout=10)
     if response.status_code != 200:
         return response.json()
     post = response.json()

commonmeta-py 0.105__py3-none-any.whl → 0.107__py3-none-any.whl

commonmeta-py 0.105__py3-none-any.whl → 0.107__py3-none-any.whl