PyPI - commonmeta-py - Versions diffs - 0.23__py3-none-any.whl → 0.24__py3-none-any.whl - Mend

commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

commonmeta/__init__.py +96 -0
commonmeta/api_utils.py +77 -0
commonmeta/author_utils.py +260 -0
commonmeta/base_utils.py +121 -0
commonmeta/cli.py +200 -0
commonmeta/constants.py +587 -0
commonmeta/crossref_utils.py +575 -0
commonmeta/date_utils.py +193 -0
commonmeta/doi_utils.py +273 -0
commonmeta/metadata.py +320 -0
commonmeta/readers/__init__.py +1 -0
commonmeta/readers/cff_reader.py +199 -0
commonmeta/readers/codemeta_reader.py +112 -0
commonmeta/readers/commonmeta_reader.py +13 -0
commonmeta/readers/crossref_reader.py +409 -0
commonmeta/readers/crossref_xml_reader.py +505 -0
commonmeta/readers/csl_reader.py +98 -0
commonmeta/readers/datacite_reader.py +390 -0
commonmeta/readers/datacite_xml_reader.py +359 -0
commonmeta/readers/inveniordm_reader.py +218 -0
commonmeta/readers/json_feed_reader.py +420 -0
commonmeta/readers/kbase_reader.py +205 -0
commonmeta/readers/ris_reader.py +103 -0
commonmeta/readers/schema_org_reader.py +506 -0
commonmeta/resources/cff_v1.2.0.json +1827 -0
commonmeta/resources/commonmeta_v0.12.json +601 -0
commonmeta/resources/commonmeta_v0.13.json +559 -0
commonmeta/resources/commonmeta_v0.14.json +573 -0
commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
commonmeta/resources/crossref/fundref.xsd +49 -0
commonmeta/resources/crossref/module-ali.xsd +39 -0
commonmeta/resources/crossref/relations.xsd +444 -0
commonmeta/resources/crossref-v0.2.json +60 -0
commonmeta/resources/csl-data.json +538 -0
commonmeta/resources/datacite-v4.5.json +829 -0
commonmeta/resources/datacite-v4.5pr.json +608 -0
commonmeta/resources/ietf-bcp-47.json +3025 -0
commonmeta/resources/iso-8601.json +3182 -0
commonmeta/resources/spdx/licenses.json +4851 -0
commonmeta/resources/spdx-schema..json +903 -0
commonmeta/resources/styles/apa.csl +1697 -0
commonmeta/resources/styles/chicago-author-date.csl +684 -0
commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
commonmeta/resources/styles/ieee.csl +468 -0
commonmeta/resources/styles/modern-language-association.csl +341 -0
commonmeta/resources/styles/vancouver.csl +376 -0
commonmeta/schema_utils.py +27 -0
commonmeta/translators.py +47 -0
commonmeta/utils.py +1108 -0
commonmeta/writers/__init__.py +1 -0
commonmeta/writers/bibtex_writer.py +149 -0
commonmeta/writers/citation_writer.py +70 -0
commonmeta/writers/commonmeta_writer.py +68 -0
commonmeta/writers/crossref_xml_writer.py +17 -0
commonmeta/writers/csl_writer.py +79 -0
commonmeta/writers/datacite_writer.py +193 -0
commonmeta/writers/inveniordm_writer.py +94 -0
commonmeta/writers/ris_writer.py +58 -0
commonmeta/writers/schema_org_writer.py +146 -0
{commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
commonmeta_py-0.24.dist-info/RECORD +75 -0
{commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
commonmeta_py-0.23.dist-info/RECORD +0 -5
/commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
{commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0

commonmeta/date_utils.py ADDED Viewed

@@ -0,0 +1,193 @@
+"""Date utils for commonmeta-py"""
+import datetime
+from datetime import datetime as dt
+from typing import Optional, Union
+import dateparser
+import pydash as py_
+from .base_utils import compact
+# Three-letter lowercase month abbreviations in calendar order (January first).
+MONTH_SHORT_NAMES = [
+    "jan",
+    "feb",
+    "mar",
+    "apr",
+    "may",
+    "jun",
+    "jul",
+    "aug",
+    "sep",
+    "oct",
+    "nov",
+    "dec",
+]
+# Lookup from zero-padded month number ("01" .. "12") to its abbreviation.
+MONTH_NAMES = {
+    f"{number:02d}": name for number, name in enumerate(MONTH_SHORT_NAMES, start=1)
+}
+# strftime pattern for a full calendar date (year-month-day).
+ISO8601_DATE_FORMAT = "%Y-%m-%d"
+def get_iso8601_date(
+    date: Optional[Union[datetime.datetime, datetime.date, str, int]],
+) -> str:
+    """Return *date* as an ISO 8601 date string without a time component.
+    Strings of length 4 and 7 are treated as year-only and year-month values
+    and are returned with that reduced precision; any other string is returned
+    as a full year-month-day date. Integers are interpreted as Unix timestamps
+    and converted with datetime.fromtimestamp (no tz argument, so local time).
+    Returns an empty string for None, for unsupported types and for strings
+    that dateparser cannot parse.
+    """
+    if date is None:
+        return ""
+    if isinstance(date, (datetime.datetime, datetime.date)):
+        return date.strftime(ISO8601_DATE_FORMAT)
+    if isinstance(date, str):
+        parsed = dateparser.parse(date)
+        # dateparser.parse yields None for text it cannot interpret; calling
+        # strftime on that result previously raised AttributeError.
+        if parsed is None:
+            return ""
+        length = len(date)
+        if length == 7:
+            return parsed.strftime("%Y-%m")
+        if length == 4:
+            return parsed.strftime("%Y")
+        return parsed.strftime(ISO8601_DATE_FORMAT)
+    if isinstance(date, int):
+        return datetime.datetime.fromtimestamp(date).strftime(ISO8601_DATE_FORMAT)
+    return ""
+def get_date_parts(iso8601_time: Optional[str]) -> dict:
+    """Split an ISO 8601 date string into a {"date-parts": [[...]]} dict.
+    The inner list holds year, month and day as integers, with components
+    equal to 0 removed. None yields {"date-parts": [[]]}.
+    """
+    if iso8601_time is None:
+        return {"date-parts": [[]]}
+    # Right-pad short values with "0" so the fixed slices below always exist;
+    # the padded positions parse to 0 and are rejected afterwards.
+    padded = iso8601_time.ljust(10, "0") if len(iso8601_time) < 10 else iso8601_time
+    components = [int(padded[start:stop]) for start, stop in ((0, 4), (5, 7), (8, 10))]
+    return {"date-parts": [py_.reject(components, lambda value: value == 0)]}
+def get_date_from_unix_timestamp(timestamp: Optional[int]) -> Optional[str]:
+    """Convert a Unix timestamp to an ISO 8601 datetime string, whole seconds only.
+    Uses datetime.fromtimestamp without a tz argument, so the result is a
+    naive local-time value. None is passed through as None.
+    """
+    if timestamp is not None:
+        moment = datetime.datetime.fromtimestamp(timestamp)
+        return moment.replace(microsecond=0).isoformat()
+    return None
+def get_date_from_date_parts(date_as_parts: Optional[dict]) -> Optional[str]:
+    """Build an ISO 8601 date string from a dict with a "date-parts" key.
+    Only the first entry of the "date-parts" list is used; it is read as
+    [year, month, day], with month and day optional. Returns None when the
+    input is None, when "date-parts" is missing or empty, when its first
+    entry is empty, or when the year is None.
+    """
+    if date_as_parts is None:
+        return None
+    date_parts = date_as_parts.get("date-parts") or []
+    if len(date_parts) == 0:
+        return None
+    first = date_parts[0]
+    # {"date-parts": [[]]} is what get_date_parts(None) returns; indexing the
+    # empty inner list previously raised IndexError.
+    if not first or first[0] is None:
+        return None
+    year = first[0]
+    month = first[1] if len(first) > 1 else 0
+    day = first[2] if len(first) > 2 else 0
+    return get_date_from_parts(year, month, day)
+def get_date_from_crossref_parts(
+    date_parts: Optional[Union[dict, list]],
+) -> Optional[str]:
+    """Build an ISO 8601 date string from Crossref XML date parts.
+    Accepts a dict with "year" and optional "month" / "day" keys, or a list
+    of such dicts, in which case only the first element is used. Returns
+    None when the input is None, an empty list, or has no "year".
+    """
+    if isinstance(date_parts, list):
+        # An empty list previously raised IndexError on date_parts[0].
+        if len(date_parts) == 0:
+            return None
+        date_parts = date_parts[0]
+    if date_parts is None:
+        return None
+    year = date_parts.get("year", None)
+    if year is None:
+        return None
+    month = date_parts.get("month", 0)
+    day = date_parts.get("day", 0)
+    return get_date_from_parts(year, month, day)
+def get_date_from_parts(year=0, month=0, day=0) -> Optional[str]:
+    """Join year, month and day into an ISO 8601 date of matching precision.
+    Components are zero-padded to 4, 2 and 2 characters. A component that is
+    None, empty, or consists only of zeros counts as missing, and assembly
+    stops at the first missing component, so the result is one of
+    "YYYY-MM-DD", "YYYY-MM", "YYYY" or None.
+    """
+    parts = []
+    for value, width in ((year, 4), (month, 2), (day, 2)):
+        text = "" if value is None else str(value)
+        # Stop rather than skip: dropping a missing month but keeping the day
+        # previously produced strings such as "2020-15", which reads as a
+        # year-month value.
+        if text.strip("0") == "":
+            break
+        parts.append(text.rjust(width, "0"))
+    return "-".join(parts) if parts else None
+def get_month_from_date(
+    date: Optional[Union[str, int, datetime.datetime, datetime.date]],
+) -> Optional[str]:
+    """Return the lowercase three-letter month abbreviation for *date*.
+    Strings are parsed with dateparser and integers are treated as Unix
+    timestamps (converted with datetime.fromtimestamp, i.e. local time).
+    Returns None for None, for unsupported types and for strings that
+    dateparser cannot parse.
+    """
+    if date is None:
+        return None
+    if isinstance(date, str):
+        # dateparser.parse yields None for text it cannot interpret; the type
+        # check below turns that into a None result instead of the
+        # AttributeError the unguarded strftime call used to raise.
+        date = dateparser.parse(date)
+    elif isinstance(date, int):
+        date = datetime.datetime.fromtimestamp(date)
+    if not isinstance(date, (datetime.datetime, datetime.date)):
+        return None
+    return MONTH_NAMES.get(date.strftime("%m"), None)
+def strip_milliseconds(iso8601_time: Optional[str]) -> Optional[str]:
+    """Drop fractional seconds from an ISO 8601 string, as they interfere with EDTF parsing.
+    Rules, checked in this order:
+    - None or empty input gives None
+    - a value containing "T00:00:00" is reduced to the part before "T"
+    - a value containing "." is cut at the first "." and suffixed with "Z"
+    - a value containing "+00:00" is cut at the first "+" and suffixed with "Z"
+    - anything else is returned unchanged
+    """
+    if iso8601_time is None or not len(iso8601_time):
+        return None
+    if "T00:00:00" in iso8601_time:
+        date_part, _, _ = iso8601_time.partition("T")
+        return date_part
+    for marker, separator in ((".", "."), ("+00:00", "+")):
+        if marker in iso8601_time:
+            return iso8601_time.split(separator)[0] + "Z"
+    return iso8601_time
+def get_datetime_from_time(time: str) -> Optional[str]:
+    """Convert a compact YYYYMMDDHHMMSS timestamp (as used by Crossref) to ISO 8601.
+    The result has the form YYYY-MM-DDTHH:MM:SSZ. Returns None when the
+    input does not match the compact format.
+    """
+    compact_format = "%Y%m%d%H%M%S"
+    try:
+        parsed = dt.strptime(time, compact_format)
+        return parsed.strftime("%Y-%m-%dT%H:%M:%SZ")
+    except ValueError:
+        return None
+def get_datetime_from_pdf_time(time: str) -> Optional[str]:
+    """Convert a PDF metadata timestamp to an ISO 8601 UTC datetime string.
+    The "D:" marker and apostrophes are removed and the remainder is parsed
+    as YYYYMMDDHHMMSS followed by a UTC offset. The parsed value is converted
+    to UTC before formatting, so the trailing "Z" is accurate. Returns None
+    when the value cannot be parsed.
+    """
+    cleaned = str(time).replace("D:", "").replace("'", "")
+    try:
+        parsed = dt.strptime(cleaned, "%Y%m%d%H%M%S%z")
+    except ValueError:
+        # Unparseable input is an expected outcome here; report it through
+        # the None return value rather than printing from library code.
+        return None
+    # Previously the local wall-clock time was emitted with a "Z" suffix,
+    # mislabelling every timestamp with a non-zero offset.
+    return parsed.astimezone(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+def normalize_date_dict(data: dict) -> dict:
+    """Map capitalized date-type keys to the lowercase commonmeta date keys.
+    Supported date types in commonmeta and the input key each is read from:
+    - created (Created)
+    - submitted (Submitted)
+    - accepted (Accepted)
+    - published (Issued)
+    - available (Available)
+    - updated (Updated)
+    - withdrawn (Withdrawn)
+    The mapped dict is passed through compact() before being returned.
+    """
+    key_map = (
+        ("created", "Created"),
+        ("submitted", "Submitted"),
+        ("accepted", "Accepted"),
+        ("published", "Issued"),
+        ("available", "Available"),
+        ("updated", "Updated"),
+        ("withdrawn", "Withdrawn"),
+    )
+    return compact({target: data.get(source, None) for target, source in key_map})

commonmeta/doi_utils.py ADDED Viewed

@@ -0,0 +1,273 @@
+"""Doi utils for commonmeta-py"""
+import re
+from typing import Optional
+import httpx
+from furl import furl
+from .base_utils import compact
+def validate_doi(doi: Optional[str]) -> Optional[str]:
+    """Extract the bare DOI (prefix/suffix) from a DOI string or DOI URL.
+    Accepts a plain DOI, a "doi:"-prefixed DOI, or an http(s) URL on doi.org,
+    handle.stage.datacite.org or handle.test.datacite.org (optionally with a
+    leading "dx."). Returns None for None input or when nothing matches.
+    """
+    if doi is None:
+        return None
+    pattern = r"\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/.+)\Z"  # noqa: E501
+    found = re.match(pattern, doi)
+    # Group 6 is the DOI itself, without resolver URL or "doi:" prefix.
+    return None if found is None else found.group(6)
+def validate_prefix(doi: Optional[str]) -> Optional[str]:
+    """Extract the DOI prefix ("10." followed by 4 or 5 digits) from a DOI.
+    Accepts a bare prefix, a plain DOI, a "doi:"-prefixed DOI, or an http(s)
+    URL on doi.org, handle.stage.datacite.org or handle.test.datacite.org.
+    Returns None for None input or when nothing matches.
+    """
+    if doi is None:
+        return None
+    # The prefix must be followed by "/" or by the end of the string. With an
+    # unrestricted ".*" tail, "10.123456/abc" matched and came back truncated
+    # as "10.12345", although validate_doi rejects that same value.
+    match = re.search(
+        r"\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(doi:)?(10\.\d{4,5})(?:/.*)?\Z",  # noqa: E501
+        doi,
+    )
+    if match is None:
+        return None
+    return match.group(6)
+def validate_suffix(doi: Optional[str]) -> Optional[str]:
+    """Extract the DOI suffix (everything after the prefix and "/") from a DOI.
+    Accepts the same DOI and DOI-URL forms as the prefix pattern below.
+    Returns None for None input or when nothing matches.
+    """
+    if doi is None:
+        return None
+    pattern = r"\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org|handle\.test\.datacite\.org)/)?(doi:)?(10\.\d{4,5})/(.+)\Z"  # noqa: E501
+    found = re.match(pattern, doi)
+    # Group 6 is the prefix, group 7 the suffix.
+    return None if found is None else found.group(7)
+def doi_from_url(url: Optional[str]) -> Optional[str]:
+    """Return the lowercased DOI contained in the path of *url*.
+    Returns None for None input, for URLs with a host whose scheme is not
+    http, https or ftp, and when the path contains no DOI.
+    NOTE(review): for doi.org URLs whose first path segment does not start
+    with "10." the value of short_doi_as_doi(url) is returned as-is, which
+    is a URL rather than a bare DOI. Confirm callers expect that.
+    """
+    if url is None:
+        return None
+    f = furl(url)
+    # check for allowed scheme if string is a URL
+    if f.host is not None and f.scheme not in ["http", "https", "ftp"]:
+        return None
+    # Guard the positional segment lookups below so a URL with an empty
+    # segment list falls through to the "no DOI" result instead of raising
+    # IndexError.
+    has_segments = len(f.path.segments) > 0
+    # url is for a short DOI
+    if (
+        f.host == "doi.org"
+        and has_segments
+        and not f.path.segments[0].startswith("10.")
+    ):
+        return short_doi_as_doi(url)
+    # special rules for specific hosts
+    if f.host == "onlinelibrary.wiley.com":
+        # drop a trailing "epdf" segment so the path ends with the DOI
+        if has_segments and f.path.segments[-1] in ["epdf"]:
+            f.path.segments.pop()
+    elif f.host == "www.plosone.org":
+        # the DOI is carried in the "uri" query argument
+        if (
+            has_segments
+            and f.path.segments[-1] in ["fetchobject.action"]
+            and f.args.get("uri", None) is not None
+        ):
+            f.path = f.args.get("uri")
+    path = str(f.path)
+    match = re.search(
+        r"(10\.\d{4,5}/.+)\Z",
+        path,
+    )
+    if match is None:
+        return None
+    return match.group(0).lower()
+def short_doi_as_doi(doi: Optional[str]) -> Optional[str]:
+    """Resolve a short DOI with an HTTP HEAD request.
+    Returns the "Location" header when the response status is 301;
+    otherwise returns the doi_as_url() form of the input. None input gives None.
+    """
+    if doi is None:
+        return None
+    target = doi_as_url(doi)
+    response = httpx.head(target, timeout=10)
+    if response.status_code == 301:
+        return response.headers.get("Location")
+    return target
+def doi_as_url(doi: Optional[str]) -> Optional[str]:
+    """Return *doi* as a lowercased https://doi.org/ URL.
+    A value whose host is already doi.org is only lowercased; anything else
+    is lowercased and appended to "https://doi.org/". None input gives None.
+    """
+    if doi is None:
+        return None
+    already_doi_org = furl(doi).host == "doi.org"
+    lowered = doi.lower()
+    return lowered if already_doi_org else "https://doi.org/" + lowered
+def normalize_doi(doi: Optional[str], **kwargs) -> Optional[str]:
+    """Return *doi* as a resolver URL followed by the lowercased DOI.
+    The resolver is chosen by doi_resolver(), which receives the keyword
+    arguments unchanged. Returns None when validate_doi() finds no DOI.
+    """
+    doi_str = validate_doi(doi)
+    if doi_str:
+        return doi_resolver(doi, **kwargs) + doi_str.lower()
+    return None
+def doi_resolver(doi, **kwargs):
+    """Return the resolver base URL to use for *doi*.
+    The DataCite stage handle server is chosen when *doi* is an http(s) URL
+    on handle.stage.datacite.org (case-insensitive) or when the "sandbox"
+    keyword argument is truthy; otherwise https://doi.org/ is returned.
+    None input gives None.
+    """
+    if doi is None:
+        return None
+    stage_host = r"\A(http|https):/(/)?handle\.stage\.datacite\.org"
+    on_stage = re.match(stage_host, doi, re.IGNORECASE) is not None
+    if on_stage or kwargs.get("sandbox", False):
+        return "https://handle.stage.datacite.org/"
+    return "https://doi.org/"
+def get_doi_ra(doi) -> Optional[str]:
+    """Look up the DOI registration agency for the prefix of *doi*.
+    Queries https://doi.org/ra/ followed by the prefix and returns the "RA"
+    field of the first element of the JSON response. Returns None when no
+    prefix can be extracted or the response status is not 200.
+    """
+    prefix = validate_prefix(doi)
+    if prefix is None:
+        return None
+    response = httpx.get("https://doi.org/ra/" + prefix, timeout=10)
+    if response.status_code == 200:
+        first_entry = response.json()[0]
+        return first_entry.get("RA", None)
+    return None
+def get_crossref_member(member_id) -> Optional[dict]:
+    """Fetch a Crossref member record and return its id URL and primary name.
+    Returns a dict with "id" (the members API URL) and "name" (the
+    "primary-name" of the response message). Returns None when the response
+    status is not 200 or the response carries no "message" object.
+    """
+    url = "https://api.crossref.org/members/" + member_id
+    response = httpx.get(url, timeout=10)
+    if response.status_code != 200:
+        return None
+    data = response.json().get("message", None)
+    # A 200 response without a "message" object previously raised
+    # AttributeError on data.get.
+    if not isinstance(data, dict):
+        return None
+    return {"id": url, "name": data.get("primary-name", None)}
+def crossref_api_url(doi: str) -> str:
+    """Build the Crossref REST API works URL for *doi*."""
+    base = "https://api.crossref.org/works/"
+    return base + doi
+def crossref_xml_api_url(doi: str) -> str:
+    """Build the Crossref API URL that returns UNIXSD XML for *doi*."""
+    url = f"https://api.crossref.org/works/{doi}/transform/application/vnd.crossref.unixsd+xml"
+    return url
+def crossref_api_query_url(query: dict) -> str:
+    """Build a Crossref API works query URL from a dict of parameters.
+    Recognised keys:
+    - "rows": result count, default 20, capped at 1000
+    - "query": free-text query, used as-is
+    - "query.bibliographic", "query.author", "query.title",
+      "query.container-title": added to the query as "key:value"
+    - "prefix", "member", "type", "has-full-text", "has-references",
+      "has-orcid", "has-funder", "has-license": added to the filter as
+      "key:value"
+    Query and filter terms are each joined with ",". Other keys are ignored.
+    """
+    query_keys = (
+        "query.bibliographic",
+        "query.author",
+        "query.title",
+        "query.container-title",
+    )
+    filter_keys = (
+        "prefix",
+        "member",
+        "type",
+        "has-full-text",
+        "has-references",
+        "has-orcid",
+        "has-funder",
+        "has-license",
+    )
+    rows = min(int(query.get("rows", 20)), 1000)
+    queries = []
+    if query.get("query", None) is not None:
+        queries.append(query.get("query"))
+    queries += [f"{key}:{value}" for key, value in query.items() if key in query_keys]
+    filters = [f"{key}:{value}" for key, value in query.items() if key in filter_keys]
+    # Computed once, after collection. These names used to be bound inside
+    # the loops, so an empty dict raised UnboundLocalError.
+    _query = ",".join(queries) if queries else None
+    _filter = ",".join(filters) if filters else None
+    f = furl("https://api.crossref.org/works")
+    f.args = compact({"rows": rows, "query": _query, "filter": _filter})
+    return f.url
+def crossref_api_sample_url(number: int = 1, **kwargs) -> str:
+    """Build the Crossref API URL for a random sample of *number* works.
+    Optional keyword arguments:
+    - prefix: restrict to a DOI prefix; ignored unless validate_prefix()
+      accepts it
+    - _type: restrict to a work type; ignored unless it is in the list below
+    When both are given they are combined into a single comma-separated
+    "filter" parameter.
+    """
+    types = (
+        "book-section",
+        "monograph",
+        "report-component",
+        "report",
+        "peer-review",
+        "book-track",
+        "journal-article",
+        "book-part",
+        "other",
+        "book",
+        "journal-volume",
+        "book-set",
+        "reference-entry",
+        "proceedings-article",
+        "journal",
+        "component",
+        "book-chapter",
+        "proceedings-series",
+        "report-series",
+        "proceedings",
+        "database",
+        "standard",
+        "reference-book",
+        "posted-content",
+        "journal-issue",
+        "dissertation",
+        "grant",
+        "dataset",
+        "book-series",
+        "edited-book",
+        "journal-section",
+        "monograph-series",
+        "journal-meta",
+        "book-series-meta",
+        "component-list",
+        "journal-issue-meta",
+        "book-part-meta",
+        "book-meta",
+        "proceedings-meta",
+    )
+    url = f"https://api.crossref.org/works?sample={number}"
+    filters = []
+    prefix = kwargs.get("prefix", None)
+    if prefix and validate_prefix(prefix):
+        filters.append(f"prefix:{prefix}")
+    _type = kwargs.get("_type", None)
+    if _type and _type in types:
+        filters.append(f"type:{_type}")
+    # Emit one "filter" parameter; previously prefix and type together
+    # produced two separate "&filter=" parameters in the same URL.
+    if filters:
+        url += "&filter=" + ",".join(filters)
+    return url
+def datacite_api_url(doi: str, **kwargs) -> str:
+    """Build the DataCite API URL for *doi*, including media and client.
+    The stage API is used when *doi* is an http(s) URL on
+    handle.stage.datacite.org (case-insensitive) or when the "sandbox"
+    keyword argument is truthy; otherwise the production API is used.
+    """
+    stage_host = r"\A(http|https):/(/)?handle\.stage\.datacite\.org"
+    on_stage = re.match(stage_host, doi, re.IGNORECASE) is not None
+    if not (on_stage or kwargs.get("sandbox", False)):
+        return f"https://api.datacite.org/dois/{doi_from_url(doi)}?include=media,client"
+    return f"https://api.stage.datacite.org/dois/{doi_from_url(doi)}?include=media,client"
+def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
+    """Build the DataCite API URL for a random sample of *number* DOIs.
+    The stage API is used when the "sandbox" keyword argument is truthy.
+    """
+    if not kwargs.get("sandbox", False):
+        return f"https://api.datacite.org/dois?random=true&page[size]={number}"
+    return f"https://api.stage.datacite.org/dois?random=true&page[size]={number}"
+def is_rogue_scholar_doi(doi: str) -> bool:
+    """Tell whether the prefix of *doi* is one of the Rogue Scholar prefixes."""
+    rogue_scholar_prefixes = (
+        "10.34732",
+        "10.53731",
+        "10.54900",
+        "10.57689",
+        "10.59348",
+        "10.59349",
+        "10.59350",
+    )
+    return validate_prefix(doi) in rogue_scholar_prefixes

commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl

commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl