PyPI - commonmeta-py - Versions diffs - 0.107__py3-none-any.whl → 0.108__py3-none-any.whl - Mend

commonmeta-py: 0.107 (py3-none-any.whl) → 0.108 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

commonmeta/__init__.py +12 -15
commonmeta/api_utils.py +3 -2
commonmeta/base_utils.py +186 -3
commonmeta/cli.py +114 -34
commonmeta/constants.py +20 -0
commonmeta/file_utils.py +112 -0
commonmeta/metadata.py +102 -42
commonmeta/readers/codemeta_reader.py +1 -1
commonmeta/readers/crossref_reader.py +23 -10
commonmeta/readers/crossref_xml_reader.py +1 -1
commonmeta/readers/datacite_reader.py +6 -4
commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
commonmeta/resources/crossref/fundref.xsd +29 -19
commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
commonmeta/resources/crossref/module-ali.xsd +14 -6
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
commonmeta/resources/crossref/xml.xsd +287 -0
commonmeta/schema_utils.py +25 -0
commonmeta/utils.py +25 -9
commonmeta/writers/bibtex_writer.py +5 -5
commonmeta/writers/commonmeta_writer.py +4 -17
commonmeta/writers/crossref_xml_writer.py +1031 -4
commonmeta/writers/csl_writer.py +1 -2
commonmeta/writers/datacite_writer.py +8 -4
commonmeta/writers/inveniordm_writer.py +277 -2
commonmeta/writers/ris_writer.py +3 -3
commonmeta/writers/schema_org_writer.py +10 -5
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/METADATA +4 -2
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/RECORD +45 -31
commonmeta/crossref_utils.py +0 -662
commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0

commonmeta/writers/crossref_xml_writer.py CHANGED Viewed

@@ -1,19 +1,1046 @@
 """Crossref XML writer for commonmeta-py"""
+import io
+from datetime import datetime
+from time import time
 from typing import Optional
+import orjson as json
+import requests
+from dateutil.parser import parse as date_parse
+from furl import furl
+from marshmallow import Schema, fields
+from pydash import py_
+from requests_toolbelt.multipart.encoder import MultipartEncoder
+from ..base_utils import compact, parse_xml, unparse_xml, unparse_xml_list, wrap
 from ..constants import Commonmeta
-from ..crossref_utils import generate_crossref_xml, generate_crossref_xml_list
+from ..doi_utils import doi_from_url, validate_doi
+from ..utils import validate_url
+# Values accepted as the `type` attribute of a posted_content element;
+# convert_crossref_xml falls back to "other" for an Article whose
+# additional_type is not in this list.
+POSTED_CONTENT_TYPES = [
+    "preprint",
+    "working_paper",
+    "letter",
+    "dissertation",
+    "report",
+    "review",
+    "other",
+]
+# Maps converter field names to the element names CrossrefXMLSchema emits
+# (its data_key values); the writers use it to restore the field order
+# after schema.dump().
+MARSHMALLOW_MAP = {
+    "abstracts": "jats:abstract",
+    "license": "ai:program",
+    "funding_references": "fr:program",
+    "relations": "rel:program",
+    "references": "citation_list",
+}
+class CrossrefXMLSchema(Schema):
+    """Marshmallow schema that serializes converted metadata to Crossref XML keys.
+
+    Fields declared with a ``data_key`` are emitted under that (namespaced)
+    element name instead of the attribute name.
+    """
+    # NOTE(review): convert_crossref_xml also produces keys such as
+    # "conference_paper" and "proceedings_title" that are not declared here;
+    # confirm whether they are meant to be serialized.
+    # root element
+    book = fields.Dict()
+    conference = fields.Dict()
+    database = fields.Dict()
+    dissertation = fields.Dict()
+    journal = fields.Dict()
+    peer_review = fields.Dict()
+    report_paper = fields.Dict()
+    pending_publication = fields.Dict()
+    posted_content = fields.Dict()
+    sa_component = fields.Dict()
+    standard = fields.Dict()
+    # elements
+    group_title = fields.String()
+    book_metadata = fields.Dict()
+    database_metadata = fields.Dict()
+    event_metadata = fields.Dict()
+    proceedings_metadata = fields.Dict()
+    journal_metadata = fields.Dict()
+    journal_issue = fields.Dict()
+    journal_article = fields.Dict()
+    component = fields.Dict()
+    titles = fields.Dict()
+    contributors = fields.Dict()
+    abstracts = fields.List(fields.Dict(), data_key="jats:abstract")
+    publication_date = fields.Dict()
+    posted_date = fields.Dict()
+    review_date = fields.Dict()
+    approval_date = fields.Dict()
+    publisher_item = fields.Dict()
+    # declared once; the original repeated this field a second time below item_number
+    institution = fields.Dict()
+    item_number = fields.Dict()
+    isbn = fields.String()
+    issn = fields.String()
+    publisher = fields.Dict()
+    description = fields.Dict()
+    funding_references = fields.Dict(data_key="fr:program")
+    license = fields.Dict(data_key="ai:program")
+    relations = fields.Dict(data_key="rel:program")
+    archive_locations = fields.List(fields.Dict())
+    doi_data = fields.Dict(data_key="doi_data")
+    references = fields.Dict(data_key="citation_list")
+def convert_crossref_xml(metadata: Commonmeta) -> Optional[dict]:
+    """Build the Crossref element dict for a single metadata record.
+
+    Returns None when ``metadata.type`` is not handled, or when the record
+    lacks a DOI or a URL. The key order of the returned dict is significant:
+    write_crossref_xml and write_crossref_xml_list reuse it as the element order.
+    """
+    record_type = metadata.type
+    # types accepted at all; "Report" passes this check but has no branch
+    # below, so it still ends up returning None
+    if record_type not in (
+        "Article",
+        "BlogPost",
+        "Book",
+        "BookChapter",
+        "Component",
+        "Dataset",
+        "Dissertation",
+        "JournalArticle",
+        "PeerReview",
+        "ProceedingsArticle",
+        "Report",
+        "Standard",
+    ):
+        return None
+    # both a DOI and a URL are required
+    if doi_from_url(metadata.id) is None or metadata.url is None:
+        return None
+    # elements shared by several record types
+    titles = get_titles(metadata)
+    contributors = get_contributors(metadata)
+    abstracts = get_abstracts(metadata)
+    relations = get_relations(metadata)
+    doi_data = get_doi_data(metadata)
+    references = get_references(metadata)
+    funding_references = get_funding_references(metadata)
+    license_program = get_license(metadata)
+    if record_type in ("Article", "BlogPost"):
+        # only an Article may carry one of the specific posted-content types;
+        # everything else is deposited as "other"
+        posted_type = "other"
+        if (
+            record_type == "Article"
+            and metadata.additional_type in POSTED_CONTENT_TYPES
+        ):
+            posted_type = metadata.additional_type
+        return compact(
+            {
+                "posted_content": get_attributes(
+                    metadata, type=posted_type, language=metadata.language
+                ),
+                "group_title": get_group_title(metadata),
+                "contributors": contributors,
+                "titles": titles,
+                "posted_date": get_publication_date(metadata),
+                "institution": get_institution(metadata),
+                "item_number": get_item_number(metadata),
+                "abstracts": abstracts,
+                "funding_references": funding_references,
+                "license": license_program,
+                "relations": relations,
+                "doi_data": doi_data,
+                "references": references,
+            }
+        )
+    if record_type == "Book":
+        return compact(
+            {
+                "book": get_attributes(metadata, book_type="monograph"),
+                "book_metadata": get_book_metadata(metadata),
+                "contributors": contributors,
+                "titles": titles,
+                "abstracts": abstracts,
+                "publication_date": get_publication_date(metadata, media_type="online"),
+                "isbn": get_isbn(metadata),
+                "publisher": get_publisher(metadata),
+                "publisher_item": None,
+                "funding_references": funding_references,
+                "license": license_program,
+                "relations": relations,
+                "archive_locations": get_archive_locations(metadata),
+                "doi_data": doi_data,
+                "references": references,
+            }
+        )
+    if record_type == "BookChapter":
+        return compact(
+            {
+                "book": get_attributes(metadata, book_type="monograph"),
+                "book_metadata": get_book_metadata(metadata),
+                "contributors": contributors,
+                "titles": titles,
+                "publication_date": get_publication_date(metadata, media_type="online"),
+                "isbn": get_isbn(metadata),
+                "publisher": get_publisher(metadata),
+                "abstracts": abstracts,
+                "funding_references": funding_references,
+                "license": license_program,
+                "relations": relations,
+                "archive_locations": get_archive_locations(metadata),
+                "doi_data": doi_data,
+                "references": references,
+            }
+        )
+    if record_type == "Component":
+        return compact(
+            {
+                "sa_component": get_attributes(metadata),
+                "component": {"@reg-agency": "CrossRef"},
+                "description": None,
+                "doi_data": doi_data,
+            }
+        )
+    if record_type == "Dataset":
+        publisher = py_.get(metadata, "publisher.name")
+        return compact(
+            {
+                "database": {},
+                "database_metadata": get_database_metadata(metadata),
+                # only emitted when a non-empty publisher name is present
+                "publisher_item": {"title": publisher} if publisher else None,
+                "institution": get_institution(metadata),
+                "component": {"@parent_relation": "isPartOf"},
+                "titles": titles,
+                "contributors": contributors,
+                "publication_date": get_publication_date(metadata, media_type="online"),
+                "doi_data": doi_data,
+            }
+        )
+    if record_type == "Dissertation":
+        return compact(
+            {
+                "dissertation": get_attributes(metadata),
+                "contributors": contributors,
+                "titles": titles,
+                "approval_date": get_publication_date(metadata),
+                "institution": get_institution(metadata),
+                "degree": None,
+                "isbn": get_isbn(metadata),
+                "publisher_item": None,
+                "funding_references": funding_references,
+                "license": license_program,
+                "relations": relations,
+                "doi_data": doi_data,
+            }
+        )
+    if record_type == "JournalArticle":
+        language = metadata.language
+        return compact(
+            {
+                "journal": {},
+                "journal_metadata": get_journal_metadata(metadata),
+                "journal_issue": get_journal_issue(metadata),
+                "journal_article": get_attributes(metadata, language=language),
+                "titles": titles,
+                "contributors": contributors,
+                "abstracts": abstracts,
+                "publication_date": get_publication_date(metadata, media_type="online"),
+                "publisher_item": None,
+                "funding_references": funding_references,
+                "license": license_program,
+                "crossmark": None,
+                "relations": relations,
+                "archive_locations": get_archive_locations(metadata),
+                "doi_data": doi_data,
+                "references": references,
+            }
+        )
+    if record_type == "PeerReview":
+        return compact(
+            {
+                "peer_review": get_attributes(
+                    metadata, type="author-comment", stage="pre-publication"
+                ),
+                "contributors": contributors,
+                "titles": titles,
+                "review_date": get_publication_date(metadata),
+                "license": license_program,
+                "relations": relations,
+                "doi_data": doi_data,
+            }
+        )
+    if record_type == "ProceedingsArticle":
+        return compact(
+            {
+                "conference": get_attributes(metadata),
+                "event_metadata": get_event_metadata(metadata),
+                "proceedings_metadata": get_proceedings_metadata(metadata),
+                "proceedings_title": py_.get(metadata, "container.title"),
+                "publisher": get_publisher(metadata),
+                "conference_paper": get_attributes(metadata),
+                "contributors": contributors,
+                "titles": titles,
+                "publication_date": get_publication_date(metadata),
+                "abstracts": abstracts,
+                "publisher_item": None,
+                "funding_references": funding_references,
+                "license": license_program,
+                "crossmark": None,
+                "relations": relations,
+                "archive_locations": get_archive_locations(metadata),
+                "doi_data": doi_data,
+                "references": references,
+            }
+        )
+    if record_type == "Standard":
+        return compact(
+            {
+                "standard": get_attributes(metadata),
+                "journal_metadata": get_journal_metadata(metadata),
+                "journal_issue": get_journal_issue(metadata),
+                "titles": titles,
+                "contributors": contributors,
+                "publication_date": get_publication_date(metadata),
+                "publisher_item": None,
+                "funding_references": funding_references,
+                "license": license_program,
+                "crossmark": None,
+                "relations": relations,
+                "archive_locations": get_archive_locations(metadata),
+                "doi_data": doi_data,
+                "references": references,
+                "component_list": None,
+            }
+        )
+    # accepted type without a dedicated branch
+    return None
 def write_crossref_xml(metadata: Commonmeta) -> Optional[str]:
     """Write Crossref XML"""
-    return generate_crossref_xml(metadata)
+    converted = convert_crossref_xml(metadata)
+    if converted is None:
+        return None
+    dumped = CrossrefXMLSchema().dump(converted)
+    # Rebuild the dict in the converter's key order, translating field names
+    # to the element names the schema emitted
+    ordered = {}
+    for field_name in converted:
+        element_name = MARSHMALLOW_MAP.get(field_name, field_name)
+        if element_name in dumped:
+            ordered[element_name] = dumped[element_name]
+    # Convert to XML
+    return unparse_xml(ordered, dialect="crossref")
 def write_crossref_xml_list(metalist):
     """Write crossref_xml list"""
-    if metalist is None:
+    if metalist is None or not metalist.is_valid:
+        return None
+    schema = CrossrefXMLSchema()
+    crossref_xml_list = []
+    for item in metalist.items:
+        data = convert_crossref_xml(item)
+        # convert_crossref_xml returns None for unhandled types and for records
+        # without DOI or URL; skip those instead of failing on data.keys()
+        if data is None:
+            continue
+        crossref_xml = schema.dump(data)
+        # Ensure the order of fields in the XML matches the expected order
+        field_order = [MARSHMALLOW_MAP.get(k, k) for k in data]
+        crossref_xml = {k: crossref_xml[k] for k in field_order if k in crossref_xml}
+        crossref_xml_list.append(crossref_xml)
+    # depositor details passed to unparse_xml_list as the batch head
+    head = {
+        "depositor": metalist.depositor,
+        "email": metalist.email,
+        "registrant": metalist.registrant,
+    }
+    return unparse_xml_list(crossref_xml_list, dialect="crossref", head=head)
+def push_crossref_xml_list(metalist, login_id: str, login_passwd: str) -> bytes:
+    """Deposit the Crossref XML for ``metalist`` and return a JSON status report.
+
+    When the response reports SUCCESS, returns orjson-encoded bytes listing
+    every item as "submitted". Raises CrossrefError when the HTTP request
+    fails or the response reports anything other than SUCCESS.
+    """
+    # NOTE(review): CrossrefError is not among the imports visible at the top
+    # of this module; confirm where it is defined.
+    payload = write_crossref_xml_list(metalist)
+    # the upload needs bytes, the writer returns text
+    if isinstance(payload, str):
+        payload = payload.encode("utf-8")
+    # The filename displayed in the Crossref admin interface, using the current UNIX timestamp
+    upload_name = str(int(time()))
+    form = MultipartEncoder(
+        fields={
+            "fname": (upload_name, io.BytesIO(payload), "application/xml"),
+            "operation": "doMDUpload",
+            "login_id": login_id,
+            "login_passwd": login_passwd,
+        }
+    )
+    try:
+        resp = requests.post(
+            "https://doi.crossref.org/servlet/deposit",
+            data=form,
+            headers={"Content-Type": form.content_type},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        # the response body is parsed to read the status heading
+        parsed = parse_xml(resp.content)
+        if py_.get(parsed, "html.body.h2") != "SUCCESS":
+            message = py_.get(parsed, "html.body.p")
+            raise CrossrefError(f"Error uploading batch: {message}")
+        report = [
+            {
+                "doi": item.id,
+                "updated": datetime.now().isoformat("T", "seconds"),
+                "status": "submitted",
+            }
+            for item in metalist.items
+        ]
+        # orjson has different options
+        return json.dumps(report, option=json.OPT_INDENT_2)
+    except requests.exceptions.RequestException as e:
+        raise CrossrefError(f"Error uploading batch: {str(e)}") from e
+def get_attributes(obj, **kwargs) -> dict:
+    """Turn the given keyword arguments into XML attributes for a root element.
+
+    Only type, book_type, language, stage and reg-agency are read; ``obj`` is
+    not used. The result is passed through compact().
+    """
+    attribute_names = ("type", "book_type", "language", "stage", "reg-agency")
+    return compact({f"@{name}": kwargs.get(name, None) for name in attribute_names})
+def get_journal_metadata(obj) -> Optional[dict]:
+    """Return language, full title and ISSN taken from the record's container."""
+    # the container identifier is only used when it is declared as an ISSN
+    issn = None
+    if py_.get(obj, "container.identifierType") == "ISSN":
+        issn = py_.get(obj, "container.identifier")
+    journal = {
+        "@language": py_.get(obj, "language"),
+        "full_title": py_.get(obj, "container.title"),
+        "issn": issn,
+    }
+    return compact(journal)
+def get_book_metadata(obj) -> Optional[dict]:
+    """Return the book_metadata attributes (currently only the language)."""
+    language = py_.get(obj, "language")
+    return compact({"@language": language})
+def get_database_metadata(obj) -> Optional[dict]:
+    """Return the database_metadata attributes (currently only the language)."""
+    language = py_.get(obj, "language")
+    return compact({"@language": language})
+def get_event_metadata(obj) -> Optional[dict]:
+    """Return conference name and location from the container, or None without a title."""
+    conference_name = py_.get(obj, "container.title")
+    if conference_name is None:
+        return None
+    event = {
+        "conference_name": conference_name,
+        "conference_location": py_.get(obj, "container.location"),
+        "conference_date": None,
+    }
+    return compact(event)
+def get_proceedings_metadata(obj) -> Optional[dict]:
+    """Return language and proceedings title, or None when the container has no title."""
+    proceedings_title = py_.get(obj, "container.title")
+    if proceedings_title is None:
+        return None
+    proceedings = {
+        "@language": py_.get(obj, "language"),
+        "proceedings_title": proceedings_title,
+    }
+    return compact(proceedings)
+def get_journal_issue(obj) -> Optional[dict]:
+    """Return publication date, volume and issue of the record's container."""
+    volume = py_.get(obj, "container.volume")
+    return compact(
+        {
+            "publication_date": get_publication_date(obj),
+            # the volume is nested inside a journal_volume element
+            "journal_volume": None if volume is None else {"volume": volume},
+            "issue": py_.get(obj, "container.issue"),
+        }
+    )
+def get_institution(obj) -> Optional[dict]:
+    """Return the institution element built from the record's container.
+
+    Returns None when the container has no title. The institution_id is only
+    added when the container identifier is declared as a ROR.
+    """
+    institution_name = py_.get(obj, "container.title")
+    if institution_name is None:
+        return None
+    institution_id = None
+    # the original looked up the misspelled key "container.identifierTyoe",
+    # so this branch was never taken and the ROR id was never emitted
+    if py_.get(obj, "container.identifierType") == "ROR":
+        institution_id = {
+            "#text": py_.get(obj, "container.identifier"),
+            "@type": "ror",
+        }
+    return compact(
+        {
+            "institution_name": institution_name,
+            "institution_id": institution_id,
+        }
+    )
+def get_titles(obj) -> Optional[dict]:
+    """Return a dict with "title" and, when present, "subtitle".
+
+    Entries may be plain strings or dicts with "title" and optional
+    "titleType"; a dict whose titleType is "Subtitle" fills "subtitle".
+    Entries of any other kind are ignored. Later entries overwrite earlier ones.
+    """
+    title = {}
+    for t in wrap(py_.get(obj, "titles", [])):
+        if isinstance(t, str):
+            title["title"] = t
+        # check the entry itself; the original tested the accumulator dict,
+        # which is always a dict, so a non-dict entry failed on t.get()
+        elif isinstance(t, dict):
+            if t.get("titleType", None) == "Subtitle":
+                title["subtitle"] = t.get("title", None)
+            else:
+                title["title"] = t.get("title", None)
+    return title
+def get_contributors(obj) -> Optional[dict]:
+    """Group the record's authors and editors into Crossref contributor elements.
+
+    Returns a dict with any of the keys "person_name", "organization" and
+    "anonymous", or None when there is nothing to emit.
+    """
+    def map_affiliations(affiliations):
+        """Convert a contributor's affiliations to institution elements."""
+        if affiliations is None:
+            return None
+        mapped = []
+        for affiliation in affiliations:
+            affiliation_id = affiliation.get("id", None)
+            institution = compact(
+                {
+                    "institution_name": affiliation.get("name", None),
+                    # an affiliation id, when present, is emitted with type "ror"
+                    "institution_id": {"#text": affiliation_id, "@type": "ror"}
+                    if affiliation_id is not None
+                    else None,
+                }
+            )
+            mapped.append(compact({"institution": institution}))
+        return mapped
+    contributors = py_.get(obj, "contributors")
+    if contributors is None or len(contributors) == 0:
+        return None
+    # NOTE(review): only contributors whose roles are exactly ["Author"] or
+    # ["Editor"] pass this filter; confirm that multi-role entries should be dropped.
+    selected = [
+        c
+        for c in contributors
+        if c.get("contributorRoles", None) in (["Author"], ["Editor"])
+    ]
+    person_names = []
+    organizations = []
+    anonymous_contributors = []
+    for position, contributor in enumerate(selected):
+        roles = contributor.get("contributorRoles")
+        if "Author" in roles:
+            contributor_role = "author"
+        elif "Editor" in roles:
+            contributor_role = "editor"
+        else:
+            contributor_role = None
+        # the first selected contributor is "first", all others "additional"
+        sequence = "additional" if position else "first"
+        is_named_organization = (
+            contributor.get("type", None) == "Organization"
+            and contributor.get("name", None) is not None
+        )
+        has_personal_name = (
+            contributor.get("givenName", None) is not None
+            or contributor.get("familyName", None) is not None
+        )
+        if is_named_organization:
+            organizations.append(
+                {
+                    "@contributor_role": contributor_role,
+                    "@sequence": sequence,
+                    "#text": contributor.get("name"),
+                }
+            )
+        elif has_personal_name:
+            person = {
+                "@contributor_role": contributor_role,
+                "@sequence": sequence,
+                "given_name": contributor.get("givenName", None),
+                "surname": contributor.get("familyName", None),
+                "affiliations": map_affiliations(
+                    contributor.get("affiliations", None)
+                ),
+                "ORCID": contributor.get("id", None),
+            }
+            person_names.append(compact(person))
+        else:
+            # neither a named organization nor a person with a name
+            unnamed = {
+                "@contributor_role": contributor_role,
+                "@sequence": sequence,
+                "affiliations": map_affiliations(
+                    contributor.get("affiliations", None)
+                ),
+            }
+            anonymous_contributors.append(compact(unnamed))
+    grouped = (
+        ("person_name", person_names),
+        ("organization", organizations),
+        ("anonymous", anonymous_contributors),
+    )
+    result = {key: entries for key, entries in grouped if entries}
+    return result if result else None
+def get_publisher(obj) -> Optional[dict]:
+    """Return the publisher element, or None when the record has no publisher name."""
+    publisher_name = py_.get(obj, "publisher.name")
+    if publisher_name is None:
+        return None
+    return {"publisher_name": publisher_name}
+def get_abstracts(obj) -> Optional[list]:
+    """Return JATS abstract elements for descriptions of type Abstract or Other.
+
+    Returns None when the record has no descriptions at all.
+    """
+    if py_.get(obj, "descriptions") is None:
+        return None
+    # both description types are rendered the same way
+    return [
+        {
+            "@xmlns:jats": "http://www.ncbi.nlm.nih.gov/JATS1",
+            "jats:p": d.get("description", None),
+        }
+        for d in wrap(py_.get(obj, "descriptions", []))
+        if d.get("type", None) in ("Abstract", "Other")
+    ]
+def get_group_title(obj) -> Optional[str]:
+    """Get group title from metadata
+
+    Uses the first subject; returns None when there are no subjects or the
+    first subject carries no "subject" string.
+    """
+    if py_.get(obj, "subjects") is None or len(py_.get(obj, "subjects")) == 0:
         return None
+    group_title = py_.get(obj, "subjects[0].subject")
+    # the lookup yields None when the first entry has no "subject" key;
+    # the original then failed on .startswith()
+    if not isinstance(group_title, str):
+        return None
+    # strip optional FOS (Field of Science) prefix
+    if group_title.startswith("FOS: "):
+        group_title = group_title[5:]
+    return group_title
+def get_item_number(obj) -> Optional[dict]:
+    """Return an item_number element built from the first usable UUID identifier.
+
+    Returns None when the record has no identifiers or none of them is a
+    UUID with a value.
+    """
+    if py_.get(obj, "identifiers") is None:
+        return None
+    for identifier in py_.get(obj, "identifiers"):
+        if identifier.get("identifierType", None) != "UUID":
+            continue
+        value = identifier.get("identifier", None)
+        # a UUID entry without a value cannot be used; the original failed
+        # on None.replace() here
+        if value is None:
+            continue
+        # strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
+        return {
+            "@item_number_type": identifier.get("identifierType", "").lower(),
+            "#text": value.replace("-", ""),
+        }
+    return None
+def get_publication_date(obj, media_type: Optional[str] = None) -> Optional[dict]:
+    """Return the published date split into month, day and year strings.
+
+    ``media_type`` is added as the @media_type attribute when given. Returns
+    None when the record has no ``date.published``. A value that dateutil
+    cannot parse still raises.
+    """
+    published = py_.get(obj, "date.published")
+    # check before parsing: the original parsed first, so a missing date
+    # raised inside date_parse and its None check was never reached
+    if published is None:
+        return None
+    pub_date = date_parse(published)
+    return compact(
+        {
+            "@media_type": media_type,
+            "month": f"{pub_date.month:d}",
+            "day": f"{pub_date.day:d}",
+            "year": str(pub_date.year),
+        }
+    )
+def get_archive_locations(obj) -> Optional[list]:
+    """Return one archive element per archive location, or None when there are none."""
+    locations = py_.get(obj, "archive_locations")
+    if locations is None or len(locations) == 0:
+        return None
+    archives = []
+    for name in locations:
+        archives.append(compact({"archive": {"@name": name}}))
+    return archives
+def get_references(obj) -> Optional[dict]:
+    """Return the record's references as a dict with a "citation" list, or None."""
+    refs = py_.get(obj, "references")
+    if refs is None or len(refs) == 0:
+        return None
+    citations = []
+    for position, ref in enumerate(refs, start=1):
+        citation = {
+            # references without their own key are numbered ref1, ref2, ...
+            "@key": ref.get("key", f"ref{position}"),
+            "doi": doi_from_url(ref.get("id", None)),
+            "journal_title": ref.get("journal_title", None),
+            "author": ref.get("author", None),
+            "volume": ref.get("volume", None),
+            "first_page": ref.get("first_page", None),
+            "cYear": ref.get("publicationYear", None),
+            "article_title": ref.get("title", None),
+            "unstructured_citation": ref.get("unstructured", None),
+        }
+        citations.append(compact(citation))
+    return {"citation": citations}
+def get_license(obj) -> Optional[dict]:
+    """Return the AccessIndicators program for the record's license URL, or None."""
+    rights_uri = py_.get(obj, "license.url")
+    if rights_uri is None:
+        return None
+    # the same license URL is emitted once for "vor" and once for "tdm"
+    license_refs = [
+        {"@applies_to": applies_to, "#text": rights_uri}
+        for applies_to in ("vor", "tdm")
+    ]
+    return {
+        "@xmlns:ai": "http://www.crossref.org/AccessIndicators.xsd",
+        "@name": "AccessIndicators",
+        "ai:license_ref": license_refs,
+    }
+def get_funding_references(obj) -> Optional[dict]:
+    """Return the fundref program with one assertion list, or None without funding."""
+    funding = py_.get(obj, "funding_references")
+    if funding is None or len(funding) == 0:
+        return None
+    assertions = []
+    for entry in wrap(funding):
+        funder_id = entry.get("funderIdentifier", None)
+        is_ror = entry.get("funderIdentifierType", None) == "ROR"
+        # a ROR identifier takes precedence over the funder name
+        if funder_id is not None and is_ror:
+            assertions.append({"@name": "ror", "#text": funder_id})
+        elif entry.get("funderName", None) is not None:
+            assertions.append(
+                {"@name": "funder_name", "#text": entry.get("funderName")}
+            )
+        # the award number is added independently of the funder assertion
+        if entry.get("awardNumber", None) is not None:
+            assertions.append(
+                {"@name": "award_number", "#text": entry.get("awardNumber")}
+            )
+    return {
+        "@xmlns:fr": "http://www.crossref.org/fundref.xsd",
+        "@name": "fundref",
+        "fr:assertion": assertions,
+    }
+def get_relations(obj) -> Optional[dict]:
+    """Return the relations program for the record, or None without relations.
+
+    Each supported relation becomes a related_item holding either an
+    inter_work_relation or an intra_work_relation; relations of any other
+    type are dropped.
+    """
+    relations = py_.get(obj, "relations")
+    if relations is None or len(relations) == 0:
+        return None
+    def format_relation(relation):
+        """Format one relation, or return None for an unsupported type."""
+        relation_type = relation.get("type", None)
+        if relation_type in [
+            "IsPartOf",
+            "HasPart",
+            "IsReviewOf",
+            "HasReview",
+            "IsRelatedMaterial",
+            "HasRelatedMaterial",
+        ]:
+            group = "rel:inter_work_relation"
+        elif relation_type in [
+            "IsIdenticalTo",
+            "IsPreprintOf",
+            "HasPreprint",
+            "IsTranslationOf",
+            "HasTranslation",
+            "IsVersionOf",
+            "HasVersion",
+        ]:
+            group = "rel:intra_work_relation"
+        else:
+            return None
+        relation_id = relation.get("id", None)
+        f = furl(relation_id)
+        if validate_doi(relation_id):
+            identifier_type = "doi"
+            _id = doi_from_url(relation_id)
+        elif f.host == "portal.issn.org" and obj.type in [
+            "Article",
+            "BlogPost",
+        ]:
+            # ISSN portal links are reduced to their last path segment
+            identifier_type = "issn"
+            _id = f.path.segments[-1] if f.path.segments else None
+        elif validate_url(relation_id) == "URL":
+            identifier_type = "uri"
+            _id = relation_id
+        else:
+            identifier_type = "other"
+            _id = relation_id
+        return {
+            group: compact(
+                {
+                    "@relationship-type": py_.lower_first(relation_type)
+                    if relation_type is not None
+                    else None,
+                    "@identifier-type": identifier_type,
+                    "#text": _id,
+                },
+            )
+        }
+    # format each relation once; the original called format_relation twice
+    # per relation (once to filter, once to collect)
+    related_items = []
+    for relation in relations:
+        formatted = format_relation(relation)
+        if formatted is not None:
+            related_items.append(formatted)
+    return {
+        "@xmlns:rel": "http://www.crossref.org/relations.xsd",
+        "@name": "relations",
+        "rel:related_item": related_items,
+    }
+def get_subjects(obj) -> Optional[list]:
+    """Collect the subjects of obj as a flat list.
+    Returns None when obj has no subjects. Dict entries contribute the value
+    of their "subject" key (None when the key is missing); any other entry is
+    passed through unchanged.
+    """
+    entries = py_.get(obj, "subjects")
+    if entries is None:
+        return None
+    return [
+        entry.get("subject", None) if isinstance(entry, dict) else entry
+        for entry in entries
+    ]
+def get_doi_data(obj) -> Optional[dict]:
+    """Build the doi_data element for obj.
+    Returns None when obj has no DOI or no url. Otherwise returns the DOI, the
+    landing page url and a text-mining collection that lists the landing page
+    as text/html followed by every file that has both a mimeType and a url.
+    """
+    doi = doi_from_url(py_.get(obj, "id"))
+    url = py_.get(obj, "url")
+    if doi is None or url is None:
+        return None
+    def as_item(mime_type, location):
+        """Wrap one resource location in the collection item structure."""
+        return {"resource": {"@mime_type": mime_type, "#text": location}}
+    items = [as_item("text/html", url)]
+    items.extend(
+        as_item(file.get("mimeType"), file.get("url"))
+        for file in wrap(py_.get(obj, "files"))
+        if file.get("mimeType", None) is not None
+        and file.get("url", None) is not None
+    )
+    return compact(
+        {
+            "doi": doi,
+            "resource": url,
+            "collection": {
+                "@property": "text-mining",
+                "item": items,
+            },
+        }
+    )
+def get_isbn(obj):
+    """Return the container identifier of obj when it is an ISBN, else None."""
+    is_isbn = py_.get(obj, "container.identifierType") == "ISBN"
+    return py_.get(obj, "container.identifier") if is_isbn else None
+def get_issn(obj):
+    """Return the container identifier of obj when it is an ISSN, else None."""
+    is_issn = py_.get(obj, "container.identifierType") == "ISSN"
+    return py_.get(obj, "container.identifier") if is_issn else None
+"""Errors for the Crossref XML API.
+Error responses will be converted into an exception from this module.
+"""
+class HttpError(Exception):
+    """Exception raised when a connection problem happens.
+    Derives directly from Exception, so it is not caught by handlers that
+    only catch CrossrefError.
+    """
+class CrossrefError(Exception):
+    """Base exception for known HTTP error codes returned by the server.
+    Known HTTP error codes include:
+    * 204 No Content
+    * 400 Bad Request
+    * 401 Unauthorized
+    * 403 Forbidden
+    * 404 Not Found
+    * 410 Gone (deleted)
+    """
+    @staticmethod
+    def factory(err_code, *args):
+        """Return the exception instance that matches the HTTP error code.
+        Any extra positional arguments are forwarded to the exception
+        constructor. Codes without a dedicated exception class yield a
+        CrossrefServerError.
+        """
+        # NOTE: 410 is listed in the class docstring but has no entry here,
+        # so it falls through to CrossrefServerError like any other code.
+        known_errors = (
+            (204, CrossrefNoContentError),
+            (400, CrossrefBadRequestError),
+            (401, CrossrefUnauthorizedError),
+            (403, CrossrefForbiddenError),
+            (404, CrossrefNotFoundError),
+        )
+        for code, error_class in known_errors:
+            if err_code == code:
+                return error_class(*args)
+        return CrossrefServerError(*args)
+class CrossrefServerError(CrossrefError):
+    """An internal server error happened on the Crossref end. Try later.
+    Base class for all 5XX-related HTTP error codes. CrossrefError.factory
+    also returns this class for every code it has no dedicated branch for.
+    """
+class CrossrefRequestError(CrossrefError):
+    """A Crossref request error. You made an invalid request.
+    Base class for all 4XX-related HTTP error codes as well as 204. Catching
+    this class also catches its more specific subclasses.
+    """
+class CrossrefNoContentError(CrossrefRequestError):
+    """DOI is known to Crossref, but not resolvable.
+    This might be due to handle's latency. Returned by CrossrefError.factory
+    for HTTP status 204.
+    """
+class CrossrefBadRequestError(CrossrefRequestError):
+    """Bad request error.
+    Bad requests can include e.g. invalid XML, wrong domain, wrong prefix.
+    Returned by CrossrefError.factory for HTTP status 400.
+    """
+class CrossrefUnauthorizedError(CrossrefRequestError):
+    """Bad username or password.
+    Returned by CrossrefError.factory for HTTP status 401.
+    """
+class CrossrefForbiddenError(CrossrefRequestError):
+    """Login problem, record belongs to another party or quota exceeded.
+    Returned by CrossrefError.factory for HTTP status 403.
+    """
-    return generate_crossref_xml_list(metalist)
+class CrossrefNotFoundError(CrossrefRequestError):
+    """DOI does not exist in the database.
+    Returned by CrossrefError.factory for HTTP status 404.
+    """

commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl

commonmeta-py 0.107py3-none-any.whl → 0.108py3-none-any.whl