PyPI - commonmeta-py - Versions diffs - 0.107__py3-none-any.whl → 0.108__py3-none-any.whl - Mend

commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

commonmeta/__init__.py +12 -15
commonmeta/api_utils.py +3 -2
commonmeta/base_utils.py +186 -3
commonmeta/cli.py +114 -34
commonmeta/constants.py +20 -0
commonmeta/file_utils.py +112 -0
commonmeta/metadata.py +102 -42
commonmeta/readers/codemeta_reader.py +1 -1
commonmeta/readers/crossref_reader.py +23 -10
commonmeta/readers/crossref_xml_reader.py +1 -1
commonmeta/readers/datacite_reader.py +6 -4
commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
commonmeta/resources/crossref/fundref.xsd +29 -19
commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
commonmeta/resources/crossref/module-ali.xsd +14 -6
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
commonmeta/resources/crossref/xml.xsd +287 -0
commonmeta/schema_utils.py +25 -0
commonmeta/utils.py +25 -9
commonmeta/writers/bibtex_writer.py +5 -5
commonmeta/writers/commonmeta_writer.py +4 -17
commonmeta/writers/crossref_xml_writer.py +1031 -4
commonmeta/writers/csl_writer.py +1 -2
commonmeta/writers/datacite_writer.py +8 -4
commonmeta/writers/inveniordm_writer.py +277 -2
commonmeta/writers/ris_writer.py +3 -3
commonmeta/writers/schema_org_writer.py +10 -5
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/METADATA +4 -2
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/RECORD +45 -31
commonmeta/crossref_utils.py +0 -662
commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
{commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0

commonmeta/metadata.py CHANGED Viewed

@@ -8,8 +8,7 @@ import yaml
 from pydash import py_
 from .base_utils import parse_xml, wrap
-from .constants import CM_TO_CR_TRANSLATIONS
-from .doi_utils import doi_from_url
+from .file_utils import write_output
 from .readers.cff_reader import get_cff, read_cff
 from .readers.codemeta_reader import (
     get_codemeta,
@@ -34,7 +33,7 @@ from .readers.inveniordm_reader import (
     get_inveniordm,
     read_inveniordm,
 )
-from .readers.json_feed_reader import get_json_feed_item, read_json_feed_item
+from .readers.jsonfeed_reader import get_jsonfeed, read_jsonfeed
 from .readers.kbase_reader import read_kbase
 from .readers.openalex_reader import (
     get_openalex,
@@ -45,17 +44,25 @@ from .readers.schema_org_reader import (
     get_schema_org,
     read_schema_org,
 )
-from .schema_utils import json_schema_errors
+from .schema_utils import json_schema_errors, xml_schema_errors
 from .utils import find_from_format, normalize_id
 from .writers.bibtex_writer import write_bibtex, write_bibtex_list
 from .writers.citation_writer import write_citation, write_citation_list
 from .writers.commonmeta_writer import write_commonmeta, write_commonmeta_list
-from .writers.crossref_xml_writer import write_crossref_xml, write_crossref_xml_list
+from .writers.crossref_xml_writer import (
+    push_crossref_xml_list,
+    write_crossref_xml,
+    write_crossref_xml_list,
+)
 from .writers.csl_writer import write_csl, write_csl_list
-from .writers.datacite_writer import write_datacite
-from .writers.inveniordm_writer import write_inveniordm
+from .writers.datacite_writer import write_datacite, write_datacite_list
+from .writers.inveniordm_writer import (
+    push_inveniordm_list,
+    write_inveniordm,
+    write_inveniordm_list,
+)
 from .writers.ris_writer import write_ris, write_ris_list
-from .writers.schema_org_writer import write_schema_org
+from .writers.schema_org_writer import write_schema_org, write_schema_org_list
 # pylint: disable=R0902
@@ -154,8 +161,8 @@ class Metadata:
             return get_codemeta(pid)
         elif via == "cff":
             return get_cff(pid)
-        elif via == "json_feed_item":
-            return get_json_feed_item(pid)
+        elif via == "jsonfeed":
+            return get_jsonfeed(pid)
         elif via == "inveniordm":
             return get_inveniordm(pid)
         elif via == "openalex":
@@ -195,7 +202,7 @@ class Metadata:
                 "datacite",
                 "schema_org",
                 "csl",
-                "json_feed_item",
+                "jsonfeed",
                 "codemeta",
                 "kbase",
                 "inveniordm",
@@ -231,8 +238,8 @@ class Metadata:
             return dict(read_codemeta(data))
         elif via == "cff":
             return dict(read_cff(data))
-        elif via == "json_feed_item":
-            return dict(read_json_feed_item(data, **kwargs))
+        elif via == "jsonfeed":
+            return dict(read_jsonfeed(data, **kwargs))
         elif via == "inveniordm":
             return dict(read_inveniordm(data))
         elif via == "kbase":
@@ -270,13 +277,13 @@ class Metadata:
     def _write_json_format(self, to: str) -> str:
         """Handle JSON-based output formats."""
         if to == "commonmeta":
-            result = write_commonmeta(self)
+            result = json.dumps(write_commonmeta(self))
         elif to == "datacite":
-            result = write_datacite(self)
+            result = json.dumps(write_datacite(self))
         elif to == "inveniordm":
-            result = write_inveniordm(self)
+            result = json.dumps(write_inveniordm(self))
         elif to == "schema_org":
-            result = write_schema_org(self)
+            result = json.dumps(write_schema_org(self))
         else:
             return "{}"
@@ -340,16 +347,19 @@ class Metadata:
     def _write_crossref_xml(self, **kwargs) -> str:
         """Write in Crossref XML format with error checking."""
-        doi = doi_from_url(self.id)
-        _type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
-        url = self.url
-        instance = {"doi": doi, "type": _type, "url": url}
+        # doi = doi_from_url(self.id)
+        # _type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
+        # url = self.url
+        # instance = {"doi": doi, "type": _type, "url": url}
         self.depositor = kwargs.get("depositor", None)
         self.email = kwargs.get("email", None)
         self.registrant = kwargs.get("registrant", None)
-        self.write_errors = json_schema_errors(instance, schema="crossref")
-        result = write_crossref_xml(self)
-        return result if result is not None else ""
+        output = write_crossref_xml(self)
+        self.write_errors = xml_schema_errors(output, schema="crossref_xml")
+        if self.write_errors is not None:
+            self.is_valid = False
+            return ""
+        return output if output is not None else ""
 class MetadataList:
@@ -378,6 +388,12 @@ class MetadataList:
         self.depositor = kwargs.get("depositor", None)
         self.email = kwargs.get("email", None)
         self.registrant = kwargs.get("registrant", None)
+        self.login_id = kwargs.get("login_id", None)
+        self.login_passwd = kwargs.get("login_passwd", None)
+        # options needed for InvenioRDM registration
+        self.host = kwargs.get("host", None)
+        self.token = kwargs.get("token", None)
         self.items = self.read_metadata_list(wrap(meta.get("items", None)), **kwargs)
         self.errors = [i.errors for i in self.items if i.errors is not None]
@@ -387,8 +403,7 @@ class MetadataList:
         self.is_valid = all([i.is_valid for i in self.items])
         # other options
-        self.jsonlines = kwargs.get("jsonlines", False)
-        self.filename = kwargs.get("filename", None)
+        self.file = kwargs.get("file", None)
     def get_metadata_list(self, string) -> list:
         if string is None or not isinstance(string, (str, bytes)):
@@ -396,11 +411,12 @@ class MetadataList:
         if self.via in [
             "commonmeta",
             "crossref",
+            "csl",
             "datacite",
-            "schema_org",
+            "inveniordm",
+            "jsonfeed",
             "openalex",
-            "csl",
-            "json_feed_item",
+            "schema_org",
         ]:
             return json.loads(string)
         else:
@@ -413,23 +429,67 @@ class MetadataList:
     def write(self, to: str = "commonmeta", **kwargs) -> str:
         """convert metadata list into different formats"""
-        if to == "commonmeta":
-            return write_commonmeta_list(self)
-        elif to == "bibtex":
-            return write_bibtex_list(self)
-        elif to == "csl":
-            return write_csl_list(self)
+        if to == "bibtex":
+            output = write_bibtex_list(self)
+            if self.file:
+                return write_output(self.file, output, [".bib"])
+            else:
+                return output
         elif to == "citation":
             return write_citation_list(self, **kwargs)
+        elif to == "commonmeta":
+            output = json.dumps(write_commonmeta_list(self))
+            if self.file:
+                return write_output(self.file, output, [".json", ".jsonl"])
+            else:
+                return output
+        elif to == "crossref_xml":
+            output = write_crossref_xml_list(self)
+            if self.file:
+                return write_output(self.file, output, [".xml"])
+            else:
+                return output
+        elif to == "csl":
+            output = json.dumps(write_csl_list(self))
+            if self.file:
+                return write_output(self.file, output, [".json"])
+            else:
+                return output
+        elif to == "datacite":
+            output = json.dumps(write_datacite_list(self))
+            if self.file:
+                return write_output(self.file, output, [".json"])
+            else:
+                return output
+        elif to == "inveniordm":
+            output = json.dumps(write_inveniordm_list(self))
+            if self.file:
+                return write_output(self.file, output, [".json"])
+            else:
+                return output
         elif to == "ris":
             return write_ris_list(self)
         elif to == "schema_org":
-            raise ValueError("Schema.org not supported for metadata lists")
+            output = json.dumps(write_schema_org_list(self))
+            if self.file:
+                return write_output(self.file, output, [".json"])
+            else:
+                return output
+        else:
+            raise ValueError("No valid output format found")
+    def push(self, to: str = "commonmeta", **kwargs) -> str:
+        """push metadata list to external APIs"""
+        if to == "crossref_xml":
+            response = push_crossref_xml_list(
+                self, login_id=self.login_id, login_passwd=self.login_passwd
+            )
+            return response
         elif to == "datacite":
-            raise ValueError("Datacite not supported for metadata lists")
-        elif to == "openalex":
-            raise ValueError("OpenAlex not supported for metadata lists")
-        elif to == "crossref_xml":
-            return write_crossref_xml_list(self)
+            raise ValueError("Datacite not yet supported for metadata lists")
+        elif to == "inveniordm":
+            response = push_inveniordm_list(self, host=self.host, token=self.token)
+            return response
         else:
-            raise ValueError("No output format found")
+            raise ValueError("No valid output format found")

commonmeta/readers/codemeta_reader.py CHANGED Viewed

@@ -75,7 +75,7 @@ def read_codemeta(data: Optional[dict], **kwargs) -> Commonmeta:
         descriptions = [
             {
                 "description": sanitize(str(meta.get("description"))),
-                "descriptionType": "Abstract",
+                "type": "Abstract",
             }
         ]
     else:

commonmeta/readers/crossref_reader.py CHANGED Viewed

@@ -1,13 +1,14 @@
 """crossref reader for commonmeta-py"""
 from typing import Optional
+from xml.parsers.expat import ExpatError
 import requests
 from pydash import py_
 from requests.exceptions import ConnectionError, ReadTimeout
 from ..author_utils import get_authors
-from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
+from ..base_utils import compact, parse_attributes, parse_xml, presence, sanitize, wrap
 from ..constants import (
     CR_TO_CM_CONTAINER_TRANSLATIONS,
     CR_TO_CM_TRANSLATIONS,
@@ -66,6 +67,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
     doi = meta.get("DOI", None)
     _id = doi_as_url(doi)
     _type = CR_TO_CM_TRANSLATIONS.get(meta.get("type", None)) or "Other"
+    additional_type = meta.get("subtype", None)
     archive_locations = wrap(meta.get("archive", None))
@@ -120,13 +122,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
         relations = py_.uniq(relations)
     references = py_.uniq([get_reference(i) for i in wrap(meta.get("reference", None))])
     funding_references = from_crossref_funding(wrap(meta.get("funder", None)))
-    description = meta.get("abstract", None)
-    if description is not None:
-        descriptions = [{"description": sanitize(description), "type": "Abstract"}]
-    else:
-        descriptions = None
+    descriptions = get_abstract(meta)
     subjects = py_.uniq(
         [
             {"subject": i}
@@ -146,7 +142,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
         "id": _id,
         "type": _type,
         # recommended and optional properties
-        "additionalType": None,
+        "additionalType": additional_type,
         "archiveLocations": presence(archive_locations),
         "container": presence(container),
         "contributors": presence(contributors),
@@ -201,6 +197,23 @@ def get_titles(meta):
     )
+def get_abstract(meta: dict) -> Optional[str]:
+    """Get abstract from Crossref metadata."""
+    abstract = meta.get("abstract", None)
+    if abstract is None:
+        return None
+    try:
+        # Parse the abstract XML if it is JATS formatted
+        description_dct = parse_xml(abstract, xml_attribs=True)
+        description = py_.get(description_dct, "jats:p")
+        if description is None:
+            description = abstract
+        return [{"description": sanitize(description), "type": "Abstract"}]
+    except (TypeError, ExpatError):
+        return [{"description": sanitize(abstract), "type": "Abstract"}]
 def get_reference(reference: Optional[dict]) -> Optional[dict]:
     """Get reference from Crossref reference"""
     if reference is None or not isinstance(reference, dict):
@@ -341,7 +354,7 @@ def get_container(meta: dict, issn: str) -> dict:
     )
     isbn = isbn["value"] if isbn else None
     container_title = parse_attributes(meta.get("container-title", None), first=True)
-    if not container_title and container_type in ["Periodical"]:
+    if not container_title:
         container_title = py_.get(meta, "institution.0.name")
     volume = meta.get("volume", None)
     issue = py_.get(meta, "journal-issue.issue")

commonmeta/readers/crossref_xml_reader.py CHANGED Viewed

@@ -332,7 +332,7 @@ def crossref_description(bibmeta):
         )
         return compact(
             {
-                "descriptionType": description_type,
+                "type": description_type,
                 "description": sanitize(
                     parse_attributes(element, content="p", first=True)
                 ),

commonmeta/readers/datacite_reader.py CHANGED Viewed

@@ -267,13 +267,15 @@ def get_descriptions(descriptions: list) -> list:
     def map_description(description):
         """map_description"""
+        type = description.get("descriptionType", None)
+        if type is None:
+            type = "Abstract"
+        elif type not in ["Abstract", "Methods", "TechnicalInfo", "Other"]:
+            type = "Other"
         return compact(
             {
                 "description": description.get("description", None),
-                "type": description.get("descriptionType")
-                if description.get("descriptionType", None)
-                in ["Abstract", "Methods", "TechnicalInfo", "Other"]
-                else "Other",
+                "type": type,
                 "language": description.get("lang", None),
             }
         )

commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} RENAMED Viewed

@@ -21,7 +21,7 @@ from ..doi_utils import (
 from ..utils import (
     compact,
     dict_to_spdx,
-    from_json_feed,
+    from_jsonfeed,
     issn_as_url,
     name_to_fos,
     normalize_url,
@@ -31,19 +31,19 @@ from ..utils import (
 )
-def get_json_feed_item(pid: str, **kwargs) -> dict:
-    """get_json_feed_item"""
+def get_jsonfeed(pid: str, **kwargs) -> dict:
+    """get_jsonfeed"""
     if pid is None:
         return {"state": "not_found"}
     url = normalize_url(pid)
     response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
     if response.status_code != 200:
         return {"state": "not_found"}
-    return response.json() | {"via": "json_feed_item"}
+    return response.json() | {"via": "jsonfeed"}
-def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
-    """read_json_feed_item"""
+def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
+    """read_jsonfeed"""
     if data is None:
         return {"state": "not_found"}
     meta = data
@@ -64,7 +64,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
         _id = encode_doi(prefix)
     if meta.get("authors", None):
-        contributors = get_authors(from_json_feed(wrap(meta.get("authors"))))
+        contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
     else:
         contributors = None
@@ -176,7 +176,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
 def get_references(references: list) -> list:
-    """get json feed references."""
+    """get jsonfeed references."""
     def get_reference(reference: dict) -> Optional[dict]:
         if reference is None or not isinstance(reference, dict):
@@ -396,8 +396,8 @@ def get_files(pid: str) -> Optional[list]:
     ]
-def get_json_feed_item_uuid(id: str):
-    """get JSON Feed item by uuid"""
+def get_jsonfeed_uuid(id: str):
+    """get jsonfeed by uuid"""
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"
@@ -424,8 +424,8 @@ def get_json_feed_item_uuid(id: str):
     )
-def get_json_feed_blog_slug(id: str):
-    """get JSON Feed item by id and return blog slug"""
+def get_jsonfeed_blog_slug(id: str):
+    """get jsonfeed by id and return blog slug"""
     if id is None:
         return None
     url = f"https://api.rogue-scholar.org/posts/{id}"

commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl

commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl