commonmeta-py 0.107__tar.gz → 0.108__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {commonmeta_py-0.107 → commonmeta_py-0.108}/PKG-INFO +4 -2
- {commonmeta_py-0.107 → commonmeta_py-0.108}/README.md +1 -1
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/__init__.py +12 -15
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/api_utils.py +3 -2
- commonmeta_py-0.108/commonmeta/base_utils.py +307 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/cli.py +114 -34
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/constants.py +20 -0
- commonmeta_py-0.108/commonmeta/file_utils.py +112 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/metadata.py +102 -42
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/codemeta_reader.py +1 -1
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/crossref_reader.py +23 -10
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/crossref_xml_reader.py +1 -1
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/datacite_reader.py +6 -4
- commonmeta_py-0.107/commonmeta/readers/json_feed_reader.py → commonmeta_py-0.108/commonmeta/readers/jsonfeed_reader.py +12 -12
- commonmeta_py-0.108/commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
- commonmeta_py-0.107/commonmeta/resources/crossref/crossref5.3.1.xsd → commonmeta_py-0.108/commonmeta/resources/crossref/crossref5.4.0.xsd +286 -88
- commonmeta_py-0.108/commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/fundref.xsd +59 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/module-ali.xsd +14 -6
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
- commonmeta_py-0.108/commonmeta/resources/crossref/xml.xsd +287 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/schema_utils.py +25 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/utils.py +25 -9
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/bibtex_writer.py +5 -5
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/commonmeta_writer.py +4 -17
- commonmeta_py-0.108/commonmeta/writers/crossref_xml_writer.py +1046 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/csl_writer.py +1 -2
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/datacite_writer.py +8 -4
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/inveniordm_writer.py +277 -2
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/ris_writer.py +3 -3
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/schema_org_writer.py +10 -5
- {commonmeta_py-0.107 → commonmeta_py-0.108}/pyproject.toml +3 -1
- commonmeta_py-0.107/commonmeta/base_utils.py +0 -124
- commonmeta_py-0.107/commonmeta/crossref_utils.py +0 -662
- commonmeta_py-0.107/commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
- commonmeta_py-0.107/commonmeta/resources/crossref/fundref.xsd +0 -49
- commonmeta_py-0.107/commonmeta/writers/crossref_xml_writer.py +0 -19
- {commonmeta_py-0.107 → commonmeta_py-0.108}/.gitignore +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/LICENSE +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/author_utils.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/date_utils.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/doi_utils.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/__init__.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/cff_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/commonmeta_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/csl_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/datacite_xml_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/inveniordm_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/kbase_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/openalex_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/ris_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/schema_org_reader.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/cff_v1.2.0.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.12.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.13.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.14.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.15.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.16.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/AccessIndicators.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/clinicaltrials.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/relations.xsd +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref-v0.2.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/csl-data.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/datacite-v4.5.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/datacite-v4.5pr.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/spdx/licenses.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/spdx-schema.json +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/apa.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/chicago-author-date.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/harvard-cite-them-right.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/ieee.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/modern-language-association.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/vancouver.csl +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/translators.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/__init__.py +0 -0
- {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/citation_writer.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: commonmeta-py
|
3
|
-
Version: 0.107
|
3
|
+
Version: 0.108
|
4
4
|
Summary: Library for conversions to/from the Commonmeta scholarly metadata format
|
5
5
|
Project-URL: Homepage, https://python.commonmeta.org
|
6
6
|
Project-URL: Repository, https://github.com/front-matter/commonmeta-py
|
@@ -33,6 +33,7 @@ Requires-Dist: pydash<9,>=6
|
|
33
33
|
Requires-Dist: pyjwt<3,>=2.8.0
|
34
34
|
Requires-Dist: python-dateutil<3,>=2.8.2
|
35
35
|
Requires-Dist: pyyaml>=5.4
|
36
|
+
Requires-Dist: requests-toolbelt>=1.0.0
|
36
37
|
Requires-Dist: requests>=2.31.0
|
37
38
|
Requires-Dist: requests>=2.32.3
|
38
39
|
Requires-Dist: simplejson~=3.18
|
@@ -40,6 +41,7 @@ Requires-Dist: types-beautifulsoup4<5,>=4.11
|
|
40
41
|
Requires-Dist: types-dateparser~=1.1
|
41
42
|
Requires-Dist: types-pyyaml>=5.4
|
42
43
|
Requires-Dist: types-xmltodict<0.20,>=0.13
|
44
|
+
Requires-Dist: xmlschema>=4.0.1
|
43
45
|
Requires-Dist: xmltodict<0.20,>=0.12
|
44
46
|
Description-Content-Type: text/markdown
|
45
47
|
|
@@ -85,7 +87,7 @@ Commometa-py reads and/or writes these metadata formats:
|
|
85
87
|
| [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
|
86
88
|
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
|
87
89
|
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
|
88
|
-
| [JSON Feed](https://www.jsonfeed.org/) |
|
90
|
+
| [JSON Feed](https://www.jsonfeed.org/) | jsonfeed | application/feed+json | yes | later |
|
89
91
|
| [OpenAlex](https://www.openalex.org/) | openalex | | yes | no |
|
90
92
|
|
91
93
|
_commonmeta_: the Commonmeta format is the native format for the library and used internally.
|
@@ -40,7 +40,7 @@ Commometa-py reads and/or writes these metadata formats:
|
|
40
40
|
| [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
|
41
41
|
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
|
42
42
|
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
|
43
|
-
| [JSON Feed](https://www.jsonfeed.org/) |
|
43
|
+
| [JSON Feed](https://www.jsonfeed.org/) | jsonfeed | application/feed+json | yes | later |
|
44
44
|
| [OpenAlex](https://www.openalex.org/) | openalex | | yes | no |
|
45
45
|
|
46
46
|
_commonmeta_: the Commonmeta format is the native format for the library and used internally.
|
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.107"
|
13
|
+
__version__ = "0.108"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
@@ -31,18 +31,6 @@ from .base_utils import (
|
|
31
31
|
unwrap,
|
32
32
|
wrap,
|
33
33
|
)
|
34
|
-
from .crossref_utils import (
|
35
|
-
CrossrefBadRequestError,
|
36
|
-
CrossrefError,
|
37
|
-
CrossrefForbiddenError,
|
38
|
-
CrossrefNoContentError,
|
39
|
-
CrossrefNotFoundError,
|
40
|
-
CrossrefRequestError,
|
41
|
-
CrossrefServerError,
|
42
|
-
CrossrefUnauthorizedError,
|
43
|
-
HttpError,
|
44
|
-
generate_crossref_xml,
|
45
|
-
)
|
46
34
|
from .date_utils import (
|
47
35
|
get_date_from_crossref_parts,
|
48
36
|
get_date_from_date_parts,
|
@@ -66,6 +54,14 @@ from .doi_utils import (
|
|
66
54
|
validate_doi,
|
67
55
|
validate_prefix,
|
68
56
|
)
|
57
|
+
from .file_utils import (
|
58
|
+
download_file,
|
59
|
+
read_file,
|
60
|
+
read_gz_file,
|
61
|
+
read_zip_file,
|
62
|
+
uncompress_content,
|
63
|
+
unzip_content,
|
64
|
+
)
|
69
65
|
from .metadata import Metadata, MetadataList
|
70
66
|
from .readers import (
|
71
67
|
cff_reader,
|
@@ -75,7 +71,7 @@ from .readers import (
|
|
75
71
|
datacite_reader,
|
76
72
|
datacite_xml_reader,
|
77
73
|
inveniordm_reader,
|
78
|
-
|
74
|
+
jsonfeed_reader,
|
79
75
|
kbase_reader,
|
80
76
|
openalex_reader,
|
81
77
|
ris_reader,
|
@@ -87,7 +83,7 @@ from .utils import (
|
|
87
83
|
extract_url,
|
88
84
|
extract_urls,
|
89
85
|
from_csl,
|
90
|
-
|
86
|
+
from_jsonfeed,
|
91
87
|
from_schema_org,
|
92
88
|
get_language,
|
93
89
|
issn_as_url,
|
@@ -109,6 +105,7 @@ from .writers import (
|
|
109
105
|
bibtex_writer,
|
110
106
|
citation_writer,
|
111
107
|
commonmeta_writer,
|
108
|
+
crossref_xml_writer,
|
112
109
|
csl_writer,
|
113
110
|
datacite_writer,
|
114
111
|
ris_writer,
|
@@ -7,8 +7,9 @@ import jwt
|
|
7
7
|
import requests
|
8
8
|
from furl import furl
|
9
9
|
|
10
|
+
from commonmeta.readers.jsonfeed_reader import get_jsonfeed_uuid
|
11
|
+
|
10
12
|
from .doi_utils import doi_as_url, validate_doi
|
11
|
-
from .readers.json_feed_reader import get_json_feed_item_uuid
|
12
13
|
|
13
14
|
|
14
15
|
def generate_ghost_token(key: str) -> str:
|
@@ -34,7 +35,7 @@ def update_ghost_post_via_api(
|
|
34
35
|
"""Update Ghost post via API"""
|
35
36
|
# get post doi and url from Rogue Scholar API
|
36
37
|
# post url is needed to find post via Ghost API
|
37
|
-
post =
|
38
|
+
post = get_jsonfeed_uuid(_id)
|
38
39
|
if post.get("error", None):
|
39
40
|
return post
|
40
41
|
doi = validate_doi(post.get("doi", None))
|
@@ -0,0 +1,307 @@
|
|
1
|
+
"""Base utilities for commonmeta-py"""
|
2
|
+
|
3
|
+
import html
|
4
|
+
import re
|
5
|
+
import uuid
|
6
|
+
from datetime import datetime
|
7
|
+
from os import path
|
8
|
+
from typing import Optional, Union
|
9
|
+
|
10
|
+
import nh3
|
11
|
+
import pydash as py_
|
12
|
+
import xmltodict
|
13
|
+
|
14
|
+
|
15
|
+
def wrap(item) -> list:
    """Coerce ``item`` into a list.

    ``None`` becomes an empty list, an existing list is returned unchanged
    (the very same object, not a copy), and any other value is placed in a
    new single-element list.
    """
    if isinstance(item, list):
        return item
    return [] if item is None else [item]
|
22
|
+
|
23
|
+
|
24
|
+
def unwrap(lst: list) -> Optional[Union[dict, list]]:
    """Collapse a list according to how many elements it holds.

    An empty list yields ``None``, a single-element list yields that
    element, and a longer list is returned unchanged.
    """
    size = len(lst)
    if size > 1:
        return lst
    return lst[0] if size == 1 else None
|
31
|
+
|
32
|
+
|
33
|
+
def presence(
    item: Optional[Union[dict, list, str]],
) -> Optional[Union[dict, list, str]]:
    """Return ``item`` unless it is blank, in which case return ``None``.

    Blank means ``None``, any zero-length value, or the list ``[{}]``.
    """
    if item is None:
        return None
    if len(item) == 0:
        return None
    if item == [{}]:
        return None
    return item
|
38
|
+
|
39
|
+
|
40
|
+
def compact(dict_or_list: Union[dict, list]) -> Optional[Union[dict, list]]:
    """Remove ``None`` from a dict or list.

    * dict: returns a new dict without the keys whose value is ``None``
      (top level only; nested values are left untouched).
    * list: returns a new list in which nested dicts/lists are compacted
      recursively, other values are kept as they are, and ``None`` entries
      are dropped. Returns ``None`` when nothing is left.
    * anything else: returns ``None``.
    """
    if isinstance(dict_or_list, dict):
        return {k: v for k, v in dict_or_list.items() if v is not None}
    if isinstance(dict_or_list, list):
        cleaned = []
        for entry in dict_or_list:
            # Only containers are compacted recursively. Previously scalars
            # were passed through compact() as well, which turned every
            # string/number in the list into None.
            if isinstance(entry, (dict, list)):
                entry = compact(entry)
            if entry is not None:
                cleaned.append(entry)
        return cleaned or None

    return None
|
49
|
+
|
50
|
+
|
51
|
+
def parse_attributes(
    element: Union[str, dict, list], **kwargs
) -> Optional[Union[str, list]]:
    """Extract a value from a string, dict or list, HTML-unescaping strings.

    Keyword arguments read here:
        content: key to look up in dict elements (defaults to ``"#text"``).
        first: when truthy and ``element`` is a non-empty list, return only
            the first extracted value.

    Returns:
        * str element (and no ``content`` given): the unescaped string.
        * dict element: the value stored under the ``content`` key, or None.
        * list element: the values extracted from its truthy items, reduced
          with ``unwrap`` unless ``first`` is set.
        * anything else: None.
    """
    key = kwargs.get("content", "#text")

    def parse_item(entry):
        # dicts are looked up by key, everything else is treated as text
        if not isinstance(entry, dict):
            return html.unescape(entry)
        return entry.get(html.unescape(key), None)

    if isinstance(element, str) and kwargs.get("content", None) is None:
        return html.unescape(element)
    if isinstance(element, dict):
        return element.get(html.unescape(key), None)
    if isinstance(element, list):
        values = [parse_item(entry) for entry in element if entry]
        if len(values) > 0 and kwargs.get("first"):
            return values[0]
        return unwrap(values)
    return None
|
70
|
+
|
71
|
+
|
72
|
+
def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
    """Parse XML into a dict with xmltodict, applying this module's defaults.

    ``string`` may be XML text or the path of an existing file, whose
    content is then read as UTF-8. ``None`` and the literal ``"{}"`` yield
    ``None``.

    With ``dialect="crossref"`` namespace processing is switched on, the
    listed Crossref/JATS namespaces are mapped to ``None``, and a fixed set
    of element names is passed as ``force_list``. For every dialect
    ``attr_prefix`` is set to ``""`` and ``dict_constructor`` to ``dict``.
    All remaining keyword arguments are forwarded to ``xmltodict.parse``.
    """
    if string is None or string == "{}":
        return None

    # the argument doubles as a file path when such a file exists
    if path.exists(string):
        with open(string, encoding="utf-8") as handle:
            string = handle.read()

    # "dialect" is consumed here and never forwarded to xmltodict
    dialect = kwargs.pop("dialect", None)
    if dialect == "crossref":
        crossref_namespaces = dict.fromkeys(
            (
                "http://www.crossref.org/schema/5.4.0",
                "http://www.crossref.org/qrschema/3.0",
                "http://www.crossref.org/xschema/1.0",
                "http://www.crossref.org/xschema/1.1",
                "http://www.crossref.org/AccessIndicators.xsd",
                "http://www.crossref.org/relations.xsd",
                "http://www.crossref.org/fundref.xsd",
                "http://www.ncbi.nlm.nih.gov/JATS1",
            ),
            None,
        )
        kwargs.update(
            process_namespaces=True,
            namespaces=crossref_namespaces,
            force_list={
                "person_name",
                "organization",
                "titles",
                "abstract",
                "item",
                "citation",
                "program",
                "related_item",
            },
        )

    kwargs.update(attr_prefix="", dict_constructor=dict)
    return xmltodict.parse(string, **kwargs)
|
110
|
+
|
111
|
+
|
112
|
+
def unparse_xml(input: Optional[dict], **kwargs) -> Optional[str]:
    """Unparse (dump) a dict into XML using xmltodict.

    Returns ``None`` when ``input`` is ``None``. Output is always
    pretty-printed; remaining keyword arguments go to ``xmltodict.unparse``.

    With ``dialect="crossref"`` the first key of ``input`` names the record
    type and its value holds that element's attributes. The other keys are
    regrouped into the nesting used for that type (book, database, journal,
    proceedings_article, sa_component, or a flat element otherwise), and the
    result is wrapped in a ``doi_batch`` document with ``head`` and ``body``.

    Compared with the previous implementation, optional sections
    (``book_metadata``, ``journal_issue``, ``component``, ...) may be
    absent without raising ``KeyError``/``TypeError``, and the caller's
    dict is no longer emptied in place.
    """
    if input is None:
        return None

    document = input
    if kwargs.get("dialect", None) == "crossref":
        # shallow copy: the regrouping below pops keys one by one
        remaining = dict(input)
        root = next(iter(remaining))
        attributes = remaining.pop(root) or {}

        if root == "book":
            book_metadata = remaining.pop("book_metadata", None) or {}
            book_metadata = {**book_metadata, **remaining}
            body = {"book": {**attributes, "book_metadata": book_metadata}}
        elif root == "database":
            database_metadata = remaining.pop("database_metadata", None) or {}
            titles = remaining.pop("publisher_item", None)
            institution = remaining.pop("institution", None)
            if titles is not None:
                # keep "titles" first, ahead of the other metadata keys
                database_metadata = {"titles": titles, **database_metadata}
            database_metadata["institution"] = institution or {}
            component = remaining.pop("component", None) or {}
            body = {
                "database": {
                    **attributes,
                    "database_metadata": database_metadata,
                    "component_list": {"component": component | remaining},
                }
            }
        elif root == "journal":
            journal_metadata = remaining.pop("journal_metadata", None) or {}
            journal_issue = remaining.pop("journal_issue", None) or {}
            journal_article = remaining.pop("journal_article", None) or {}
            # the value stored under "journal" is not copied to the output,
            # exactly as before
            body = {
                "journal": {
                    "journal_metadata": journal_metadata,
                    "journal_issue": journal_issue,
                    "journal_article": journal_article | remaining,
                }
            }
        elif root == "proceedings_article":
            proceedings_metadata = remaining.pop("proceedings_metadata", None) or {}
            body = {
                "proceedings": {
                    **attributes,
                    "proceedings_metadata": proceedings_metadata,
                    "conference_paper": remaining,
                }
            }
        elif root == "sa_component":
            component = remaining.pop("component", None) or {}
            body = {
                "sa_component": {
                    **attributes,
                    "component_list": {"component": component | remaining},
                }
            }
        else:
            body = {root: attributes | remaining}

        doi_batch = {
            "@xmlns": "http://www.crossref.org/schema/5.4.0",
            "@version": "5.4.0",
            # the head is derived from the regrouped body, as before
            "head": get_crossref_xml_head(body),
            "body": body,
        }
        document = {"doi_batch": doi_batch}

    kwargs["pretty"] = True
    kwargs["indent"] = " "
    kwargs.pop("dialect", None)
    return xmltodict.unparse(document, **kwargs)
|
190
|
+
|
191
|
+
|
192
|
+
def unparse_xml_list(input: Optional[list], **kwargs) -> Optional[str]:
    """Unparse (dump) a list of records into one XML document using xmltodict.

    Returns ``None`` when ``input`` is ``None``. Output is always
    pretty-printed; keyword arguments other than ``dialect`` and ``head``
    are forwarded to ``xmltodict.unparse``.

    With ``dialect="crossref"`` each record is regrouped by its type (the
    first key of the record), records of the same type are collected under
    one body key, and everything is wrapped in a ``doi_batch`` document.
    The optional ``head`` keyword argument is passed to
    ``get_crossref_xml_head``.

    Fixes over the previous implementation:
      * ``sa_component`` records read ``component`` from the record itself
        rather than from the whole input list, which silently dropped it.
      * a missing ``head`` keyword argument no longer raises ``KeyError``.
      * optional sections may be absent without raising ``KeyError``.
      * other dialects pass ``input`` through to xmltodict instead of
        failing on an unbound local variable.
      * the caller's records are no longer emptied in place.
    """
    if input is None:
        return None

    output = input
    if kwargs.get("dialect", None) == "crossref":
        # Group items by type with minimal grouping
        items_by_type = {}

        for record in wrap(input):
            # shallow copy: the regrouping below pops keys one by one
            item = dict(record)
            root = next(iter(item))
            attributes = item.pop(root) or {}

            if root == "book":
                book_metadata = item.pop("book_metadata", None) or {}
                element = {
                    **attributes,
                    "book_metadata": {**book_metadata, **item},
                }
            elif root == "database":
                database_metadata = item.pop("database_metadata", None) or {}
                element = {
                    **attributes,
                    "database_metadata": {**database_metadata, **item},
                }
            elif root == "journal":
                journal_metadata = item.pop("journal_metadata", None) or {}
                journal_issue = item.pop("journal_issue", None) or {}
                journal_article = item.pop("journal_article", None) or {}
                # the value stored under "journal" is not copied to the
                # output, exactly as before
                element = {
                    "journal_metadata": journal_metadata,
                    "journal_issue": journal_issue,
                    "journal_article": journal_article | item,
                }
            elif root == "sa_component":
                component = item.pop("component", None) or {}
                element = {
                    **attributes,
                    "component_list": {"component": component | item},
                }
            else:
                element = attributes | item

            # Add item to appropriate type bucket
            items_by_type.setdefault(root, []).append(element)

        # A single item is emitted without a list, several items as a list
        body_content = {
            type_key: items[0] if len(items) == 1 else items
            for type_key, items in items_by_type.items()
        }
        head = kwargs.get("head") or {}
        doi_batch = {
            "@xmlns": "http://www.crossref.org/schema/5.4.0",
            "@xmlns:ai": "http://www.crossref.org/AccessIndicators.xsd",
            "@xmlns:rel": "http://www.crossref.org/relations.xsd",
            "@xmlns:fr": "http://www.crossref.org/fundref.xsd",
            "@version": "5.4.0",
            "head": get_crossref_xml_head(head),
            "body": body_content,
        }
        output = {"doi_batch": doi_batch}

    kwargs["pretty"] = True
    kwargs["indent"] = " "
    kwargs.pop("dialect", None)
    kwargs.pop("head", None)
    return xmltodict.unparse(output, **kwargs)
|
276
|
+
|
277
|
+
|
278
|
+
def sanitize(text: str, **kwargs) -> str:
    """Clean ``text`` with ``nh3.clean`` and collapse runs of whitespace.

    Keyword arguments read here:
        tags: set of tag names passed to ``nh3.clean``; when missing or
            empty, a small default set of inline formatting tags is used.
        attributes: passed through to ``nh3.clean`` (``None`` if absent).

    Every run of whitespace in the cleaned string, including leading and
    trailing runs, is replaced by a single space.
    """
    allowed_tags = kwargs.get("tags", None) or {
        "b",
        "br",
        "code",
        "em",
        "i",
        "sub",
        "sup",
        "strong",
    }
    cleaned = nh3.clean(
        text,
        tags=allowed_tags,
        attributes=kwargs.get("attributes", None),
        link_rel=None,
    )
    # collapse excessive internal whitespace
    return re.sub(r"\s+", " ", cleaned, flags=re.UNICODE)
|
295
|
+
|
296
|
+
|
297
|
+
def get_crossref_xml_head(metadata: dict) -> dict:
    """Build the ``head`` element for a Crossref XML document.

    A new random UUID is used as ``doi_batch_id`` and the current local
    time, formatted as ``YYYYMMDDHHMMSS``, as ``timestamp``. The depositor
    name, e-mail address and registrant are read from the ``depositor``,
    ``email`` and ``registrant`` keys of ``metadata``; missing or empty
    values fall back to the placeholders ``"test"`` / ``"info@example.org"``.
    """
    batch_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    depositor = {
        "depositor_name": metadata.get("depositor", None) or "test",
        "email_address": metadata.get("email", None) or "info@example.org",
    }
    registrant = metadata.get("registrant", None) or "test"

    # key order is kept as doi_batch_id, timestamp, depositor, registrant
    return {
        "doi_batch_id": batch_id,
        "timestamp": timestamp,
        "depositor": depositor,
        "registrant": registrant,
    }
|
@@ -2,16 +2,12 @@ import time
|
|
2
2
|
|
3
3
|
import click
|
4
4
|
import orjson as json
|
5
|
-
import pydash as py_
|
6
5
|
|
7
6
|
from commonmeta import Metadata, MetadataList # __version__
|
8
7
|
from commonmeta.api_utils import update_ghost_post_via_api
|
9
8
|
from commonmeta.doi_utils import decode_doi, encode_doi, validate_prefix
|
10
9
|
from commonmeta.readers.crossref_reader import get_random_crossref_id
|
11
10
|
from commonmeta.readers.datacite_reader import get_random_datacite_id
|
12
|
-
from commonmeta.readers.json_feed_reader import (
|
13
|
-
get_json_feed_item_uuid,
|
14
|
-
)
|
15
11
|
from commonmeta.readers.openalex_reader import get_random_openalex_id
|
16
12
|
|
17
13
|
|
@@ -46,6 +42,49 @@ def convert(
|
|
46
42
|
email,
|
47
43
|
registrant,
|
48
44
|
show_errors,
|
45
|
+
):
|
46
|
+
metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
|
47
|
+
if show_errors and not metadata.is_valid:
|
48
|
+
raise click.ClickException(str(metadata.errors))
|
49
|
+
|
50
|
+
click.echo(
|
51
|
+
metadata.write(
|
52
|
+
to=to,
|
53
|
+
style=style,
|
54
|
+
locale=locale,
|
55
|
+
depositor=depositor,
|
56
|
+
email=email,
|
57
|
+
registrant=registrant,
|
58
|
+
)
|
59
|
+
)
|
60
|
+
if show_errors and metadata.write_errors:
|
61
|
+
raise click.ClickException(str(metadata.write_errors))
|
62
|
+
|
63
|
+
|
64
|
+
@cli.command()
|
65
|
+
@click.argument("input", type=str, required=True)
|
66
|
+
@click.option("--via", "-f", type=str, default=None)
|
67
|
+
@click.option("--to", "-t", type=str, default="commonmeta")
|
68
|
+
@click.option("--style", "-s", type=str, default="apa")
|
69
|
+
@click.option("--locale", "-l", type=str, default="en-US")
|
70
|
+
@click.option("--doi", type=str)
|
71
|
+
@click.option("--prefix", type=str)
|
72
|
+
@click.option("--depositor", type=str)
|
73
|
+
@click.option("--email", type=str)
|
74
|
+
@click.option("--registrant", type=str)
|
75
|
+
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
76
|
+
def put(
|
77
|
+
input,
|
78
|
+
via,
|
79
|
+
to,
|
80
|
+
style,
|
81
|
+
locale,
|
82
|
+
doi,
|
83
|
+
prefix,
|
84
|
+
depositor,
|
85
|
+
email,
|
86
|
+
registrant,
|
87
|
+
show_errors,
|
49
88
|
):
|
50
89
|
metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
|
51
90
|
if show_errors and not metadata.is_valid:
|
@@ -75,8 +114,7 @@ def convert(
|
|
75
114
|
@click.option("--depositor", type=str)
|
76
115
|
@click.option("--email", type=str)
|
77
116
|
@click.option("--registrant", type=str)
|
78
|
-
@click.option("--
|
79
|
-
@click.option("--jsonlines/--no-jsonlines", type=bool, show_default=True, default=False)
|
117
|
+
@click.option("--file", type=str)
|
80
118
|
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
81
119
|
@click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
|
82
120
|
def list(
|
@@ -89,8 +127,69 @@ def list(
|
|
89
127
|
depositor,
|
90
128
|
email,
|
91
129
|
registrant,
|
92
|
-
|
93
|
-
|
130
|
+
file,
|
131
|
+
show_errors,
|
132
|
+
show_timer,
|
133
|
+
):
|
134
|
+
start = time.time()
|
135
|
+
metadata_list = MetadataList(
|
136
|
+
string,
|
137
|
+
via=via,
|
138
|
+
file=file,
|
139
|
+
depositor=depositor,
|
140
|
+
email=email,
|
141
|
+
registrant=registrant,
|
142
|
+
prefix=prefix,
|
143
|
+
)
|
144
|
+
end = time.time()
|
145
|
+
runtime = end - start
|
146
|
+
if show_errors and not metadata_list.is_valid:
|
147
|
+
raise click.ClickException(str(metadata_list.errors))
|
148
|
+
if file:
|
149
|
+
metadata_list.write(to=to, style=style, locale=locale)
|
150
|
+
else:
|
151
|
+
click.echo(metadata_list.write(to=to, style=style, locale=locale))
|
152
|
+
|
153
|
+
if show_errors and len(metadata_list.write_errors) > 0:
|
154
|
+
raise click.ClickException(str(metadata_list.write_errors))
|
155
|
+
if show_timer:
|
156
|
+
click.echo(f"Runtime: {runtime:.2f} seconds")
|
157
|
+
|
158
|
+
|
159
|
+
@cli.command()
|
160
|
+
@click.argument("string", type=str, required=True)
|
161
|
+
@click.option("--via", "-f", type=str)
|
162
|
+
@click.option("--to", "-t", type=str, default="commonmeta")
|
163
|
+
@click.option("--style", "-s", type=str, default="apa")
|
164
|
+
@click.option("--locale", "-l", type=str, default="en-US")
|
165
|
+
@click.option("--prefix", type=str)
|
166
|
+
@click.option("--depositor", type=str)
|
167
|
+
@click.option("--email", type=str)
|
168
|
+
@click.option("--registrant", type=str)
|
169
|
+
@click.option("--login_id", type=str)
|
170
|
+
@click.option("--login_passwd", type=str)
|
171
|
+
@click.option("--host", type=str)
|
172
|
+
@click.option("--token", type=str)
|
173
|
+
@click.option("--legacy-key", type=str)
|
174
|
+
@click.option("--file", type=str)
|
175
|
+
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
176
|
+
@click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
|
177
|
+
def push(
|
178
|
+
string,
|
179
|
+
via,
|
180
|
+
to,
|
181
|
+
style,
|
182
|
+
locale,
|
183
|
+
prefix,
|
184
|
+
depositor,
|
185
|
+
email,
|
186
|
+
registrant,
|
187
|
+
login_id,
|
188
|
+
login_passwd,
|
189
|
+
host,
|
190
|
+
token,
|
191
|
+
legacy_key,
|
192
|
+
file,
|
94
193
|
show_errors,
|
95
194
|
show_timer,
|
96
195
|
):
|
@@ -98,18 +197,22 @@ def list(
|
|
98
197
|
metadata_list = MetadataList(
|
99
198
|
string,
|
100
199
|
via=via,
|
200
|
+
file=file,
|
101
201
|
depositor=depositor,
|
102
202
|
email=email,
|
103
203
|
registrant=registrant,
|
204
|
+
login_id=login_id,
|
205
|
+
login_passwd=login_passwd,
|
206
|
+
host=host,
|
207
|
+
token=token,
|
104
208
|
prefix=prefix,
|
105
|
-
filename=filename,
|
106
|
-
jsonlines=jsonlines,
|
107
209
|
)
|
108
210
|
end = time.time()
|
109
211
|
runtime = end - start
|
110
212
|
if show_errors and not metadata_list.is_valid:
|
111
213
|
raise click.ClickException(str(metadata_list.errors))
|
112
|
-
|
214
|
+
|
215
|
+
click.echo(metadata_list.push(to=to, style=style, locale=locale))
|
113
216
|
if show_errors and len(metadata_list.write_errors) > 0:
|
114
217
|
raise click.ClickException(str(metadata_list.write_errors))
|
115
218
|
if show_timer:
|
@@ -167,29 +270,6 @@ def decode(doi):
|
|
167
270
|
click.echo(output)
|
168
271
|
|
169
272
|
|
170
|
-
@cli.command()
|
171
|
-
@click.argument("id", type=str, required=True)
|
172
|
-
def encode_by_id(id):
|
173
|
-
post = get_json_feed_item_uuid(id)
|
174
|
-
prefix = py_.get(post, "blog.prefix")
|
175
|
-
if validate_prefix(prefix) is None:
|
176
|
-
return None
|
177
|
-
output = encode_doi(prefix)
|
178
|
-
click.echo(output)
|
179
|
-
|
180
|
-
|
181
|
-
@cli.command()
|
182
|
-
@click.argument("filter", type=str, required=True, default="unregistered")
|
183
|
-
@click.option("--id", type=str)
|
184
|
-
def json_feed(filter, id=None):
|
185
|
-
if filter == "blog_slug" and id is not None:
|
186
|
-
post = get_json_feed_item_uuid(id)
|
187
|
-
output = py_.get(post, "blog.slug", "no slug found")
|
188
|
-
else:
|
189
|
-
output = "no filter specified"
|
190
|
-
click.echo(output)
|
191
|
-
|
192
|
-
|
193
273
|
@cli.command()
|
194
274
|
@click.argument("id", type=str, required=True)
|
195
275
|
@click.option("--api-key", "-k", type=str, required=True)
|
@@ -190,6 +190,7 @@ CM_TO_CR_TRANSLATIONS = {
|
|
190
190
|
"JournalIssue": "JournalIssue",
|
191
191
|
"JournalVolume": "JournalVolume",
|
192
192
|
"Journal": "Journal",
|
193
|
+
"PeerReview": "PeerReview",
|
193
194
|
"ProceedingsArticle": "ProceedingsArticle",
|
194
195
|
"ProceedingsSeries": "ProceedingsSeries",
|
195
196
|
"Proceedings": "Proceedings",
|
@@ -698,3 +699,22 @@ ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS = {
|
|
698
699
|
"https://ror.org/00yjd3n13": "https://doi.org/10.13039/501100001711",
|
699
700
|
"https://ror.org/04wfr2810": "https://doi.org/10.13039/501100003043",
|
700
701
|
}
|
702
|
+
|
703
|
+
COMMUNITY_TRANSLATIONS = {
|
704
|
+
"ai": "artificialintelligence",
|
705
|
+
"llms": "artificialintelligence",
|
706
|
+
"book%20review": "bookreview",
|
707
|
+
"bjps%20review%20of%20books": "bookreview",
|
708
|
+
"books": "bookreview",
|
709
|
+
"nachrichten": "news",
|
710
|
+
"opencitations": "researchassessment",
|
711
|
+
"papers": "researchblogging",
|
712
|
+
"urheberrecht": "copyright",
|
713
|
+
"workshop": "events",
|
714
|
+
"veranstaltungen": "events",
|
715
|
+
"veranstaltungshinweise": "events",
|
716
|
+
"asapbio": "preprints",
|
717
|
+
"biorxiv": "preprints",
|
718
|
+
"runiverse": "r",
|
719
|
+
"bericht": "report",
|
720
|
+
}
|