commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/bibtex_reader.py +0 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
- commonmeta_py-0.24.dist-info/RECORD +75 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
- commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
- commonmeta_py/__init__.py +0 -2
- commonmeta_py-0.22.dist-info/RECORD +0 -5
- {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
"""Writers for different metadata formats"""
|
@@ -0,0 +1,149 @@
|
|
1
|
+
"""Bibtex writer for commonmeta-py"""
|
2
|
+
from bibtexparser.bwriter import BibTexWriter
|
3
|
+
from bibtexparser.bibdatabase import BibDatabase
|
4
|
+
from bibtexparser.customization import page_double_hyphen
|
5
|
+
|
6
|
+
from ..utils import pages_as_string, get_language
|
7
|
+
from ..base_utils import compact
|
8
|
+
from ..author_utils import authors_as_string
|
9
|
+
from ..date_utils import get_month_from_date, get_iso8601_date, MONTH_SHORT_NAMES
|
10
|
+
from ..doi_utils import doi_from_url
|
11
|
+
from ..constants import CM_TO_BIB_TRANSLATIONS, Commonmeta
|
12
|
+
|
13
|
+
|
14
|
+
def write_bibtex(metadata: Commonmeta) -> str:
    """Write a single metadata record as a BibTeX string.

    Returns None when ``metadata`` is None or has recorded write errors;
    otherwise returns the string produced by bibtexparser's ``BibTexWriter``.
    """
    if metadata is None or metadata.write_errors is not None:
        return None

    bib_database = BibDatabase()
    # Apply bibtexparser's page_double_hyphen customization to the entry.
    bib_database.entries = [page_double_hyphen(write_bibtex_item(metadata))]

    writer = BibTexWriter()
    writer.common_strings = True
    # NOTE(review): indent string copied as shown; confirm the intended width.
    writer.indent = " "
    bibtex_str = writer.write(bib_database)

    # Hack to remove curly braces around month names
    for month_name in MONTH_SHORT_NAMES:
        bibtex_str = bibtex_str.replace(f"{{{month_name}}}", month_name)
    return bibtex_str
|
36
|
+
|
37
|
+
|
38
|
+
def write_bibtex_item(metadata: Commonmeta) -> dict:
    """Build the bibtexparser entry dict for a single metadata record.

    Missing ``container``, ``publisher`` and ``date`` attributes are treated
    as empty dicts so that records without them do not raise. The result is
    passed through ``compact``.
    """
    container = metadata.container or {}
    publisher_info = metadata.publisher or {}
    dates = metadata.date or {}
    date_published = get_iso8601_date(dates.get("published", None))
    authors = authors_as_string(metadata.contributors)

    # Join the first two titles with ": "; skip entries without a title so
    # that str.join never receives None.
    title_parts = [
        entry.get("title", None) for entry in (metadata.titles or [])[:2]
    ]
    title_parts = [part for part in title_parts if part]
    title = ": ".join(title_parts) if title_parts else None

    doi = doi_from_url(metadata.id)
    _id = doi if doi else metadata.id
    _type = CM_TO_BIB_TRANSLATIONS.get(metadata.type, "misc")
    abstract = (
        metadata.descriptions[0].get("description", None)
        if metadata.descriptions
        else None
    )
    author = authors if authors else None

    # Only stringify a license URL that is present; str(None) would
    # otherwise end up in the output as the literal "None".
    license_url = metadata.license.get("url", None) if metadata.license else None
    license_ = str(license_url) if license_url else None

    institution = publisher_info.get("name", None) if _type == "phdthesis" else None
    identifier_type = container.get("identifierType", None)
    issn = container.get("identifier", None) if identifier_type == "ISSN" else None
    isbn = container.get("identifier", None) if identifier_type == "ISBN" else None
    issue = container.get("issue", None)
    journal = (
        container.get("title", None)
        if _type not in ["inbook", "inproceedings"]
        and container.get("type") in ["Journal", "Periodical"]
        else None
    )
    booktitle = (
        container.get("title", None) if _type in ["inbook", "inproceedings"] else None
    )
    language = get_language(metadata.language)
    location = (
        container.get("location", None)
        if _type not in ["article", "phdthesis"]
        else None
    )
    month = get_month_from_date(date_published)
    pages = pages_as_string(container)
    publisher = (
        publisher_info.get("name", None)
        if _type not in ["article", "phdthesis"]
        else None
    )
    series = container.get("series", None)
    url = metadata.url
    year = date_published[:4] if date_published else None

    return compact(
        {
            "ID": _id,
            "ENTRYTYPE": _type,
            "abstract": abstract,
            "author": author,
            "copyright": license_,
            "doi": doi,
            "institution": institution,
            "isbn": isbn,
            "issn": issn,
            "issue": issue,
            "journal": journal,
            "booktitle": booktitle,
            "language": language,
            "location": location,
            "month": month,
            "pages": pages,
            "publisher": publisher,
            "series": series,
            "title": title,
            "url": url,
            "urldate": date_published,
            "year": year,
        }
    )
|
128
|
+
|
129
|
+
|
130
|
+
def write_bibtex_list(metalist):
    """Serialize every record in ``metalist.items`` into one BibTeX string.

    Returns None when ``metalist`` is None.
    """
    if metalist is None:
        return None

    entries = []
    for record in metalist.items:
        entries.append(write_bibtex_item(record))

    database = BibDatabase()
    # TODO: Fix page_double_hyphen in write_bibtex_item
    database.entries = list(map(page_double_hyphen, entries))

    bibtex_writer = BibTexWriter()
    bibtex_writer.common_strings = True
    bibtex_writer.indent = " "
    output = bibtex_writer.write(database)

    # Hack to remove curly braces around month names
    # TODO: Fix this in write_bibtex_item
    for abbreviation in MONTH_SHORT_NAMES:
        output = output.replace("{" + abbreviation + "}", abbreviation)
    return output
|
@@ -0,0 +1,70 @@
|
|
1
|
+
"""Citation writer for commonmeta-py"""
|
2
|
+
import orjson as json
|
3
|
+
import re
|
4
|
+
from pydash import py_
|
5
|
+
from citeproc import CitationStylesStyle, CitationStylesBibliography
|
6
|
+
from citeproc import Citation, CitationItem
|
7
|
+
from citeproc import formatter
|
8
|
+
from citeproc.source.json import CiteProcJSON
|
9
|
+
from citeproc_styles import get_style_filepath
|
10
|
+
|
11
|
+
|
12
|
+
def write_citation(metadata):
    """Render a formatted citation for ``metadata``.

    Uses ``metadata.style`` and ``metadata.locale`` to select the CSL style.
    If registering or rendering raises, the exception is printed and an
    error message string is returned instead.
    """
    # Process the JSON data to generate a citeproc-py BibliographySource.
    source = write_citation_item(metadata)
    csl_style = CitationStylesStyle(
        get_style_filepath(metadata.style), locale=metadata.locale
    )
    bibliography = CitationStylesBibliography(csl_style, source, formatter.html)
    cite = Citation([CitationItem(metadata.id)])

    # workaround for the issue with the vancouver style and de locale
    try:
        bibliography.register(cite)
        rendered = str(bibliography.bibliography()[0])
        return _clean_result(rendered)
    except Exception as error:
        print(error)
        return f"Error: citation not available for style {metadata.style} and locale {metadata.locale}."
|
29
|
+
|
30
|
+
|
31
|
+
def write_citation_item(metadata):
    """Convert ``metadata`` into a citeproc-py ``CiteProcJSON`` source.

    Returns None when ``metadata.write_errors`` is set.
    """
    if metadata.write_errors is None:
        csl_item = json.loads(metadata.write(to="csl"))
        # Remove keys that are not supported by citeproc-py.
        csl_item = py_.omit(csl_item, "copyright", "categories")
        return CiteProcJSON([csl_item])
    return None
|
40
|
+
|
41
|
+
|
42
|
+
def write_citation_list(metalist, **kwargs):
    """Render formatted citations for all records in ``metalist.items``.

    Keyword arguments:
        style: CSL style name, defaults to "apa".
        locale: locale passed to the CSL style, defaults to "en-US".

    Returns the citations joined by blank lines, or None when ``metalist``
    is None. Records for which ``write_citation_item`` returns None (they
    carry write errors) are skipped instead of aborting the whole list.
    """
    if metalist is None:
        return None

    style_name = kwargs.get("style", "apa")
    locale = kwargs.get("locale", "en-US")
    style = CitationStylesStyle(get_style_filepath(style_name), locale=locale)

    bibliographies = []
    for metadata in metalist.items:
        source = write_citation_item(metadata)
        if source is None:
            # No citation source available for this record.
            continue
        bib = CitationStylesBibliography(style, source, formatter.html)
        bib.register(Citation([CitationItem(metadata.id)]))
        bibliographies.append(_clean_result(str(bib.bibliography()[0])))
    return "\n\n".join(bibliographies)
|
64
|
+
|
65
|
+
|
66
|
+
def _clean_result(text):
    """Collapse whitespace runs to one space and runs of dots to one dot."""
    substitutions = (
        (r"\s\s+", " "),
        (r"\.\.+", "."),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
|
@@ -0,0 +1,68 @@
|
|
1
|
+
"""Commonmeta writer for commonmeta-py"""
|
2
|
+
|
3
|
+
import orjson as json
|
4
|
+
import orjsonl
|
5
|
+
import pydash as py_
|
6
|
+
from ..base_utils import compact
|
7
|
+
|
8
|
+
|
9
|
+
def write_commonmeta(metadata):
    """Serialize ``metadata`` as commonmeta JSON via ``json.dumps``.

    Internal attributes are dropped and snake_case attribute names are
    renamed to their camelCase keys. Returns None when ``metadata`` is None.
    """
    if metadata is None:
        return None

    excluded_keys = [
        "via",
        "is_valid",
        "date_created",
        "date_published",
        "date_registered",
        "date_updated",
        "state",
    ]
    renamed_keys = {
        "additional_type": "additionalType",
        "archive_locations": "archiveLocations",
        "geo_locations": "geoLocations",
        "funding_references": "fundingReferences",
    }
    attributes = py_.omit(vars(metadata), excluded_keys)
    attributes = py_.rename_keys(attributes, renamed_keys)
    return json.dumps(compact(attributes))
|
36
|
+
|
37
|
+
|
38
|
+
def write_commonmeta_list(metalist):
    """Write commonmeta list.

    If ``metalist.filename`` ends in ``.json`` or ``.jsonl`` the output is
    written to that file and the filename is returned; with
    ``metalist.jsonlines`` set, the items are saved in JSON Lines format.
    Otherwise the JSON document is returned as a string. Returns None when
    ``metalist`` is None.
    """
    if metalist is None:
        return None

    def format_item(item):
        """Format item for commonmeta list"""
        return compact(py_.omit(vars(item), ["via", "is_valid"]))

    items = [format_item(item) for item in metalist.items]
    output = compact(
        {
            "id": metalist.id,
            "title": metalist.title,
            "description": metalist.description,
            "items": items,
        }
    )

    filename = metalist.filename
    # endswith avoids the IndexError that rsplit(".", 1)[1] raised for
    # filenames without an extension.
    if filename and filename.endswith((".jsonl", ".json")):
        if metalist.jsonlines:
            orjsonl.save(filename, items)
        else:
            # json.dumps returns UTF-8 bytes here; write them unchanged so the
            # result does not depend on the platform's default text encoding.
            with open(filename, "wb") as file:
                file.write(json.dumps(output))
        return filename
    return json.dumps(output).decode("utf-8")
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"""Crossref XML writer for commonmeta-py"""
|
2
|
+
from typing import Optional
|
3
|
+
from ..constants import Commonmeta
|
4
|
+
from ..crossref_utils import generate_crossref_xml, generate_crossref_xml_list
|
5
|
+
|
6
|
+
|
7
|
+
def write_crossref_xml(metadata: Commonmeta) -> Optional[str]:
    """Return the Crossref XML built by ``generate_crossref_xml``."""
    crossref_xml = generate_crossref_xml(metadata)
    return crossref_xml
|
10
|
+
|
11
|
+
|
12
|
+
def write_crossref_xml_list(metalist):
    """Return Crossref XML for a list of records, or None without a list."""
    return None if metalist is None else generate_crossref_xml_list(metalist)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
"""CSL-JSON writer for commonmeta-py"""
|
2
|
+
import orjson as json
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from ..utils import pages_as_string, to_csl
|
6
|
+
from ..base_utils import wrap, presence, parse_attributes, compact
|
7
|
+
from ..date_utils import get_date_parts
|
8
|
+
from ..doi_utils import doi_from_url
|
9
|
+
from ..constants import CM_TO_CSL_TRANSLATIONS, Commonmeta
|
10
|
+
|
11
|
+
|
12
|
+
def write_csl(metadata: Commonmeta) -> Optional[str]:
    """Serialize ``metadata`` as CSL-JSON, or return None if no item is built."""
    csl_item = write_csl_item(metadata)
    return json.dumps(csl_item) if csl_item is not None else None
|
18
|
+
|
19
|
+
|
20
|
+
def write_csl_item(metadata) -> Optional[dict]:
    """Build the CSL-JSON dict for one record.

    Returns None when ``metadata`` is None or has write errors. The result
    is passed through ``compact``.
    """
    if metadata is None or metadata.write_errors is not None:
        return None

    contributors = wrap(metadata.contributors)
    author = to_csl(contributors) if len(contributors) != 0 else None

    # Software with a version is written with the CSL type "book".
    versioned_software = metadata.type == "Software" and metadata.version is not None
    if versioned_software:
        _type = "book"
    else:
        _type = CM_TO_CSL_TRANSLATIONS.get(metadata.type, "Document")

    container = metadata.container or {}
    publisher = metadata.publisher or {}

    def date_parts(key):
        """Return date parts for the given date key, or None if unset."""
        value = metadata.date.get(key, None)
        return get_date_parts(value) if value else None

    subjects = parse_attributes(
        wrap(metadata.subjects), content="subject", first=False
    )
    csl_item = {
        "type": _type,
        "id": metadata.id,
        "DOI": doi_from_url(metadata.id),
        "URL": metadata.url,
        "categories": presence(subjects),
        "language": metadata.language,
        "author": author,
        # "contributor": to_csl(wrap(metadata.contributors)),
        "issued": date_parts("published"),
        "submitted": date_parts("submitted"),
        "accessed": date_parts("accessed"),
        "abstract": parse_attributes(
            metadata.descriptions, content="description", first=True
        ),
        "container-title": container.get("title", None),
        "volume": container.get("volume", None),
        "issue": container.get("issue", None),
        "page": pages_as_string(container),
        "publisher": publisher.get("name", None),
        "title": parse_attributes(metadata.titles, content="title", first=True),
        "copyright": metadata.license.get("id", None) if metadata.license else None,
        "version": metadata.version,
    }
    return compact(csl_item)
|
72
|
+
|
73
|
+
|
74
|
+
def write_csl_list(metalist):
    """Serialize all records in ``metalist.items`` as a CSL-JSON array.

    Returns None when ``metalist`` is None.
    """
    if metalist is None:
        return None
    csl_items = []
    for record in metalist.items:
        csl_items.append(write_csl_item(record))
    return json.dumps(csl_items)
|
@@ -0,0 +1,193 @@
|
|
1
|
+
"""DataCite writer for commonmeta-py"""
|
2
|
+
import orjson as json
|
3
|
+
from typing import Optional, Union
|
4
|
+
|
5
|
+
from ..base_utils import wrap, compact
|
6
|
+
from ..doi_utils import doi_from_url, normalize_doi
|
7
|
+
from ..constants import (
|
8
|
+
CM_TO_BIB_TRANSLATIONS,
|
9
|
+
CM_TO_CSL_TRANSLATIONS,
|
10
|
+
CM_TO_CR_TRANSLATIONS,
|
11
|
+
CM_TO_DC_TRANSLATIONS,
|
12
|
+
CM_TO_RIS_TRANSLATIONS,
|
13
|
+
CM_TO_SO_TRANSLATIONS,
|
14
|
+
Commonmeta,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
def write_datacite(metadata: Commonmeta) -> Optional[Union[str, dict]]:
    """Write datacite. Make sure JSON Schema validates before writing.

    Returns the string "{}" when ``metadata.write_errors`` is set, otherwise
    the result of ``json.dumps`` on the compacted DataCite dict.
    """
    if metadata.write_errors is not None:
        return "{}"

    # Identifiers other than the record's own id become alternate identifiers.
    alternate_identifiers = [
        {
            "alternateIdentifier": i.get("identifier", None),
            "alternateIdentifierType": i.get("identifierType", None),
        }
        for i in wrap(metadata.identifiers)
        if i.get("id", None) != metadata.id
    ]

    # Contributors whose only role is "Author" are creators; all others are
    # written as contributors.
    creators = [
        to_datacite_creator(i)
        for i in wrap(metadata.contributors)
        if i.get("contributorRoles", None) == ["Author"]
    ]
    contributors = [
        to_datacite_creator(i)
        for i in wrap(metadata.contributors)
        if i.get("contributorRoles", None) != ["Author"]
    ]
    related_identifiers = [
        to_datacite_related_identifier(i)
        for i in wrap(metadata.references)
        if i.get("id", None)
    ]

    resource__typegeneral = CM_TO_DC_TRANSLATIONS.get(metadata.type, "Other")
    resource_type = CM_TO_CR_TRANSLATIONS.get(metadata.type, "Other")
    # Drop resourceType when it only repeats resourceTypeGeneral or when the
    # general type is one of the listed values.
    if resource__typegeneral == resource_type or resource__typegeneral in [
        "Dataset",
        "JournalArticle",
        "Other",
        "Preprint",
        "Software",
    ]:
        resource_type = None
    types = compact(
        {
            "resourceTypeGeneral": resource__typegeneral,
            "resourceType": resource_type,
            "schemaOrg": CM_TO_SO_TRANSLATIONS.get(metadata.type, "CreativeWork"),
            "citeproc": CM_TO_CSL_TRANSLATIONS.get(metadata.type, "article"),
            "bibtex": CM_TO_BIB_TRANSLATIONS.get(metadata.type, "misc"),
            "ris": CM_TO_RIS_TRANSLATIONS.get(metadata.type, "GEN"),
        }
    )

    # Treat a missing date attribute as empty instead of raising.
    published = (metadata.date or {}).get("published", None)
    publication_year = published[:4] if published else None

    def to_datacite_date(key: str, value: str) -> dict:
        """Convert one commonmeta date entry to a datacite date"""
        # "published" is written with the dateType "Issued"; every other key
        # is title-cased as-is.
        # NOTE(review): keys that are not DataCite dateTypes pass through
        # unchanged; confirm against the DataCite schema.
        date_type = "issued" if key == "published" else key
        return {
            "date": value,
            "dateType": date_type.title(),
        }

    # Emit one entry per date that has a value. The previous loop returned
    # on its first iteration, so only the first date was ever written.
    dates = [
        to_datacite_date(key, value)
        for entry in wrap(metadata.date)
        for key, value in entry.items()
        if value
    ]

    license_ = (
        [
            compact(
                {
                    "rightsIdentifier": metadata.license.get("id").lower()
                    if metadata.license.get("id", None)
                    else None,
                    "rightsIdentifierScheme": "SPDX",
                    "rightsUri": metadata.license.get("url", None),
                    "schemeUri": "https://spdx.org/licenses/",
                }
            )
        ]
        if metadata.license
        else None
    )

    descriptions = [
        compact(
            {
                "description": i.get("description", None),
                "descriptionType": i.get("type", None) or "Other",
                "lang": i.get("language", None),
            }
        )
        for i in wrap(metadata.descriptions)
    ]

    data = compact(
        {
            "id": metadata.id,
            "doi": doi_from_url(metadata.id),
            "url": metadata.url,
            "creators": creators,
            "titles": metadata.titles,
            "publisher": metadata.publisher,
            "publicationYear": publication_year,
            "subjects": metadata.subjects,
            "contributors": contributors,
            "dates": dates,
            "language": metadata.language,
            "types": types,
            "alternateIdentifiers": alternate_identifiers,
            "relatedIdentifiers": related_identifiers,
            "version": metadata.version,
            "rightsList": license_,
            "descriptions": descriptions,
            "geoLocations": metadata.geo_locations,
            "fundingReferences": metadata.funding_references,
            "schemaVersion": "http://datacite.org/schema/kernel-4",
        }
    )
    return json.dumps(data)
|
139
|
+
|
140
|
+
|
141
|
+
def to_datacite_creator(creator: dict) -> dict:
    """Convert a commonmeta contributor to a datacite creator.

    ``name`` is "familyName, givenName" when a family name is present
    (omitting a missing given name), otherwise the ``name`` value. A creator
    with neither is written without a name instead of raising.
    """
    _type = creator.get("type", None)
    given_name = creator.get("givenName", None)
    family_name = creator.get("familyName", None)
    if family_name:
        # Join only the parts that exist, so a missing given name neither
        # leaves a trailing ", " nor passes None to str.join.
        name = ", ".join(part for part in (family_name, given_name) if part)
    else:
        name = creator.get("name", None) or None

    name_identifiers = creator.get("id", None)
    if name_identifiers:

        def format_name_identifier(name_identifier):
            return {
                "nameIdentifier": name_identifier,
                "nameIdentifierScheme": "ORCID",
                "schemeUri": "https://orcid.org",
            }

        name_identifiers = [format_name_identifier(i) for i in wrap(name_identifiers)]
    return compact(
        {
            "name": name,
            "givenName": given_name,
            "familyName": family_name,
            # Appends "al" to the contributor type for nameType.
            "nameType": _type + "al" if _type else None,
            "nameIdentifiers": name_identifiers,
            "affiliation": creator.get("affiliations", None),
        }
    )
|
169
|
+
|
170
|
+
|
171
|
+
def to_datacite_titles(titles: list) -> list:
    """Map commonmeta title dicts onto datacite title dicts.

    Each output dict has the keys ``title``, ``titleType`` and ``lang``;
    values missing from the input are None.
    """
    # (datacite key, commonmeta key)
    key_map = (
        ("title", "title"),
        ("titleType", "type"),
        ("lang", "language"),
    )
    converted = []
    for entry in titles:
        converted.append({target: entry.get(source, None) for target, source in key_map})
    return converted
|
181
|
+
|
182
|
+
|
183
|
+
def to_datacite_related_identifier(reference: dict) -> dict:
    """Convert a reference to a datacite related identifier.

    The reference id is written as a DOI when ``normalize_doi`` returns a
    value for it, otherwise as a URL. The relation type is always
    "References".
    """
    reference_id = reference.get("id", None)
    doi = normalize_doi(reference_id)
    if doi:
        identifier, identifier_type = doi, "DOI"
    else:
        identifier, identifier_type = reference_id, "URL"
    related = {
        "relatedIdentifier": identifier,
        "relatedIdentifierType": identifier_type,
        "relationType": "References",
    }
    return compact(related)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
"""InvenioRDM writer for commonmeta-py"""
|
2
|
+
|
3
|
+
import orjson as json
|
4
|
+
|
5
|
+
from ..utils import to_inveniordm
|
6
|
+
from ..base_utils import compact, wrap, presence, parse_attributes
|
7
|
+
from ..doi_utils import doi_from_url, validate_suffix
|
8
|
+
from ..constants import CM_TO_INVENIORDM_TRANSLATIONS
|
9
|
+
from ..utils import pages_as_string, get_language, validate_orcid
|
10
|
+
|
11
|
+
|
12
|
+
def write_inveniordm(metadata):
    """Serialize ``metadata`` as an InvenioRDM record via ``json.dumps``.

    Returns None when ``metadata`` is None or has write errors.
    """
    if metadata is None or metadata.write_errors is not None:
        return None

    resource_type_id = CM_TO_INVENIORDM_TRANSLATIONS.get(metadata.type, "Other")

    # Only contributors whose roles are exactly ["Author"] become creators.
    creators = []
    for contributor in wrap(metadata.contributors):
        if contributor.get("contributorRoles", None) == ["Author"]:
            creators.append(to_inveniordm_creator(contributor))

    # Skip identifier entries whose id is the record's own id.
    identifiers = []
    for entry in wrap(metadata.identifiers):
        if entry.get("id", None) != metadata.id:
            identifiers.append(
                {
                    "identifier": entry.get("identifier", None),
                    "scheme": entry.get("identifierType", None),
                }
            )

    publisher = metadata.publisher or {}
    published = metadata.date.get("published", None)
    license_id = metadata.license.get("id", None) if metadata.license else None
    if metadata.language:
        languages = [{"id": get_language(metadata.language, format="alpha_3")}]
    else:
        languages = None

    record = {
        "resource_type": {"id": resource_type_id},
        "doi": doi_from_url(metadata.id),
        "creators": creators,
        "title": parse_attributes(metadata.titles, content="title", first=True),
        "publisher": publisher.get("name", None),
        "publication_date": published if published else None,
        "subjects": parse_attributes(
            wrap(metadata.subjects), content="subject", first=False
        ),
        "contributors": to_inveniordm(wrap(metadata.contributors)),
        "description": parse_attributes(
            metadata.descriptions, content="description", first=True
        ),
        "license": license_id,
        "languages": languages,
        "identifiers": identifiers,
        "version": metadata.version,
    }
    return json.dumps(compact({"metadata": record}))
|
61
|
+
|
62
|
+
|
63
|
+
def to_inveniordm_creator(creator: dict) -> dict:
    """Convert a commonmeta contributor to an inveniordm creator.

    ``name`` is "familyName, givenName" when a family name is present
    (omitting a missing given name), otherwise the ``name`` value. A creator
    with neither is written without a name instead of raising.
    """

    def format_identifier(identifier_value):
        """Return a one-element orcid identifier list, or None."""
        identifier = validate_orcid(identifier_value)
        if identifier:
            return [
                {
                    "identifier": identifier,
                    "scheme": "orcid",
                }
            ]
        return None

    _type = creator.get("type", None)
    given_name = creator.get("givenName", None)
    family_name = creator.get("familyName", None)
    if family_name:
        # Join only the parts that exist, so a missing given name neither
        # leaves a trailing ", " nor passes None to str.join.
        name = ", ".join(part for part in (family_name, given_name) if part)
    else:
        name = creator.get("name", None) or None

    return {
        "person_or_org": compact(
            {
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                # Lower-cases the contributor type and appends "al".
                "type": _type.lower() + "al" if _type else None,
                "identifiers": format_identifier(creator.get("id", None)),
                "affiliation": creator.get("affiliations", None),
            }
        )
    }
|