commonmeta-py 0.106__py3-none-any.whl → 0.108__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +12 -3
- commonmeta/api_utils.py +3 -2
- commonmeta/base_utils.py +186 -3
- commonmeta/cli.py +114 -34
- commonmeta/constants.py +20 -0
- commonmeta/file_utils.py +112 -0
- commonmeta/metadata.py +102 -42
- commonmeta/readers/codemeta_reader.py +1 -1
- commonmeta/readers/crossref_reader.py +23 -10
- commonmeta/readers/crossref_xml_reader.py +1 -1
- commonmeta/readers/datacite_reader.py +6 -4
- commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
- commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
- commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
- commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
- commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
- commonmeta/resources/crossref/fundref.xsd +29 -19
- commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
- commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
- commonmeta/resources/crossref/module-ali.xsd +14 -6
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
- commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
- commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
- commonmeta/resources/crossref/xml.xsd +287 -0
- commonmeta/schema_utils.py +25 -0
- commonmeta/utils.py +90 -15
- commonmeta/writers/bibtex_writer.py +5 -5
- commonmeta/writers/citation_writer.py +10 -5
- commonmeta/writers/commonmeta_writer.py +5 -17
- commonmeta/writers/crossref_xml_writer.py +1032 -4
- commonmeta/writers/csl_writer.py +6 -6
- commonmeta/writers/datacite_writer.py +11 -6
- commonmeta/writers/inveniordm_writer.py +286 -10
- commonmeta/writers/ris_writer.py +3 -3
- commonmeta/writers/schema_org_writer.py +10 -5
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/METADATA +5 -2
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/RECORD +46 -32
- commonmeta/crossref_utils.py +0 -583
- commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/crossref_utils.py
DELETED
@@ -1,583 +0,0 @@
|
|
1
|
-
"""Crossref utils module for commonmeta-py"""
|
2
|
-
|
3
|
-
import uuid
|
4
|
-
from datetime import datetime
|
5
|
-
from typing import Optional
|
6
|
-
|
7
|
-
import pydash as py_
|
8
|
-
from dateutil.parser import parse
|
9
|
-
from furl import furl
|
10
|
-
from lxml import etree
|
11
|
-
|
12
|
-
from .constants import ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS, Commonmeta
|
13
|
-
from .doi_utils import doi_from_url, validate_doi
|
14
|
-
from .utils import compact, normalize_id, normalize_orcid, validate_url, wrap
|
15
|
-
|
16
|
-
|
17
|
-
def generate_crossref_xml(metadata: Commonmeta) -> Optional[str]:
    """Serialize one metadata record as a Crossref deposit document.

    Builds the ``doi_batch`` root from ``crossref_root()``, fills the ``head``
    with a random UUID batch id, the current local time as
    ``YYYYMMDDHHMMSS``, and the depositor name, email and registrant read
    from ``metadata``, then lets ``insert_crossref_work`` populate the
    ``body``. No validation of ``metadata`` happens in this function.

    Returns the result of ``etree.tostring`` for the pretty-printed tree.
    """
    root = crossref_root()

    head = etree.SubElement(root, "head")
    # a fresh UUID serves as the batch id
    batch_id = etree.SubElement(head, "doi_batch_id")
    batch_id.text = str(uuid.uuid4())
    timestamp = etree.SubElement(head, "timestamp")
    timestamp.text = datetime.now().strftime("%Y%m%d%H%M%S")

    depositor = etree.SubElement(head, "depositor")
    depositor_name = etree.SubElement(depositor, "depositor_name")
    depositor_name.text = metadata.depositor
    email_address = etree.SubElement(depositor, "email_address")
    email_address.text = metadata.email
    registrant = etree.SubElement(head, "registrant")
    registrant.text = metadata.registrant

    # the work is appended to the body in place; the return value is not needed
    insert_crossref_work(metadata, etree.SubElement(root, "body"))

    return etree.tostring(
        root,
        doctype='<?xml version="1.0" encoding="UTF-8"?>',
        pretty_print=True,
    )
|
36
|
-
|
37
|
-
|
38
|
-
def insert_crossref_work(metadata, xml):
    """Append the Crossref work element for ``metadata`` to ``xml``.

    Only the types "JournalArticle", "Article" and "BlogPost" are written,
    and only when the record has a DOI (per ``doi_from_url``) and a URL.
    "JournalArticle" is written via ``insert_journal``; the other two via
    ``insert_posted_content``.

    Returns ``xml`` in every case, so callers can always keep using the
    element they passed in (previously the success path returned ``None``).
    """
    if metadata.type not in ("JournalArticle", "Article", "BlogPost"):
        return xml
    if doi_from_url(metadata.id) is None or metadata.url is None:
        return xml

    # The helpers append to ``xml`` in place. Their return values differ
    # (``insert_journal`` returns the new journal element), so they are
    # deliberately not used to rebind ``xml``.
    if metadata.type == "JournalArticle":
        insert_journal(metadata, xml)
    else:
        insert_posted_content(metadata, xml)
    return xml
|
48
|
-
|
49
|
-
|
50
|
-
def insert_journal(metadata, xml):
    """Append a ``journal`` element describing ``metadata`` to ``xml``.

    The ``journal_metadata`` child gets a ``language`` attribute (first two
    characters of ``metadata.language``) when a language is set, a
    ``full_title`` when the container has a title, and a group title. The
    ``journal_article`` child is then filled by the individual insert helpers
    in a fixed order.

    Returns the newly created ``journal`` element (not ``xml``).
    """
    journal = etree.SubElement(xml, "journal")

    meta_attributes = {}
    if metadata.language is not None:
        meta_attributes["language"] = metadata.language[:2]
    journal_metadata = etree.SubElement(journal, "journal_metadata", meta_attributes)

    container_title = None
    if metadata.container is not None:
        container_title = metadata.container.get("title", None)
    if container_title is not None:
        etree.SubElement(journal_metadata, "full_title").text = container_title
    journal_metadata = insert_group_title(metadata, journal_metadata)

    journal_article = etree.SubElement(
        journal, "journal_article", {"publication_type": "full_text"}
    )
    # order matters: children are appended in exactly this sequence
    article_steps = (
        insert_crossref_titles,
        insert_crossref_contributors,
        insert_crossref_publication_date,
        insert_crossref_abstract,
        insert_crossref_issn,
        insert_item_number,
        insert_funding_references,
        insert_crossref_access_indicators,
        insert_crossref_relations,
        insert_archive_locations,
        insert_doi_data,
        insert_citation_list,
    )
    for step in article_steps:
        journal_article = step(metadata, journal_article)

    return journal
|
84
|
-
|
85
|
-
|
86
|
-
def insert_posted_content(metadata, xml):
    """Append a ``posted_content`` element describing ``metadata`` to ``xml``.

    The element always has ``type="other"`` and additionally a ``language``
    attribute (first two characters of ``metadata.language``) when a language
    is set. Its children are written by the insert helpers in a fixed order.

    Returns ``xml``.
    """
    attributes = {"type": "other"}
    if metadata.language is not None:
        attributes["language"] = metadata.language[:2]
    posted_content = etree.SubElement(xml, "posted_content", attributes)

    # order matters: children are appended in exactly this sequence
    content_steps = (
        insert_group_title,
        insert_crossref_contributors,
        insert_crossref_titles,
        insert_posted_date,
        insert_institution,
        insert_item_number,
        insert_crossref_abstract,
        insert_funding_references,
        insert_crossref_access_indicators,
        insert_crossref_relations,
        insert_archive_locations,
        insert_doi_data,
        insert_citation_list,
    )
    for step in content_steps:
        posted_content = step(metadata, posted_content)

    return xml
|
110
|
-
|
111
|
-
|
112
|
-
def insert_group_title(metadata, xml):
    """Append a ``group_title`` element taken from the first subject.

    The first entry of ``metadata.subjects`` is used; it may be a dict with a
    "subject" key or a plain string. An optional "FOS: " (Field of Science)
    prefix is stripped. Nothing is written when there are no subjects or the
    first subject carries no value.

    Returns ``xml``.
    """
    if not metadata.subjects:
        return xml

    first_subject = metadata.subjects[0]
    if isinstance(first_subject, dict):
        group_title = first_subject.get("subject", None)
    else:
        group_title = first_subject
    # a subject entry without a value must not crash the prefix check below
    if group_title is None:
        return xml

    # strip optional FOS (Field of Science) prefix
    if group_title.startswith("FOS: "):
        group_title = group_title[5:]
    etree.SubElement(xml, "group_title").text = group_title
    return xml
|
122
|
-
|
123
|
-
|
124
|
-
def insert_crossref_contributors(metadata, xml):
    """Append a ``contributors`` element listing authors and editors.

    Only contributors whose ``contributorRoles`` is exactly ``["Author"]`` or
    ``["Editor"]`` are written. The first written contributor gets
    ``sequence="first"``, all others ``"additional"``. Organizations with a
    name become ``organization`` elements, contributors with a given or
    family name become ``person_name`` elements, and everything else becomes
    an ``anonymous`` element.

    No ``contributors`` wrapper is created when nobody qualifies, so the
    output never contains an empty wrapper element.

    Returns ``xml``.
    """
    if not metadata.contributors:
        return xml

    credited = [
        c
        for c in metadata.contributors
        if c.get("contributorRoles", None) in (["Author"], ["Editor"])
    ]
    if not credited:
        return xml

    contributors = etree.SubElement(xml, "contributors")
    for position, contributor in enumerate(credited):
        # the filter above guarantees the role list is one of the two values
        is_author = contributor.get("contributorRoles") == ["Author"]
        attributes = {
            "contributor_role": "author" if is_author else "editor",
            "sequence": "first" if position == 0 else "additional",
        }
        has_personal_name = (
            contributor.get("givenName", None) is not None
            or contributor.get("familyName", None) is not None
        )
        if (
            contributor.get("type", None) == "Organization"
            and contributor.get("name", None) is not None
        ):
            etree.SubElement(
                contributors, "organization", attributes
            ).text = contributor.get("name")
        elif has_personal_name:
            person_name = etree.SubElement(contributors, "person_name", attributes)
            insert_crossref_person(contributor, person_name)
        else:
            # insert_crossref_anonymous adds nothing when there are no affiliations
            anonymous = etree.SubElement(contributors, "anonymous", attributes)
            insert_crossref_anonymous(contributor, anonymous)
    return xml
|
177
|
-
|
178
|
-
|
179
|
-
def insert_crossref_person(contributor, xml):
    """Fill a person element with name, first affiliation and ORCID.

    Writes ``given_name`` and ``surname`` when present. When the contributor
    has an "affiliations" value, an ``affiliations/institution`` pair is added
    and filled with the name and id of the first affiliation; the id is
    written with ``type="ror"``. An ``ORCID`` element is added when
    ``normalize_orcid`` yields a value for the contributor id.

    Returns ``xml``.
    """
    for key, tag in (("givenName", "given_name"), ("familyName", "surname")):
        value = contributor.get(key, None)
        if value is not None:
            etree.SubElement(xml, tag).text = value

    if contributor.get("affiliations", None) is not None:
        wrapper = etree.SubElement(xml, "affiliations")
        institution = etree.SubElement(wrapper, "institution")
        # only the first affiliation is written
        institution_name = py_.get(contributor, "affiliations.0.name")
        if institution_name is not None:
            etree.SubElement(institution, "institution_name").text = institution_name
        institution_id = py_.get(contributor, "affiliations.0.id")
        if institution_id is not None:
            etree.SubElement(
                institution, "institution_id", {"type": "ror"}
            ).text = institution_id

    orcid = normalize_orcid(contributor.get("id", None))
    if orcid is not None:
        etree.SubElement(xml, "ORCID").text = orcid
    return xml
|
201
|
-
|
202
|
-
|
203
|
-
def insert_crossref_anonymous(contributor, xml):
    """Add the first affiliation of an unnamed contributor to ``xml``.

    When the contributor has an "affiliations" value, an
    ``affiliations/institution`` pair is appended and the name of the first
    affiliation, if any, is written as ``institution_name``.

    Returns ``xml``.
    """
    if contributor.get("affiliations", None) is not None:
        wrapper = etree.SubElement(xml, "affiliations")
        institution = etree.SubElement(wrapper, "institution")
        institution_name = py_.get(contributor, "affiliations.0.name")
        if institution_name is not None:
            etree.SubElement(institution, "institution_name").text = institution_name
    return xml
|
214
|
-
|
215
|
-
|
216
|
-
def insert_crossref_titles(metadata, xml):
    """Append a ``titles`` element with one ``title`` per metadata title.

    ``metadata.titles`` is passed through ``wrap``; each entry may be a dict
    (its "title" value is used) or a plain value used as is.

    Returns ``xml``.
    """
    titles = etree.SubElement(xml, "titles")
    for entry in wrap(metadata.titles):
        title_element = etree.SubElement(titles, "title")
        title_element.text = (
            entry.get("title", None) if isinstance(entry, dict) else entry
        )
    return xml
|
225
|
-
|
226
|
-
|
227
|
-
def insert_citation_list(metadata, xml):
    """Append a ``citation_list`` element built from ``metadata.references``.

    References without an "id" are skipped. Every written citation gets a
    ``key`` attribute (the reference's own "key", or ``ref<N>`` with N being
    the 1-based position in the reference list). Available fields are written
    as child elements; a ``doi`` element is only written when
    ``doi_from_url`` recognises the id as a DOI.

    Returns ``xml``.
    """
    if not metadata.references:
        return xml

    # (reference key, citation child element), in the order they are written
    simple_fields = (
        ("journal_title", "journal_title"),
        ("author", "author"),
        ("volume", "volume"),
        ("first_page", "first_page"),
        ("publicationYear", "cYear"),
        ("title", "article_title"),
    )

    citation_list = etree.SubElement(xml, "citation_list")
    for index, ref in enumerate(metadata.references):
        ref_id = ref.get("id", None)
        if ref_id is None:
            continue
        citation = etree.SubElement(
            citation_list, "citation", {"key": ref.get("key", f"ref{index + 1}")}
        )
        for key, tag in simple_fields:
            value = ref.get(key, None)
            if value is not None:
                # element text must be a string; years may arrive as integers
                etree.SubElement(citation, tag).text = str(value)
        doi = doi_from_url(ref_id)
        if doi is not None:
            etree.SubElement(citation, "doi").text = doi
        unstructured = ref.get("unstructured", None)
        if unstructured is not None:
            etree.SubElement(citation, "unstructured_citation").text = unstructured
    return xml
|
262
|
-
|
263
|
-
|
264
|
-
def insert_crossref_access_indicators(metadata, xml):
    """Append an AccessIndicators ``program`` carrying the license URL.

    Nothing is written when the record has no license or the license has no
    "url". Otherwise the same URL is written twice as ``license_ref``, once
    with ``applies_to="vor"`` and once with ``applies_to="tdm"``.

    Returns ``xml``.
    """
    license_ = metadata.license
    rights_uri = None if license_ is None else license_.get("url", None)
    if rights_uri is None:
        return xml

    program_attributes = {
        "xmlns": "http://www.crossref.org/AccessIndicators.xsd",
        "name": "AccessIndicators",
    }
    program = etree.SubElement(xml, "program", program_attributes)
    for applies_to in ("vor", "tdm"):
        license_ref = etree.SubElement(program, "license_ref", {"applies_to": applies_to})
        license_ref.text = rights_uri
    return xml
|
282
|
-
|
283
|
-
|
284
|
-
def insert_crossref_relations(metadata, xml):
    """Append a relations ``program`` with one ``related_item`` per relation.

    Relations are grouped by type into ``inter_work_relation`` or
    ``intra_work_relation``; relations of any other type are skipped. The
    identifier type is "doi" when ``validate_doi`` accepts the id, "issn" for
    ids hosted on portal.issn.org (the last path segment is used), "uri" when
    ``validate_url`` reports "URL", and "other" otherwise.

    Returns ``xml``.
    """
    if not metadata.relations:
        return xml

    inter_work_types = (
        "IsPartOf",
        "HasPart",
        "IsReviewOf",
        "HasReview",
        "IsRelatedMaterial",
        "HasRelatedMaterial",
    )
    intra_work_types = (
        "IsIdenticalTo",
        "IsPreprintOf",
        "HasPreprint",
        "IsTranslationOf",
        "HasTranslation",
        "IsVersionOf",
        "HasVersion",
    )

    program = etree.SubElement(
        xml,
        "program",
        {"xmlns": "http://www.crossref.org/relations.xsd", "name": "relations"},
    )
    for relation in metadata.relations:
        relation_type = relation.get("type", None)
        if relation_type in inter_work_types:
            group = "inter_work_relation"
        elif relation_type in intra_work_types:
            group = "intra_work_relation"
        else:
            continue

        related_item = etree.SubElement(program, "related_item")
        target = relation.get("id", None)
        parsed = furl(target)
        if validate_doi(target):
            identifier_type = "doi"
            identifier = doi_from_url(target)
        elif parsed.host == "portal.issn.org":
            identifier_type = "issn"
            identifier = parsed.path.segments[-1]
        else:
            identifier_type = "uri" if validate_url(target) == "URL" else "other"
            identifier = target

        # relation_type is never None here: it matched one of the lists above
        relation_attributes = {
            "relationship-type": py_.lower_first(relation_type),
            "identifier-type": identifier_type,
        }
        etree.SubElement(related_item, group, relation_attributes).text = identifier

    return xml
|
346
|
-
|
347
|
-
|
348
|
-
def insert_funding_references(metadata, xml):
    """Append a fundref ``program`` with one ``fundgroup`` per funding reference.

    Each group holds a ``funder_name`` assertion whose text is the
    "funderName" and which, when a "funderIdentifier" exists, nests a
    ``funder_identifier`` assertion. An "awardNumber" is written as an
    ``award_number`` assertion next to the funder name.

    Returns ``xml``.
    """
    if not metadata.funding_references:
        return xml

    program = etree.SubElement(
        xml,
        "program",
        {"xmlns": "http://www.crossref.org/fundref.xsd", "name": "fundref"},
    )
    for funding in metadata.funding_references:
        fundgroup = etree.SubElement(program, "assertion", {"name": "fundgroup"})
        funder_name = etree.SubElement(fundgroup, "assertion", {"name": "funder_name"})

        funder_identifier = funding.get("funderIdentifier", None)
        if funder_identifier is not None:
            # translate ROR to Crossref funder ID until Crossref supports ROR
            funder_identifier = ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS.get(
                funder_identifier, funder_identifier
            )
            identifier_assertion = etree.SubElement(
                funder_name, "assertion", {"name": "funder_identifier"}
            )
            identifier_assertion.text = funder_identifier

        award_number = funding.get("awardNumber", None)
        if award_number is not None:
            award_assertion = etree.SubElement(
                fundgroup, "assertion", {"name": "award_number"}
            )
            award_assertion.text = award_number

        funder_name.text = funding.get("funderName", None)
    return xml
|
388
|
-
|
389
|
-
|
390
|
-
def insert_crossref_subjects(metadata, xml):
    """Append a ``subjects`` element with one ``subject`` per metadata subject.

    Each subject may be a dict (its "subject" value is used) or a plain value
    used as is. Nothing is written when ``metadata.subjects`` is ``None``.

    Returns ``xml``.
    """
    if metadata.subjects is not None:
        subjects = etree.SubElement(xml, "subjects")
        for entry in metadata.subjects:
            subject_element = etree.SubElement(subjects, "subject")
            subject_element.text = (
                entry.get("subject", None) if isinstance(entry, dict) else entry
            )
    return xml
|
401
|
-
|
402
|
-
|
403
|
-
def insert_crossref_language(metadata, xml):
    """Append a ``language`` element holding ``metadata.language``, if set.

    Returns ``xml``.
    """
    language = metadata.language
    if language is not None:
        etree.SubElement(xml, "language").text = language
    return xml
|
409
|
-
|
410
|
-
|
411
|
-
def insert_crossref_publication_date(metadata, xml):
    """Append an online ``publication_date`` from the "published" date.

    The published value is parsed with ``dateutil.parser.parse`` and written
    as ``month``, ``day`` and ``year`` children (month and day without
    zero-padding). Nothing is written when the record has no date information
    or no "published" value; the check happens before parsing because
    ``parse`` does not accept ``None``.

    Returns ``xml``.
    """
    published = (metadata.date or {}).get("published", None)
    if not published:
        return xml

    pub_date = parse(published)
    publication_date = etree.SubElement(
        xml, "publication_date", {"media_type": "online"}
    )
    etree.SubElement(publication_date, "month").text = f"{pub_date.month:d}"
    etree.SubElement(publication_date, "day").text = f"{pub_date.day:d}"
    etree.SubElement(publication_date, "year").text = str(pub_date.year)
    return xml
|
424
|
-
|
425
|
-
|
426
|
-
def insert_posted_date(metadata, xml):
    """Append an online ``posted_date`` from the "published" date.

    The published value is parsed with ``dateutil.parser.parse`` and written
    as ``month``, ``day`` and ``year`` children (month and day without
    zero-padding). Nothing is written when the record has no date information
    or no "published" value; the check happens before parsing because
    ``parse`` does not accept ``None``.

    Returns ``xml``.
    """
    published = (metadata.date or {}).get("published", None)
    if not published:
        return xml

    pub_date = parse(published)
    posted_date = etree.SubElement(xml, "posted_date", {"media_type": "online"})
    etree.SubElement(posted_date, "month").text = f"{pub_date.month:d}"
    etree.SubElement(posted_date, "day").text = f"{pub_date.day:d}"
    etree.SubElement(posted_date, "year").text = str(pub_date.year)
    return xml
|
437
|
-
|
438
|
-
|
439
|
-
def insert_institution(metadata, xml):
    """Append an ``institution`` element named after the publisher.

    Nothing is written when the record has no publisher or the publisher has
    no "name".

    Returns ``xml``.
    """
    publisher = metadata.publisher
    publisher_name = None if publisher is None else publisher.get("name", None)
    if publisher_name is None:
        return xml

    institution = etree.SubElement(xml, "institution")
    etree.SubElement(institution, "institution_name").text = publisher_name
    return xml
|
448
|
-
|
449
|
-
|
450
|
-
def insert_item_number(metadata, xml):
    """Append an ``item_number`` element for every UUID identifier.

    Identifiers of any other type are ignored. Hyphens are removed from the
    UUID because, per the original code comment, ``item_number`` can only be
    32 characters long while a hyphenated UUID has 36. The identifier dicts in
    ``metadata`` are left untouched.

    Returns ``xml``.
    """
    if metadata.identifiers is None:
        return xml

    for identifier in metadata.identifiers:
        if identifier.get("identifierType", None) != "UUID":
            continue
        # work on a local copy so the caller's metadata is not rewritten
        item_number = (identifier.get("identifier", None) or "").replace("-", "")
        etree.SubElement(
            xml, "item_number", {"item_number_type": "uuid"}
        ).text = item_number
    return xml
|
469
|
-
|
470
|
-
|
471
|
-
def insert_archive_locations(metadata, xml):
    """Append an ``archive_locations`` element with one ``archive`` per entry.

    Each entry of ``metadata.archive_locations`` becomes the ``name``
    attribute of an ``archive`` child. Nothing is written when the value is
    ``None``.

    Returns ``xml``.
    """
    locations = metadata.archive_locations
    if locations is not None:
        archive_locations = etree.SubElement(xml, "archive_locations")
        for location in locations:
            etree.SubElement(archive_locations, "archive", {"name": location})
    return xml
|
479
|
-
|
480
|
-
|
481
|
-
def insert_doi_data(metadata, xml):
    """Append the ``doi_data`` element with DOI, URL and text-mining links.

    Nothing is written unless the record has both a DOI (per
    ``doi_from_url``) and a URL. The text-mining collection always contains
    the record URL as ``text/html`` and additionally one item per entry in
    ``metadata.files``. The file entries in ``metadata`` are left untouched.

    Returns ``xml``.
    """
    doi = doi_from_url(metadata.id)
    if doi is None or metadata.url is None:
        return xml

    doi_data = etree.SubElement(xml, "doi_data")
    etree.SubElement(doi_data, "doi").text = doi
    etree.SubElement(doi_data, "resource").text = metadata.url
    collection = etree.SubElement(doi_data, "collection", {"property": "text-mining"})
    item = etree.SubElement(collection, "item")
    etree.SubElement(item, "resource", {"mime_type": "text/html"}).text = metadata.url
    if metadata.files is None:
        return xml

    for file in metadata.files:
        mime_type = file.get("mimeType", "")
        # Crossref schema currently doesn't support text/markdown; substitute
        # locally instead of rewriting the caller's file entry
        if mime_type == "text/markdown":
            mime_type = "text/plain"
        item = etree.SubElement(collection, "item")
        etree.SubElement(item, "resource", {"mime_type": mime_type}).text = file.get(
            "url", None
        )
    return xml
|
502
|
-
|
503
|
-
|
504
|
-
def insert_crossref_license(metadata, xml):
    """Append a ``license`` element with a single ``rights`` child.

    A dict license is used as is; any other value is treated as the rights
    text, with ``normalize_id`` of that value as its URI. The attributes of
    ``rights`` are passed through ``compact`` before use. Nothing is written
    when ``metadata.license`` is ``None``.

    Returns ``xml``.
    """
    if metadata.license is None:
        return xml

    license_ = etree.SubElement(xml, "license")
    rights = metadata.license
    if not isinstance(rights, dict):
        rights = {
            "rights": metadata.license,
            "rightsUri": normalize_id(metadata.license),
        }

    # (XML attribute name, key in the rights dict)
    attribute_sources = (
        ("rightsURI", "rightsUri"),
        ("rightsIdentifier", "rightsIdentifier"),
        ("rightsIdentifierScheme", "rightsIdentifierScheme"),
        ("schemeURI", "schemeUri"),
        ("xml:lang", "lang"),
    )
    attributes = compact(
        {attribute: rights.get(key, None) for attribute, key in attribute_sources}
    )
    rights_element = etree.SubElement(license_, "rights", attributes)
    rights_element.text = rights.get("rights", None)
    return xml
|
526
|
-
|
527
|
-
|
528
|
-
def insert_crossref_issn(metadata, xml):
    """Append an ``issn`` element when the container is identified by ISSN.

    The container's "identifier" is written only when its "identifierType" is
    "ISSN".

    Returns ``xml``.
    """
    container = metadata.container
    if container is not None and container.get("identifierType", None) == "ISSN":
        etree.SubElement(xml, "issn").text = container.get("identifier", None)
    return xml
|
537
|
-
|
538
|
-
|
539
|
-
def insert_crossref_abstract(metadata, xml):
    """Append a JATS ``abstract`` built from the first description.

    The first entry of ``metadata.descriptions`` may be a dict (its
    "description" value is used) or a plain value used as is; it is written as
    a single ``p`` paragraph. Nothing is written when there are no
    descriptions, including an empty list.

    Returns ``xml``.
    """
    if not metadata.descriptions:
        return xml

    first = metadata.descriptions[0]
    text = first.get("description", None) if isinstance(first, dict) else first

    abstract = etree.SubElement(
        xml, "abstract", {"xmlns": "http://www.ncbi.nlm.nih.gov/JATS1"}
    )
    etree.SubElement(abstract, "p").text = text
    return xml
|
553
|
-
|
554
|
-
|
555
|
-
def crossref_root():
    """Return a new, empty ``doi_batch`` root element.

    The element is parsed from a literal that declares the Crossref 5.3.1
    default namespace, the xsi, jats, fr and mml prefixes, the schema
    location and ``version="5.3.1"``.
    """
    # adjacent literals are concatenated into the exact single-line markup
    doi_batch = (
        '<doi_batch xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xmlns="http://www.crossref.org/schema/5.3.1" '
        'xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1" '
        'xmlns:fr="http://www.crossref.org/fundref.xsd" '
        'xmlns:mml="http://www.w3.org/1998/Math/MathML" '
        'xsi:schemaLocation="http://www.crossref.org/schema/5.3.1 '
        'https://www.crossref.org/schemas/crossref5.3.1.xsd" '
        'version="5.3.1"></doi_batch>'
    )
    return etree.fromstring(doi_batch)
|
559
|
-
|
560
|
-
|
561
|
-
def generate_crossref_xml_list(metalist) -> Optional[str]:
    """Serialize a list of metadata records as one Crossref deposit document.

    Returns ``None`` when ``metalist.is_valid`` is falsy. Otherwise the head
    is built like for a single record, falling back to "test",
    "info@example.org" and "test" for a missing depositor, email and
    registrant, and every item in ``metalist.items`` is passed to
    ``insert_crossref_work`` with the shared ``body`` element.

    Returns the result of ``etree.tostring`` for the pretty-printed tree.
    """
    if not metalist.is_valid:
        return None

    root = crossref_root()

    head = etree.SubElement(root, "head")
    # a fresh UUID serves as the batch id
    batch_id = etree.SubElement(head, "doi_batch_id")
    batch_id.text = str(uuid.uuid4())
    timestamp = etree.SubElement(head, "timestamp")
    timestamp.text = datetime.now().strftime("%Y%m%d%H%M%S")

    depositor = etree.SubElement(head, "depositor")
    depositor_name = etree.SubElement(depositor, "depositor_name")
    depositor_name.text = metalist.depositor or "test"
    email_address = etree.SubElement(depositor, "email_address")
    email_address.text = metalist.email or "info@example.org"
    registrant = etree.SubElement(head, "registrant")
    registrant.text = metalist.registrant or "test"

    body = etree.SubElement(root, "body")
    for item in metalist.items:
        insert_crossref_work(item, body)

    return etree.tostring(
        root,
        doctype='<?xml version="1.0" encoding="UTF-8"?>',
        pretty_print=True,
    )
|