commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/bibtex_reader.py +0 -0
  13. commonmeta/readers/cff_reader.py +199 -0
  14. commonmeta/readers/codemeta_reader.py +112 -0
  15. commonmeta/readers/commonmeta_reader.py +13 -0
  16. commonmeta/readers/crossref_reader.py +409 -0
  17. commonmeta/readers/crossref_xml_reader.py +505 -0
  18. commonmeta/readers/csl_reader.py +98 -0
  19. commonmeta/readers/datacite_reader.py +390 -0
  20. commonmeta/readers/datacite_xml_reader.py +359 -0
  21. commonmeta/readers/inveniordm_reader.py +218 -0
  22. commonmeta/readers/json_feed_reader.py +420 -0
  23. commonmeta/readers/kbase_reader.py +205 -0
  24. commonmeta/readers/ris_reader.py +103 -0
  25. commonmeta/readers/schema_org_reader.py +506 -0
  26. commonmeta/resources/cff_v1.2.0.json +1827 -0
  27. commonmeta/resources/commonmeta_v0.12.json +601 -0
  28. commonmeta/resources/commonmeta_v0.13.json +559 -0
  29. commonmeta/resources/commonmeta_v0.14.json +573 -0
  30. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  36. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  37. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  38. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  39. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  40. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  41. commonmeta/resources/crossref/fundref.xsd +49 -0
  42. commonmeta/resources/crossref/module-ali.xsd +39 -0
  43. commonmeta/resources/crossref/relations.xsd +444 -0
  44. commonmeta/resources/crossref-v0.2.json +60 -0
  45. commonmeta/resources/csl-data.json +538 -0
  46. commonmeta/resources/datacite-v4.5.json +829 -0
  47. commonmeta/resources/datacite-v4.5pr.json +608 -0
  48. commonmeta/resources/ietf-bcp-47.json +3025 -0
  49. commonmeta/resources/iso-8601.json +3182 -0
  50. commonmeta/resources/spdx/licenses.json +4851 -0
  51. commonmeta/resources/spdx-schema..json +903 -0
  52. commonmeta/resources/styles/apa.csl +1697 -0
  53. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  54. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  55. commonmeta/resources/styles/ieee.csl +468 -0
  56. commonmeta/resources/styles/modern-language-association.csl +341 -0
  57. commonmeta/resources/styles/vancouver.csl +376 -0
  58. commonmeta/schema_utils.py +27 -0
  59. commonmeta/translators.py +47 -0
  60. commonmeta/utils.py +1108 -0
  61. commonmeta/writers/__init__.py +1 -0
  62. commonmeta/writers/bibtex_writer.py +149 -0
  63. commonmeta/writers/citation_writer.py +70 -0
  64. commonmeta/writers/commonmeta_writer.py +68 -0
  65. commonmeta/writers/crossref_xml_writer.py +17 -0
  66. commonmeta/writers/csl_writer.py +79 -0
  67. commonmeta/writers/datacite_writer.py +193 -0
  68. commonmeta/writers/inveniordm_writer.py +94 -0
  69. commonmeta/writers/ris_writer.py +58 -0
  70. commonmeta/writers/schema_org_writer.py +146 -0
  71. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  72. commonmeta_py-0.24.dist-info/RECORD +75 -0
  73. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  74. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  75. commonmeta_py/__init__.py +0 -2
  76. commonmeta_py-0.22.dist-info/RECORD +0 -5
  77. {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,575 @@
1
+ """Crossref utils module for commonmeta-py"""
2
+
3
+ from lxml import etree
4
+ from typing import Optional
5
+ from datetime import datetime
6
+ from dateutil.parser import parse
7
+ import uuid
8
+ import pydash as py_
9
+ from furl import furl
10
+
11
+ from .constants import Commonmeta
12
+ from .utils import wrap, compact, normalize_orcid, normalize_id, validate_url
13
+ from .doi_utils import doi_from_url, validate_doi
14
+
15
+
16
def generate_crossref_xml(metadata: Commonmeta) -> Optional[str]:
    """Serialize one Commonmeta record as a Crossref deposit document.

    Builds the ``doi_batch`` root, fills ``head`` with a random UUID batch id,
    the current timestamp and the depositor/registrant values read from
    ``metadata``, then lets ``insert_crossref_work`` populate ``body``.

    NOTE(review): no validation happens in this function, and
    ``etree.tostring`` is called without an ``encoding`` argument, so the
    result is presumably ``bytes`` rather than ``str`` -- confirm with callers.
    """

    def add_text(parent, tag, text):
        # create <tag> under parent and set its text in one step
        etree.SubElement(parent, tag).text = text

    root = crossref_root()

    head = etree.SubElement(root, "head")
    # a uuid serves as the batch id
    add_text(head, "doi_batch_id", str(uuid.uuid4()))
    add_text(head, "timestamp", datetime.now().strftime("%Y%m%d%H%M%S"))
    depositor = etree.SubElement(head, "depositor")
    add_text(depositor, "depositor_name", metadata.depositor)
    add_text(depositor, "email_address", metadata.email)
    add_text(head, "registrant", metadata.registrant)

    work_parent = etree.SubElement(root, "body")
    insert_crossref_work(metadata, work_parent)

    return etree.tostring(
        root,
        doctype='<?xml version="1.0" encoding="UTF-8"?>',
        pretty_print=True,
    )
35
+
36
+
37
def insert_crossref_work(metadata, xml):
    """Append the Crossref representation of one work to ``xml``.

    Only ``JournalArticle`` and ``Article`` records that have both a DOI and a
    URL are written; every other record leaves ``xml`` untouched.

    Args:
        metadata: record exposing ``type``, ``id`` and ``url`` plus whatever
            ``insert_journal`` / ``insert_posted_content`` read.
        xml: parent element that receives the work.

    Returns:
        The ``xml`` element that was passed in, on every path.
    """
    if metadata.type not in ["JournalArticle", "Article"]:
        return xml
    if doi_from_url(metadata.id) is None or metadata.url is None:
        return xml
    if metadata.type == "JournalArticle":
        # insert_journal returns the new <journal> child rather than the
        # parent, so its result is deliberately not rebound to xml
        insert_journal(metadata, xml)
    elif metadata.type == "Article":
        insert_posted_content(metadata, xml)
    # this path used to fall off the end and return None, unlike the guards
    return xml
47
+
48
+
49
def insert_journal(metadata, xml):
    """Build a ``journal`` element for ``metadata`` under ``xml``.

    Writes ``journal_metadata`` (language attribute, full title, group title)
    followed by a ``journal_article`` that each ``insert_*`` helper extends in
    turn.

    Returns:
        The newly created ``journal`` element (not ``xml``).
    """
    journal = etree.SubElement(xml, "journal")

    # the language attribute is the first two characters of metadata.language
    language = metadata.language
    if language is None:
        journal_metadata = etree.SubElement(journal, "journal_metadata")
    else:
        journal_metadata = etree.SubElement(
            journal, "journal_metadata", {"language": language[:2]}
        )

    container = metadata.container
    if container is not None and container.get("title", None) is not None:
        full_title_text = container.get("title")
        etree.SubElement(journal_metadata, "full_title").text = full_title_text
    insert_group_title(metadata, journal_metadata)

    journal_article = etree.SubElement(
        journal, "journal_article", {"publication_type": "full_text"}
    )
    # helpers run in this order; each receives the previous helper's result
    article_steps = (
        insert_crossref_titles,
        insert_crossref_contributors,
        insert_crossref_publication_date,
        insert_crossref_abstract,
        insert_crossref_issn,
        insert_item_number,
        insert_funding_references,
        insert_crossref_access_indicators,
        insert_crossref_relations,
        insert_archive_locations,
        insert_doi_data,
        insert_citation_list,
    )
    for step in article_steps:
        journal_article = step(metadata, journal_article)

    return journal
83
+
84
+
85
def insert_posted_content(metadata, xml):
    """Build a ``posted_content`` element for ``metadata`` under ``xml``.

    The element always has ``type="other"`` and additionally a two-character
    ``language`` attribute when ``metadata.language`` is set. Each
    ``insert_*`` helper then extends it in turn.

    Returns:
        The ``xml`` parent that was passed in.
    """
    attributes = {"type": "other"}
    if metadata.language is not None:
        attributes["language"] = metadata.language[:2]
    posted_content = etree.SubElement(xml, "posted_content", attributes)

    # helpers run in this order; each receives the previous helper's result
    content_steps = (
        insert_group_title,
        insert_crossref_contributors,
        insert_crossref_titles,
        insert_posted_date,
        insert_institution,
        insert_item_number,
        insert_crossref_abstract,
        insert_funding_references,
        insert_crossref_access_indicators,
        insert_crossref_relations,
        insert_archive_locations,
        insert_doi_data,
        insert_citation_list,
    )
    for step in content_steps:
        posted_content = step(metadata, posted_content)

    return xml
109
+
110
+
111
def insert_group_title(metadata, xml):
    """Add a ``group_title`` child holding the first subject's ``subject``.

    Does nothing when ``metadata.subjects`` is ``None`` or empty. Returns
    ``xml`` in every case.
    """
    subjects = metadata.subjects
    if subjects is None:
        return xml
    if len(subjects) == 0:
        return xml
    # only the first subject is used
    title_text = subjects[0].get("subject", None)
    etree.SubElement(xml, "group_title").text = title_text
    return xml
119
+
120
+
121
def insert_crossref_contributors(metadata, xml):
    """Add a ``contributors`` element listing authors and editors.

    Only contributors whose ``contributorRoles`` is exactly ``["Author"]`` or
    ``["Editor"]`` are written. Each becomes an ``organization``,
    ``person_name`` or ``anonymous`` child; the first one written gets
    ``sequence="first"``, the rest ``"additional"``.

    Returns ``xml`` unchanged when ``metadata.contributors`` is ``None`` or
    empty, otherwise ``xml`` with the new ``contributors`` child.
    """
    everyone = metadata.contributors
    if everyone is None:
        return xml
    if len(everyone) == 0:
        return xml

    def is_author_or_editor(entry):
        roles = entry.get("contributorRoles", None)
        return roles == ["Author"] or roles == ["Editor"]

    contributors = etree.SubElement(xml, "contributors")
    selected = [entry for entry in everyone if is_author_or_editor(entry)]

    for position, contributor in enumerate(selected):
        roles = contributor.get("contributorRoles")
        if "Author" in roles:
            contributor_role = "author"
        elif "Editor" in roles:
            contributor_role = "editor"
        else:
            contributor_role = None
        attributes = {
            "contributor_role": contributor_role,
            "sequence": "first" if position == 0 else "additional",
        }

        is_named_organization = (
            contributor.get("type", None) == "Organization"
            and contributor.get("name", None) is not None
        )
        if is_named_organization:
            organization_name = contributor.get("name")
            etree.SubElement(
                contributors, "organization", attributes
            ).text = organization_name
        elif (
            contributor.get("givenName", None) is not None
            or contributor.get("familyName", None) is not None
        ):
            person_name = etree.SubElement(contributors, "person_name", attributes)
            insert_crossref_person(contributor, person_name)
        elif contributor.get("affiliations", None) is not None:
            # no usable name, but the affiliation can still be recorded
            anonymous = etree.SubElement(contributors, "anonymous", attributes)
            insert_crossref_anonymous(contributor, anonymous)
        else:
            etree.SubElement(contributors, "anonymous", attributes)
    return xml
174
+
175
+
176
def insert_crossref_person(contributor, xml):
    """Fill a person element with name, first affiliation and ORCID.

    Writes ``given_name`` / ``surname`` when present, an ``affiliations``
    block built from the first affiliation only, and an ``ORCID`` child when
    ``normalize_orcid`` returns a value for the contributor ``id``.

    Returns the ``xml`` element that was passed in.
    """
    given_name = contributor.get("givenName", None)
    if given_name is not None:
        etree.SubElement(xml, "given_name").text = given_name
    family_name = contributor.get("familyName", None)
    if family_name is not None:
        etree.SubElement(xml, "surname").text = family_name

    if contributor.get("affiliations", None) is not None:
        affiliations = etree.SubElement(xml, "affiliations")
        institution = etree.SubElement(affiliations, "institution")
        affiliation_name = py_.get(contributor, "affiliations.0.name")
        if affiliation_name is not None:
            etree.SubElement(institution, "institution_name").text = affiliation_name
        affiliation_id = py_.get(contributor, "affiliations.0.id")
        if affiliation_id is not None:
            # the id is written as-is with type "ror"
            etree.SubElement(
                institution, "institution_id", {"type": "ror"}
            ).text = affiliation_id

    orcid = normalize_orcid(contributor.get("id", None))
    if orcid is not None:
        etree.SubElement(xml, "ORCID").text = orcid
    return xml
198
+
199
+
200
def insert_crossref_anonymous(contributor, xml):
    """Attach the first affiliation of a nameless contributor to ``xml``.

    Returns ``xml`` untouched when the contributor has no ``affiliations``
    entry; otherwise adds ``affiliations/institution`` and, when available,
    the first affiliation's name.
    """
    if contributor.get("affiliations", None) is None:
        return xml

    affiliations = etree.SubElement(xml, "affiliations")
    institution = etree.SubElement(affiliations, "institution")
    affiliation_name = py_.get(contributor, "affiliations.0.name")
    if affiliation_name is not None:
        etree.SubElement(institution, "institution_name").text = affiliation_name
    return xml
211
+
212
+
213
def insert_crossref_titles(metadata, xml):
    """Add a ``titles`` element with one ``title`` child per title.

    ``metadata.titles`` is passed through ``wrap``; dict entries contribute
    their ``"title"`` value, anything else is written as-is.

    Returns the ``xml`` element that was passed in.
    """
    titles = etree.SubElement(xml, "titles")
    for entry in wrap(metadata.titles):
        text = entry.get("title", None) if isinstance(entry, dict) else entry
        etree.SubElement(titles, "title").text = text
    return xml
222
+
223
+
224
def insert_citation_list(metadata, xml):
    """Add a ``citation_list`` with one ``citation`` per reference with an id.

    References without an ``id`` are skipped. The ``key`` attribute falls back
    to ``ref<N>`` (N = 1-based position in ``metadata.references``) when the
    reference carries no ``key``, because ``None`` is not accepted as an
    attribute value.

    Args:
        metadata: record exposing ``references`` (list of dicts or ``None``).
        xml: parent element that receives the ``citation_list``.

    Returns:
        The ``xml`` element that was passed in.
    """
    if metadata.references is None or len(metadata.references) == 0:
        return xml

    # (reference field, child element name), in the order they are written
    # NOTE(review): Crossref's citation schema appears to name the journal
    # title element "journal_title", not "journal_article" -- confirm before
    # changing the emitted tag.
    simple_fields = (
        ("journal_title", "journal_article"),
        ("author", "author"),
        ("volume", "volume"),
        ("first_page", "first_page"),
        ("publicationYear", "cYear"),
        ("title", "article_title"),
    )

    citation_list = etree.SubElement(xml, "citation_list")
    for position, ref in enumerate(metadata.references, start=1):
        if ref.get("id", None) is None:
            continue
        key = ref.get("key", None)
        if key is None:
            key = f"ref{position}"
        citation = etree.SubElement(citation_list, "citation", {"key": key})
        for field, tag in simple_fields:
            value = ref.get(field, None)
            if value is not None:
                etree.SubElement(citation, tag).text = value
        # id is known to be present here, so the doi child is always written
        etree.SubElement(citation, "doi").text = doi_from_url(ref.get("id"))
        unstructured = ref.get("unstructured", None)
        if unstructured is not None:
            etree.SubElement(citation, "unstructured_citation").text = unstructured
    return xml
257
+
258
+
259
def insert_crossref_access_indicators(metadata, xml):
    """Add an AccessIndicators ``program`` carrying the license URL.

    The URL is read from ``metadata.license["url"]`` and written twice, as
    ``license_ref`` with ``applies_to`` ``"vor"`` and ``"tdm"``. Without a
    license or URL, ``xml`` is returned untouched.
    """
    license_ = metadata.license
    rights_uri = None if license_ is None else license_.get("url", None)
    if rights_uri is None:
        return xml

    program_attributes = {
        "xmlns": "http://www.crossref.org/AccessIndicators.xsd",
        "name": "AccessIndicators",
    }
    program = etree.SubElement(xml, "program", program_attributes)
    for applies_to in ("vor", "tdm"):
        etree.SubElement(
            program, "license_ref", {"applies_to": applies_to}
        ).text = rights_uri
    return xml
277
+
278
+
279
def insert_crossref_relations(metadata, xml):
    """Add a relations ``program`` with one ``related_item`` per relation.

    Relation types are sorted into ``inter_work_relation`` and
    ``intra_work_relation``; relations of any other type are skipped. The
    identifier type is ``doi``, ``issn`` (host ``portal.issn.org``), ``uri``
    or ``other``, depending on the relation ``id``.

    Returns ``xml`` unchanged when ``metadata.relations`` is ``None`` or
    empty, otherwise ``xml`` with the new ``program`` child.
    """
    relations = metadata.relations
    if relations is None:
        return xml
    if len(relations) == 0:
        return xml

    inter_work_types = (
        "IsPartOf",
        "HasPart",
        "IsReviewOf",
        "HasReview",
        "IsRelatedMaterial",
        "HasRelatedMaterial",
    )
    intra_work_types = (
        "IsIdenticalTo",
        "IsPreprintOf",
        "HasPreprint",
        "IsTranslationOf",
        "HasTranslation",
        "IsVersionOf",
        "HasVersion",
    )

    program = etree.SubElement(
        xml,
        "program",
        {
            "xmlns": "http://www.crossref.org/relations.xsd",
            "name": "relations",
        },
    )
    for relation in relations:
        relation_type = relation.get("type", None)
        if relation_type in inter_work_types:
            group = "inter_work_relation"
        elif relation_type in intra_work_types:
            group = "intra_work_relation"
        else:
            continue

        related_item = etree.SubElement(program, "related_item")
        target = relation.get("id", None)
        parsed = furl(target)
        if validate_doi(target):
            identifier_type, _id = "doi", doi_from_url(target)
        elif parsed.host == "portal.issn.org":
            # the ISSN is the last path segment of the portal URL
            identifier_type, _id = "issn", parsed.path.segments[-1]
        elif validate_url(target) == "URL":
            identifier_type, _id = "uri", target
        else:
            identifier_type, _id = "other", target

        if relation_type is not None:
            relationship_type = py_.lower_first(relation.get("type"))
        else:
            relationship_type = None
        attributes = {
            "relationship-type": relationship_type,
            "identifier-type": identifier_type,
        }
        etree.SubElement(related_item, group, attributes).text = _id

    return xml
341
+
342
+
343
def insert_funding_references(metadata, xml):
    """Add a fundref ``program`` with one ``fundgroup`` assertion per funder.

    Each group holds a ``funder_name`` assertion (with a nested
    ``funder_identifier`` when given) and an optional ``award_number``.

    Returns ``xml`` unchanged when ``metadata.funding_references`` is ``None``
    or empty, otherwise ``xml`` with the new ``program`` child.
    """
    references = metadata.funding_references
    if references is None:
        return xml
    if len(references) == 0:
        return xml

    program = etree.SubElement(
        xml,
        "program",
        {
            "xmlns": "http://www.crossref.org/fundref.xsd",
            "name": "fundref",
        },
    )
    for funding_reference in references:
        fundgroup = etree.SubElement(program, "assertion", {"name": "fundgroup"})
        funder_name = etree.SubElement(
            fundgroup, "assertion", {"name": "funder_name"}
        )

        funder_identifier = funding_reference.get("funderIdentifier", None)
        if funder_identifier is not None:
            # the identifier is nested inside the funder_name assertion
            etree.SubElement(
                funder_name, "assertion", {"name": "funder_identifier"}
            ).text = funder_identifier

        award_number = funding_reference.get("awardNumber", None)
        if award_number is not None:
            etree.SubElement(
                fundgroup, "assertion", {"name": "award_number"}
            ).text = award_number

        funder_name.text = funding_reference.get("funderName", None)
    return xml
376
+
377
+
378
def insert_crossref_subjects(metadata, xml):
    """Add a ``subjects`` element with one ``subject`` child per entry.

    Dict entries contribute their ``"subject"`` value; anything else is
    written as-is. Returns ``xml`` untouched when ``metadata.subjects`` is
    ``None``.
    """
    if metadata.subjects is None:
        return xml

    subjects = etree.SubElement(xml, "subjects")
    for entry in metadata.subjects:
        label = entry.get("subject", None) if isinstance(entry, dict) else entry
        etree.SubElement(subjects, "subject").text = label
    return xml
389
+
390
+
391
def insert_crossref_language(metadata, xml):
    """Add a ``language`` child holding ``metadata.language`` when it is set.

    Returns the ``xml`` element that was passed in.
    """
    language = metadata.language
    if language is not None:
        etree.SubElement(xml, "language").text = language
    return xml
397
+
398
+
399
def insert_crossref_publication_date(metadata, xml):
    """Add an online ``publication_date`` (month, day, year) to ``xml``.

    Args:
        metadata: record whose ``date`` mapping may hold a ``"published"``
            date string.
        xml: parent element that receives ``publication_date``.

    Returns:
        The ``xml`` element that was passed in; it is left untouched when no
        published date is available.
    """
    # parse() raises on None/empty input instead of returning None, so a
    # missing date has to be detected before parsing
    published = (metadata.date or {}).get("published", None)
    if not published:
        return xml
    # NOTE(review): dateutil fills components missing from the input from its
    # default (the current date), so a year-only value would presumably get
    # today's month and day -- confirm inputs are always full dates.
    pub_date = parse(published)

    publication_date = etree.SubElement(
        xml, "publication_date", {"media_type": "online"}
    )
    etree.SubElement(publication_date, "month").text = f"{pub_date.month:d}"
    etree.SubElement(publication_date, "day").text = f"{pub_date.day:d}"
    etree.SubElement(publication_date, "year").text = str(pub_date.year)
    return xml
412
+
413
+
414
def insert_posted_date(metadata, xml):
    """Add an online ``posted_date`` (month, day, year) to ``xml``.

    Args:
        metadata: record whose ``date`` mapping may hold a ``"published"``
            date string.
        xml: parent element that receives ``posted_date``.

    Returns:
        The ``xml`` element that was passed in; it is left untouched when no
        published date is available.
    """
    # parse() raises on None/empty input instead of returning None, so a
    # missing date has to be detected before parsing
    published = (metadata.date or {}).get("published", None)
    if not published:
        return xml
    # NOTE(review): dateutil fills components missing from the input from its
    # default (the current date), so a year-only value would presumably get
    # today's month and day -- confirm inputs are always full dates.
    pub_date = parse(published)

    posted_date = etree.SubElement(xml, "posted_date", {"media_type": "online"})
    etree.SubElement(posted_date, "month").text = f"{pub_date.month:d}"
    etree.SubElement(posted_date, "day").text = f"{pub_date.day:d}"
    etree.SubElement(posted_date, "year").text = str(pub_date.year)
    return xml
425
+
426
+
427
def insert_institution(metadata, xml):
    """Add an ``institution`` element named after the publisher.

    Args:
        metadata: record whose ``publisher`` is a mapping with an optional
            ``"name"``, or ``None``.
        xml: parent element that receives ``institution``.

    Returns:
        The ``xml`` element that was passed in; it is left untouched when
        there is no publisher name.
    """
    publisher = metadata.publisher
    # publisher may be absent; calling .get() on None raised AttributeError
    name = None if publisher is None else publisher.get("name", None)
    if name is None:
        return xml
    institution = etree.SubElement(xml, "institution")
    etree.SubElement(institution, "institution_name").text = name
    return xml
436
+
437
+
438
def insert_item_number(metadata, xml):
    """Add one ``item_number`` child per identifier that has a value.

    Identifiers with an ``identifierType`` get it, lower-cased, as the
    ``item_number_type`` attribute. UUID values are written without hyphens;
    the identifier dicts in ``metadata`` are left unmodified.

    Args:
        metadata: record exposing ``identifiers`` (list of dicts or ``None``).
        xml: parent element that receives the ``item_number`` children.

    Returns:
        The ``xml`` element that was passed in.
    """
    if metadata.identifiers is None:
        return xml
    for identifier in metadata.identifiers:
        value = identifier.get("identifier", None)
        if value is None:
            continue
        identifier_type = identifier.get("identifierType", None)
        if identifier_type is None:
            etree.SubElement(xml, "item_number").text = value
            continue
        if identifier_type == "UUID":
            # strip hyphens from UUIDs, as item_number can only be 32
            # characters long (UUIDv4 is 36 characters long); done on a local
            # value so the caller's metadata is not rewritten
            value = value.replace("-", "")
        etree.SubElement(
            xml,
            "item_number",
            {"item_number_type": identifier_type.lower()},
        ).text = value
    return xml
461
+
462
+
463
def insert_archive_locations(metadata, xml):
    """Add an ``archive_locations`` element with one ``archive`` per entry.

    Each entry of ``metadata.archive_locations`` becomes the ``name``
    attribute of an ``archive`` child. Returns ``xml`` untouched when the
    attribute is ``None``.
    """
    locations = metadata.archive_locations
    if locations is None:
        return xml

    holder = etree.SubElement(xml, "archive_locations")
    for location_name in locations:
        etree.SubElement(holder, "archive", {"name": location_name})
    return xml
471
+
472
+
473
def insert_doi_data(metadata, xml):
    """Add ``doi_data`` (DOI, landing URL, text-mining collection) to ``xml``.

    The collection always lists ``metadata.url`` as ``text/html`` and then one
    item per entry in ``metadata.files``. The file dicts in ``metadata`` are
    left unmodified.

    Args:
        metadata: record exposing ``id``, ``url`` and ``files``.
        xml: parent element that receives ``doi_data``.

    Returns:
        The ``xml`` element that was passed in; it is left untouched when the
        record has no DOI or no URL.
    """
    doi = doi_from_url(metadata.id)
    if doi is None or metadata.url is None:
        return xml
    doi_data = etree.SubElement(xml, "doi_data")
    etree.SubElement(doi_data, "doi").text = doi
    etree.SubElement(doi_data, "resource").text = metadata.url
    collection = etree.SubElement(doi_data, "collection", {"property": "text-mining"})
    item = etree.SubElement(collection, "item")
    etree.SubElement(item, "resource", {"mime_type": "text/html"}).text = metadata.url
    if metadata.files is None:
        return xml
    for file in metadata.files:
        mime_type = file.get("mimeType", "")
        # Crossref schema currently doesn't support text/markdown; substitute
        # locally instead of rewriting the caller's file entry
        if mime_type == "text/markdown":
            mime_type = "text/plain"
        item = etree.SubElement(collection, "item")
        etree.SubElement(item, "resource", {"mime_type": mime_type}).text = file.get(
            "url", None
        )
    return xml
494
+
495
+
496
def insert_crossref_license(metadata, xml):
    """Add a ``license`` element with a single ``rights`` child.

    A dict license is used directly; any other value is treated as the rights
    text, with ``normalize_id`` of it as the rights URI. Attributes are passed
    through ``compact`` before being written.

    Returns ``xml`` untouched when ``metadata.license`` is ``None``.
    """
    license_value = metadata.license
    if license_value is None:
        return xml

    license_ = etree.SubElement(xml, "license")
    if isinstance(license_value, dict):
        rights = license_value
    else:
        rights = {
            "rights": license_value,
            "rightsUri": normalize_id(license_value),
        }

    attribute_candidates = {
        "rightsURI": rights.get("rightsUri", None),
        "rightsIdentifier": rights.get("rightsIdentifier", None),
        "rightsIdentifierScheme": rights.get("rightsIdentifierScheme"),
        "schemeURI": rights.get("schemeUri", None),
        "xml:lang": rights.get("lang", None),
    }
    attributes = compact(attribute_candidates)
    rights_text = rights.get("rights", None)
    etree.SubElement(license_, "rights", attributes).text = rights_text
    return xml
518
+
519
+
520
def insert_crossref_issn(metadata, xml):
    """Add an ``issn`` child when the container is identified by an ISSN.

    Returns ``xml`` untouched when there is no container or its
    ``identifierType`` is not ``"ISSN"``.
    """
    container = metadata.container
    if container is None:
        return xml
    if container.get("identifierType", None) != "ISSN":
        return xml
    issn_text = container.get("identifier", None)
    etree.SubElement(xml, "issn").text = issn_text
    return xml
529
+
530
+
531
def insert_crossref_abstract(metadata, xml):
    """Add a JATS ``abstract`` holding the first description as one paragraph.

    Args:
        metadata: record exposing ``descriptions`` (list of dicts or plain
            values, or ``None``).
        xml: parent element that receives ``abstract``.

    Returns:
        The ``xml`` element that was passed in; it is left untouched when
        there are no descriptions.
    """
    # an empty list has no first entry; indexing it raised IndexError
    if not metadata.descriptions:
        return xml
    first = metadata.descriptions[0]
    text = first.get("description", None) if isinstance(first, dict) else first
    abstract = etree.SubElement(
        xml, "abstract", {"xmlns": "http://www.ncbi.nlm.nih.gov/JATS1"}
    )
    etree.SubElement(abstract, "p").text = text
    return xml
545
+
546
+
547
def crossref_root():
    """Return a fresh, empty ``doi_batch`` root element.

    The element is parsed from a literal that declares the Crossref 5.3.1
    default namespace plus the xsi, jats, fr and mml prefixes.
    """
    # adjacent literals are concatenated into one string; every piece after
    # the first starts with the space that separates the attributes
    doi_batch = (
        '<doi_batch xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xmlns="http://www.crossref.org/schema/5.3.1"'
        ' xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1"'
        ' xmlns:fr="http://www.crossref.org/fundref.xsd"'
        ' xmlns:mml="http://www.w3.org/1998/Math/MathML"'
        ' xsi:schemaLocation="http://www.crossref.org/schema/5.3.1'
        ' https://www.crossref.org/schemas/crossref5.3.1.xsd"'
        ' version="5.3.1"></doi_batch>'
    )
    return etree.fromstring(doi_batch)
551
+
552
+
553
def generate_crossref_xml_list(metalist) -> Optional[str]:
    """Serialize every item of ``metalist`` into one Crossref deposit document.

    Returns ``None`` when ``metalist.is_valid`` is falsy. Otherwise builds the
    ``doi_batch`` root, fills ``head`` (falling back to ``"test"`` /
    ``"info@example.org"`` for missing depositor fields) and passes each item
    to ``insert_crossref_work`` with the shared ``body`` element.

    NOTE(review): ``etree.tostring`` is called without an ``encoding``
    argument, so the result is presumably ``bytes`` rather than ``str`` --
    confirm with callers.
    """
    if not metalist.is_valid:
        return None

    def add_text(parent, tag, text):
        # create <tag> under parent and set its text in one step
        etree.SubElement(parent, tag).text = text

    root = crossref_root()

    head = etree.SubElement(root, "head")
    # a uuid serves as the batch id
    add_text(head, "doi_batch_id", str(uuid.uuid4()))
    add_text(head, "timestamp", datetime.now().strftime("%Y%m%d%H%M%S"))
    depositor = etree.SubElement(head, "depositor")
    add_text(depositor, "depositor_name", metalist.depositor or "test")
    add_text(depositor, "email_address", metalist.email or "info@example.org")
    add_text(head, "registrant", metalist.registrant or "test")

    work_parent = etree.SubElement(root, "body")
    for item in metalist.items:
        insert_crossref_work(item, work_parent)

    return etree.tostring(
        root,
        doctype='<?xml version="1.0" encoding="UTF-8"?>',
        pretty_print=True,
    )