commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. commonmeta/__init__.py +12 -15
  2. commonmeta/api_utils.py +3 -2
  3. commonmeta/base_utils.py +186 -3
  4. commonmeta/cli.py +114 -34
  5. commonmeta/constants.py +20 -0
  6. commonmeta/file_utils.py +112 -0
  7. commonmeta/metadata.py +102 -42
  8. commonmeta/readers/codemeta_reader.py +1 -1
  9. commonmeta/readers/crossref_reader.py +23 -10
  10. commonmeta/readers/crossref_xml_reader.py +1 -1
  11. commonmeta/readers/datacite_reader.py +6 -4
  12. commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
  13. commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
  14. commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
  15. commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
  16. commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
  17. commonmeta/resources/crossref/fundref.xsd +29 -19
  18. commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
  19. commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
  20. commonmeta/resources/crossref/module-ali.xsd +14 -6
  21. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
  22. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
  23. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
  24. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
  25. commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
  26. commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
  27. commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
  28. commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
  29. commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
  30. commonmeta/resources/crossref/xml.xsd +287 -0
  31. commonmeta/schema_utils.py +25 -0
  32. commonmeta/utils.py +25 -9
  33. commonmeta/writers/bibtex_writer.py +5 -5
  34. commonmeta/writers/commonmeta_writer.py +4 -17
  35. commonmeta/writers/crossref_xml_writer.py +1031 -4
  36. commonmeta/writers/csl_writer.py +1 -2
  37. commonmeta/writers/datacite_writer.py +8 -4
  38. commonmeta/writers/inveniordm_writer.py +277 -2
  39. commonmeta/writers/ris_writer.py +3 -3
  40. commonmeta/writers/schema_org_writer.py +10 -5
  41. {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/METADATA +4 -2
  42. {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/RECORD +45 -31
  43. commonmeta/crossref_utils.py +0 -662
  44. commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
  45. {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
  46. {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
  47. {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,1046 @@
1
1
  """Crossref XML writer for commonmeta-py"""
2
2
 
3
+ import io
4
+ from datetime import datetime
5
+ from time import time
3
6
  from typing import Optional
4
7
 
8
+ import orjson as json
9
+ import requests
10
+ from dateutil.parser import parse as date_parse
11
+ from furl import furl
12
+ from marshmallow import Schema, fields
13
+ from pydash import py_
14
+ from requests_toolbelt.multipart.encoder import MultipartEncoder
15
+
16
+ from ..base_utils import compact, parse_xml, unparse_xml, unparse_xml_list, wrap
5
17
  from ..constants import Commonmeta
6
- from ..crossref_utils import generate_crossref_xml, generate_crossref_xml_list
18
+ from ..doi_utils import doi_from_url, validate_doi
19
+ from ..utils import validate_url
20
+
21
# Values that convert_crossref_xml passes through unchanged as the "type"
# attribute of posted content; any other additional_type becomes "other".
POSTED_CONTENT_TYPES = [
    "preprint",
    "working_paper",
    "letter",
    "dissertation",
    "report",
    "review",
    "other",
]

# Maps keys of the converted metadata dict to the (namespaced) element names
# CrossrefXMLSchema dumps them as, so the original key order can be restored
# after dumping.
MARSHMALLOW_MAP = {
    "abstracts": "jats:abstract",
    "license": "ai:program",
    "funding_references": "fr:program",
    "relations": "rel:program",
    "references": "citation_list",
}
38
+
39
+
40
class CrossrefXMLSchema(Schema):
    """Marshmallow schema for the dict produced by convert_crossref_xml.

    Each attribute names one key of the converted metadata. Fields with a
    ``data_key`` are dumped under that (namespaced) element name instead of
    the attribute name; MARSHMALLOW_MAP mirrors those renames. Keys of the
    converted dict that have no field here are not part of the dump.
    """

    # root element
    book = fields.Dict()
    conference = fields.Dict()
    database = fields.Dict()
    dissertation = fields.Dict()
    journal = fields.Dict()
    peer_review = fields.Dict()
    report_paper = fields.Dict()
    pending_publication = fields.Dict()
    posted_content = fields.Dict()
    sa_component = fields.Dict()
    standard = fields.Dict()

    # elements
    group_title = fields.String()
    book_metadata = fields.Dict()
    database_metadata = fields.Dict()
    event_metadata = fields.Dict()
    proceedings_metadata = fields.Dict()
    journal_metadata = fields.Dict()
    journal_issue = fields.Dict()
    journal_article = fields.Dict()
    component = fields.Dict()
    titles = fields.Dict()
    contributors = fields.Dict()
    abstracts = fields.List(fields.Dict(), data_key="jats:abstract")
    publication_date = fields.Dict()
    posted_date = fields.Dict()
    review_date = fields.Dict()
    approval_date = fields.Dict()
    publisher_item = fields.Dict()
    # "institution" used to be declared twice; the second, identical
    # declaration simply overwrote the first and has been removed.
    institution = fields.Dict()
    item_number = fields.Dict()
    isbn = fields.String()
    issn = fields.String()
    publisher = fields.Dict()
    description = fields.Dict()
    funding_references = fields.Dict(data_key="fr:program")
    license = fields.Dict(data_key="ai:program")
    relations = fields.Dict(data_key="rel:program")
    archive_locations = fields.List(fields.Dict())
    doi_data = fields.Dict(data_key="doi_data")
    references = fields.Dict(data_key="citation_list")
87
+
88
+
89
def convert_crossref_xml(metadata: Commonmeta) -> Optional[dict]:
    """Build the ordered dict of Crossref XML parts for one metadata record.

    Returns None when the record type is not in the supported list, when the
    id does not contain a DOI, or when the record has no url. The key order
    of the returned dict is significant: the writers use it to order the
    dumped elements.
    """
    supported_types = [
        "Article",
        "BlogPost",
        "Book",
        "BookChapter",
        "Component",
        "Dataset",
        "Dissertation",
        "JournalArticle",
        "PeerReview",
        "ProceedingsArticle",
        "Report",
        "Standard",
    ]
    if metadata.type not in supported_types:
        return None

    # a DOI and a landing page URL are both required
    if doi_from_url(metadata.id) is None or metadata.url is None:
        return None

    # parts shared by most record types
    titles = get_titles(metadata)
    contributors = get_contributors(metadata)
    abstracts = get_abstracts(metadata)
    relations = get_relations(metadata)
    doi_data = get_doi_data(metadata)
    references = get_references(metadata)
    funding_references = get_funding_references(metadata)
    license_program = get_license(metadata)

    work_type = metadata.type

    if work_type in ("Article", "BlogPost"):
        # both are written as posted content; only articles may carry a
        # more specific posted content type than "other"
        posted_type = "other"
        if work_type == "Article" and metadata.additional_type in POSTED_CONTENT_TYPES:
            posted_type = metadata.additional_type
        return compact(
            {
                "posted_content": get_attributes(
                    metadata, type=posted_type, language=metadata.language
                ),
                "group_title": get_group_title(metadata),
                "contributors": contributors,
                "titles": titles,
                "posted_date": get_publication_date(metadata),
                "institution": get_institution(metadata),
                "item_number": get_item_number(metadata),
                "abstracts": abstracts,
                "funding_references": funding_references,
                "license": license_program,
                "relations": relations,
                "doi_data": doi_data,
                "references": references,
            }
        )

    if work_type == "Book":
        return compact(
            {
                "book": get_attributes(metadata, book_type="monograph"),
                "book_metadata": get_book_metadata(metadata),
                "contributors": contributors,
                "titles": titles,
                "abstracts": abstracts,
                "publication_date": get_publication_date(metadata, media_type="online"),
                "isbn": get_isbn(metadata),
                "publisher": get_publisher(metadata),
                "publisher_item": None,
                "funding_references": funding_references,
                "license": license_program,
                "relations": relations,
                "archive_locations": get_archive_locations(metadata),
                "doi_data": doi_data,
                "references": references,
            }
        )

    if work_type == "BookChapter":
        return compact(
            {
                "book": get_attributes(metadata, book_type="monograph"),
                "book_metadata": get_book_metadata(metadata),
                "contributors": contributors,
                "titles": titles,
                "publication_date": get_publication_date(metadata, media_type="online"),
                "isbn": get_isbn(metadata),
                "publisher": get_publisher(metadata),
                "abstracts": abstracts,
                "funding_references": funding_references,
                "license": license_program,
                "relations": relations,
                "archive_locations": get_archive_locations(metadata),
                "doi_data": doi_data,
                "references": references,
            }
        )

    if work_type == "Component":
        return compact(
            {
                "sa_component": get_attributes(metadata),
                "component": {"@reg-agency": "CrossRef"},
                "description": None,
                "doi_data": doi_data,
            }
        )

    if work_type == "Dataset":
        publisher_name = py_.get(metadata, "publisher.name")
        return compact(
            {
                "database": {},
                "database_metadata": get_database_metadata(metadata),
                "publisher_item": {"title": publisher_name} if publisher_name else None,
                "institution": get_institution(metadata),
                "component": {"@parent_relation": "isPartOf"},
                "titles": titles,
                "contributors": contributors,
                "publication_date": get_publication_date(metadata, media_type="online"),
                "doi_data": doi_data,
            }
        )

    if work_type == "Dissertation":
        return compact(
            {
                "dissertation": get_attributes(metadata),
                "contributors": contributors,
                "titles": titles,
                "approval_date": get_publication_date(metadata),
                "institution": get_institution(metadata),
                "degree": None,
                "isbn": get_isbn(metadata),
                "publisher_item": None,
                "funding_references": funding_references,
                "license": license_program,
                "relations": relations,
                "doi_data": doi_data,
            }
        )

    if work_type == "JournalArticle":
        return compact(
            {
                "journal": {},
                "journal_metadata": get_journal_metadata(metadata),
                "journal_issue": get_journal_issue(metadata),
                "journal_article": get_attributes(metadata, language=metadata.language),
                "titles": titles,
                "contributors": contributors,
                "abstracts": abstracts,
                "publication_date": get_publication_date(metadata, media_type="online"),
                "publisher_item": None,
                "funding_references": funding_references,
                "license": license_program,
                "crossmark": None,
                "relations": relations,
                "archive_locations": get_archive_locations(metadata),
                "doi_data": doi_data,
                "references": references,
            }
        )

    if work_type == "PeerReview":
        return compact(
            {
                "peer_review": get_attributes(
                    metadata, type="author-comment", stage="pre-publication"
                ),
                "contributors": contributors,
                "titles": titles,
                "review_date": get_publication_date(metadata),
                "license": license_program,
                "relations": relations,
                "doi_data": doi_data,
            }
        )

    if work_type == "ProceedingsArticle":
        return compact(
            {
                "conference": get_attributes(metadata),
                "event_metadata": get_event_metadata(metadata),
                "proceedings_metadata": get_proceedings_metadata(metadata),
                "proceedings_title": py_.get(metadata, "container.title"),
                "publisher": get_publisher(metadata),
                "conference_paper": get_attributes(metadata),
                "contributors": contributors,
                "titles": titles,
                "publication_date": get_publication_date(metadata),
                "abstracts": abstracts,
                "publisher_item": None,
                "funding_references": funding_references,
                "license": license_program,
                "crossmark": None,
                "relations": relations,
                "archive_locations": get_archive_locations(metadata),
                "doi_data": doi_data,
                "references": references,
            }
        )

    if work_type == "Standard":
        return compact(
            {
                "standard": get_attributes(metadata),
                "journal_metadata": get_journal_metadata(metadata),
                "journal_issue": get_journal_issue(metadata),
                "titles": titles,
                "contributors": contributors,
                "publication_date": get_publication_date(metadata),
                "publisher_item": None,
                "funding_references": funding_references,
                "license": license_program,
                "crossmark": None,
                "relations": relations,
                "archive_locations": get_archive_locations(metadata),
                "doi_data": doi_data,
                "references": references,
                "component_list": None,
            }
        )

    # NOTE(review): "Report" passes the supported-type check above but has no
    # conversion branch, so it ends up here and yields None.
    return None
7
338
 
8
339
 
9
340
def write_crossref_xml(metadata: Commonmeta) -> Optional[str]:
    """Serialize a single metadata record as Crossref XML.

    Returns None when convert_crossref_xml cannot convert the record.
    """
    converted = convert_crossref_xml(metadata)
    if converted is None:
        return None

    dumped = CrossrefXMLSchema().dump(converted)

    # Re-create the dumped dict in the key order of the converted data, using
    # the renamed (data_key) element names where the schema defines them.
    ordered = {}
    for key in converted:
        element = MARSHMALLOW_MAP.get(key, key)
        if element in dumped:
            ordered[element] = dumped[element]

    return unparse_xml(ordered, dialect="crossref")
12
354
 
13
355
 
14
356
def write_crossref_xml_list(metalist):
    """Serialize all items of a metadata list as one Crossref XML document.

    Returns None when ``metalist`` is None or not valid. Items that
    convert_crossref_xml cannot convert (it returns None for them) are left
    out of the document instead of raising an AttributeError.
    """
    if metalist is None or not metalist.is_valid:
        return None

    schema = CrossrefXMLSchema()
    crossref_xml_list = []
    for item in metalist.items:
        data = convert_crossref_xml(item)
        if data is None:
            # unsupported type, or missing DOI/url: nothing to write
            continue
        dumped = schema.dump(data)

        # Ensure the order of fields in the XML matches the expected order
        field_order = [MARSHMALLOW_MAP.get(key, key) for key in data]
        crossref_xml_list.append(
            {key: dumped[key] for key in field_order if key in dumped}
        )

    head = {
        "depositor": metalist.depositor,
        "email": metalist.email,
        "registrant": metalist.registrant,
    }
    return unparse_xml_list(crossref_xml_list, dialect="crossref", head=head)
377
+
378
+
379
def push_crossref_xml_list(metalist, login_id: str, login_passwd: str) -> bytes:
    """Push crossref_xml list to Crossref API, returns the API response.

    The XML generated for ``metalist`` is uploaded as a multipart form to the
    Crossref deposit servlet. On success a JSON document (bytes) is returned
    with one entry per item: its DOI, the submission time and the status
    "submitted".

    Raises:
        CrossrefError: if no XML could be generated, if the HTTP request
            fails, or if the response does not report SUCCESS.
    """
    payload = write_crossref_xml_list(metalist)
    if payload is None:
        # Previously None was wrapped in io.BytesIO and uploaded as an
        # empty file; fail early with the same exception type instead.
        raise CrossrefError("Error uploading batch: no Crossref XML to upload")

    # Convert string to bytes if necessary
    if isinstance(payload, str):
        payload = payload.encode("utf-8")

    # The filename displayed in the Crossref admin interface, using the current UNIX timestamp
    filename = f"{int(time())}"

    # Create multipart form data
    multipart_data = MultipartEncoder(
        fields={
            "fname": (filename, io.BytesIO(payload), "application/xml"),
            "operation": "doMDUpload",
            "login_id": login_id,
            "login_passwd": login_passwd,
        }
    )

    # Set up the request
    post_url = "https://doi.crossref.org/servlet/deposit"
    headers = {"Content-Type": multipart_data.content_type}

    try:
        # Send the request
        resp = requests.post(post_url, data=multipart_data, headers=headers, timeout=10)
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise CrossrefError(f"Error uploading batch: {str(e)}") from e

    # Parse the response; the status is read from the <h2> of the HTML body
    response = parse_xml(resp.content)
    status = py_.get(response, "html.body.h2")
    if status != "SUCCESS":
        message = py_.get(response, "html.body.p")
        raise CrossrefError(f"Error uploading batch: {message}")

    # one timestamp for the whole batch
    updated = datetime.now().isoformat("T", "seconds")
    items = [
        {
            "doi": item.id,
            "updated": updated,
            "status": "submitted",
        }
        for item in metalist.items
    ]

    # orjson has different options
    return json.dumps(items, option=json.OPT_INDENT_2)
433
+
434
+
435
def get_attributes(obj, **kwargs) -> dict:
    """Build the attribute dict of a root element from keyword arguments.

    Each supported keyword becomes an "@"-prefixed key; the dict is passed
    through compact() before it is returned. ``obj`` is not used.
    """
    attribute_names = ("type", "book_type", "language", "stage", "reg-agency")
    return compact({f"@{name}": kwargs.get(name, None) for name in attribute_names})
446
+
447
+
448
def get_journal_metadata(obj) -> Optional[dict]:
    """Collect language, full title and ISSN of the containing journal."""
    issn = None
    if py_.get(obj, "container.identifierType") == "ISSN":
        # the container identifier is only used when it is typed as ISSN
        issn = py_.get(obj, "container.identifier")

    journal_metadata = {
        "@language": py_.get(obj, "language"),
        "full_title": py_.get(obj, "container.title"),
        "issn": issn,
    }
    return compact(journal_metadata)
462
+
463
+
464
def get_book_metadata(obj) -> Optional[dict]:
    """Return the book metadata attributes (currently only the language)."""
    book_metadata = {"@language": py_.get(obj, "language")}
    return compact(book_metadata)
470
+
471
+
472
def get_database_metadata(obj) -> Optional[dict]:
    """Return the database metadata attributes (currently only the language)."""
    database_metadata = {"@language": py_.get(obj, "language")}
    return compact(database_metadata)
478
+
479
+
480
def get_event_metadata(obj) -> Optional[dict]:
    """Describe the conference using the container title and location.

    Returns None when the container has no title.
    """
    conference_name = py_.get(obj, "container.title")
    if conference_name is None:
        return None

    event = {
        "conference_name": conference_name,
        "conference_location": py_.get(obj, "container.location"),
        "conference_date": None,
    }
    return compact(event)
492
+
493
+
494
def get_proceedings_metadata(obj) -> Optional[dict]:
    """Describe the proceedings using the record language and container title.

    Returns None when the container has no title.
    """
    proceedings_title = py_.get(obj, "container.title")
    if proceedings_title is None:
        return None

    proceedings = {
        "@language": py_.get(obj, "language"),
        "proceedings_title": proceedings_title,
    }
    return compact(proceedings)
505
+
506
+
507
def get_journal_issue(obj) -> Optional[dict]:
    """Collect publication date, volume and issue of the journal issue."""
    volume = py_.get(obj, "container.volume")
    # the volume is nested in its own element when present
    journal_volume = None if volume is None else {"volume": volume}

    journal_issue = {
        "publication_date": get_publication_date(obj),
        "journal_volume": journal_volume,
        "issue": py_.get(obj, "container.issue"),
    }
    return compact(journal_issue)
519
+
520
+
521
def get_institution(obj) -> Optional[dict]:
    """Describe the institution using the container title and identifier.

    Returns None when the container has no title. The institution_id is
    only included when the container identifier is typed as "ROR".
    """
    institution_name = py_.get(obj, "container.title")
    if institution_name is None:
        return None

    institution_id = None
    # The key was misspelled "identifierTyoe", so this comparison never
    # matched and the ROR id was silently dropped.
    if py_.get(obj, "container.identifierType") == "ROR":
        institution_id = {
            "#text": py_.get(obj, "container.identifier"),
            "@type": "ror",
        }

    return compact(
        {
            "institution_name": institution_name,
            "institution_id": institution_id,
        }
    )
537
+
538
+
539
def get_titles(obj) -> Optional[dict]:
    """Collect the title and subtitle of the record.

    Title entries may be plain strings or dicts. A dict with titleType
    "Subtitle" provides the subtitle, any other dict provides the title.
    When several entries provide the same part, the last one wins. Entries
    that are neither strings nor dicts are ignored. An empty dict is
    returned when there are no usable titles.
    """
    titles = {}
    for entry in wrap(py_.get(obj, "titles", [])):
        if isinstance(entry, str):
            titles["title"] = entry
        elif isinstance(entry, dict):
            # The type check used to test the accumulator dict instead of
            # the entry, so an entry of any other type crashed on .get().
            if entry.get("titleType", None) == "Subtitle":
                titles["subtitle"] = entry.get("title", None)
            else:
                titles["title"] = entry.get("title", None)
    return titles
551
+
552
+
553
def get_contributors(obj) -> Optional[dict]:
    """Group authors and editors into person, organization and anonymous lists.

    Only contributors whose contributorRoles is exactly ["Author"] or
    ["Editor"] are used. The first of them gets sequence "first", all others
    "additional". Returns None when there are no contributors at all or none
    of them ends up in a group.
    """

    def map_affiliations(affiliations):
        """Convert affiliations into institution elements."""
        if affiliations is None:
            return None

        mapped = []
        for affiliation in affiliations:
            institution_id = None
            if affiliation.get("id", None) is not None:
                institution_id = {
                    "#text": affiliation.get("id"),
                    "@type": "ror",
                }
            institution = compact(
                {
                    "institution_name": affiliation.get("name", None),
                    "institution_id": institution_id,
                }
            )
            mapped.append(compact({"institution": institution}))
        return mapped

    all_contributors = py_.get(obj, "contributors")
    if all_contributors is None or len(all_contributors) == 0:
        return None

    accepted_roles = (["Author"], ["Editor"])
    selected = [
        c for c in all_contributors if c.get("contributorRoles", None) in accepted_roles
    ]

    person_names = []
    organizations = []
    anonymous_contributors = []

    for position, contributor in enumerate(selected):
        roles = contributor.get("contributorRoles")
        if "Author" in roles:
            contributor_role = "author"
        elif "Editor" in roles:
            contributor_role = "editor"
        else:
            contributor_role = None

        sequence = "additional" if position else "first"

        is_named_organization = (
            contributor.get("type", None) == "Organization"
            and contributor.get("name", None) is not None
        )
        has_personal_name = (
            contributor.get("givenName", None) is not None
            or contributor.get("familyName", None) is not None
        )

        if is_named_organization:
            organizations.append(
                {
                    "@contributor_role": contributor_role,
                    "@sequence": sequence,
                    "#text": contributor.get("name"),
                }
            )
        elif has_personal_name:
            person = {
                "@contributor_role": contributor_role,
                "@sequence": sequence,
                "given_name": contributor.get("givenName", None),
                "surname": contributor.get("familyName", None),
                "affiliations": map_affiliations(contributor.get("affiliations", None)),
                "ORCID": contributor.get("id", None),
            }
            person_names.append(compact(person))
        else:
            anonymous = {
                "@contributor_role": contributor_role,
                "@sequence": sequence,
                "affiliations": map_affiliations(contributor.get("affiliations", None)),
            }
            anonymous_contributors.append(compact(anonymous))

    grouped = {
        "person_name": person_names,
        "organization": organizations,
        "anonymous": anonymous_contributors,
    }
    # keep only the groups that actually have members
    result = {group: members for group, members in grouped.items() if members}
    return result or None
653
+
654
+
655
def get_publisher(obj) -> Optional[dict]:
    """Return the publisher element, or None when there is no publisher name."""
    publisher_name = py_.get(obj, "publisher.name")
    return None if publisher_name is None else {"publisher_name": publisher_name}
663
+
664
+
665
def get_abstracts(obj) -> Optional[list]:
    """Convert descriptions of type "Abstract" or "Other" into JATS abstracts.

    Returns None when the record has no descriptions, otherwise a (possibly
    empty) list with one abstract per matching description.
    """
    if py_.get(obj, "descriptions") is None:
        return None

    return [
        {
            "@xmlns:jats": "http://www.ncbi.nlm.nih.gov/JATS1",
            "jats:p": description.get("description", None),
        }
        for description in wrap(py_.get(obj, "descriptions", []))
        if description.get("type", None) in ("Abstract", "Other")
    ]
687
+
688
+
689
def get_group_title(obj) -> Optional[str]:
    """Get group title from metadata.

    The group title is taken from the first subject, which may be a dict
    with a "subject" key or a plain string (both forms are also accepted by
    get_subjects). Returns None when there are no subjects or the first
    subject has no usable text.
    """
    subjects = py_.get(obj, "subjects")
    if subjects is None or len(subjects) == 0:
        return None

    first_subject = py_.get(obj, "subjects[0]")
    if isinstance(first_subject, dict):
        group_title = first_subject.get("subject", None)
    else:
        group_title = first_subject

    # Previously a missing "subject" key or a plain-string subject led to
    # calling .startswith() on None.
    if not isinstance(group_title, str):
        return None

    # strip optional FOS (Field of Science) prefix
    if group_title.startswith("FOS: "):
        group_title = group_title[5:]

    return group_title
700
+
701
+
702
def get_item_number(obj) -> Optional[dict]:
    """Return the first UUID identifier as an item number.

    Returns None when the record has no identifiers or none of them is a
    UUID with a value.
    """
    identifiers = py_.get(obj, "identifiers")
    if identifiers is None:
        return None

    for identifier in identifiers:
        if identifier.get("identifierType", None) != "UUID":
            continue
        value = identifier.get("identifier", None)
        if value is None:
            # a UUID entry without a value used to crash on None.replace()
            continue
        # strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
        return {
            "@item_number_type": "uuid",
            "#text": value.replace("-", ""),
        }
    return None
714
+
715
+
716
def get_publication_date(obj, media_type: Optional[str] = None) -> Optional[dict]:
    """Split the published date into month, day and year elements.

    Args:
        obj: metadata record; its "date.published" value is parsed.
        media_type: optional value for the "@media_type" attribute.

    Returns:
        A dict with month, day and year as strings (plus "@media_type" when
        given), or None when the record has no published date.
    """
    published = py_.get(obj, "date.published")
    if not published:
        # The None check used to run after parsing, but the parser raises
        # on None (and on an empty string) before it could be reached.
        return None

    # NOTE(review): for partial dates such as "2023" dateutil fills the
    # missing month/day from its default date - confirm this is intended.
    pub_date = date_parse(published)

    return compact(
        {
            "@media_type": media_type,
            "month": f"{pub_date.month:d}",
            "day": f"{pub_date.day:d}",
            "year": str(pub_date.year),
        }
    )
730
+
731
+
732
def get_archive_locations(obj) -> Optional[list]:
    """Turn each archive location into an archive element.

    Returns None when the record has no archive locations.
    """
    locations = py_.get(obj, "archive_locations")
    if locations is None or len(locations) == 0:
        return None

    archives = []
    for location in locations:
        archives.append(compact({"archive": {"@name": location}}))
    return archives
748
+
749
+
750
def get_references(obj) -> Optional[dict]:
    """Convert the references of the record into a citation list.

    A reference without its own key gets "ref<n>", counting from 1. Returns
    None when the record has no references.
    """
    references = py_.get(obj, "references")
    if references is None or len(references) == 0:
        return None

    def to_citation(position, ref):
        return compact(
            {
                "@key": ref.get("key", f"ref{position}"),
                "doi": doi_from_url(ref.get("id", None)),
                "journal_title": ref.get("journal_title", None),
                "author": ref.get("author", None),
                "volume": ref.get("volume", None),
                "first_page": ref.get("first_page", None),
                "cYear": ref.get("publicationYear", None),
                "article_title": ref.get("title", None),
                "unstructured_citation": ref.get("unstructured", None),
            }
        )

    return {
        "citation": [
            to_citation(position, ref) for position, ref in enumerate(references, start=1)
        ]
    }
772
+
773
+
774
def get_license(obj) -> Optional[dict]:
    """Build the AccessIndicators program from the license url.

    The same url is referenced for both "vor" and "tdm". Returns None when
    the record has no license url.
    """
    rights_uri = py_.get(obj, "license.url")
    if rights_uri is None:
        return None

    license_refs = [
        {"@applies_to": applies_to, "#text": rights_uri} for applies_to in ("vor", "tdm")
    ]
    return {
        "@xmlns:ai": "http://www.crossref.org/AccessIndicators.xsd",
        "@name": "AccessIndicators",
        "ai:license_ref": license_refs,
    }
794
+
795
+
796
def get_funding_references(obj) -> Optional[dict]:
    """Build the fundref program from the funding references.

    For each funding reference one funder assertion is emitted: the ROR id
    when the funder identifier is typed "ROR", otherwise the funder name if
    present. An award number, when present, adds a further assertion.
    Returns None when the record has no funding references.
    """
    funders = py_.get(obj, "funding_references")
    if funders is None or len(funders) == 0:
        return None

    def assertion(name, text):
        return {"@name": name, "#text": text}

    assertions = []
    for funder in wrap(funders):
        identifier = funder.get("funderIdentifier", None)
        identifier_type = funder.get("funderIdentifierType", None)
        funder_name = funder.get("funderName", None)
        award_number = funder.get("awardNumber", None)

        if identifier is not None and identifier_type == "ROR":
            assertions.append(assertion("ror", identifier))
        elif funder_name is not None:
            assertions.append(assertion("funder_name", funder_name))

        if award_number is not None:
            assertions.append(assertion("award_number", award_number))

    return {
        "@xmlns:fr": "http://www.crossref.org/fundref.xsd",
        "@name": "fundref",
        "fr:assertion": assertions,
    }
834
+
835
+
836
def get_relations(obj) -> Optional[dict]:
    """Build the relations program from the relations of the record.

    Relations whose type is in neither of the two lists below are left out.
    Returns None when the record has no relations, otherwise a dict holding
    the (possibly empty) list of related items.
    """
    relations = py_.get(obj, "relations")
    if relations is None or len(relations) == 0:
        return None

    inter_work_types = [
        "IsPartOf",
        "HasPart",
        "IsReviewOf",
        "HasReview",
        "IsRelatedMaterial",
        "HasRelatedMaterial",
    ]
    intra_work_types = [
        "IsIdenticalTo",
        "IsPreprintOf",
        "HasPreprint",
        "IsTranslationOf",
        "HasTranslation",
        "IsVersionOf",
        "HasVersion",
    ]

    def format_relation(relation):
        """Format one relation, or return None for an unsupported type."""
        relation_type = relation.get("type", None)
        if relation_type in inter_work_types:
            group = "rel:inter_work_relation"
        elif relation_type in intra_work_types:
            group = "rel:intra_work_relation"
        else:
            return None

        related_id = relation.get("id", None)
        f = furl(related_id)
        if validate_doi(related_id):
            identifier_type = "doi"
            _id = doi_from_url(related_id)
        elif f.host == "portal.issn.org" and obj.type in [
            "Article",
            "BlogPost",
        ]:
            # the ISSN is the last path segment of the portal url
            identifier_type = "issn"
            _id = f.path.segments[-1] if f.path.segments else None
        elif validate_url(related_id) == "URL":
            identifier_type = "uri"
            _id = related_id
        else:
            identifier_type = "other"
            _id = related_id

        return {
            group: compact(
                {
                    # relation_type cannot be None here: it matched a list above
                    "@relationship-type": py_.lower_first(relation_type),
                    "@identifier-type": identifier_type,
                    "#text": _id,
                },
            )
        }

    # format each relation once (it used to be formatted twice: once for the
    # filter and once for the value)
    related_items = []
    for relation in relations:
        formatted = format_relation(relation)
        if formatted is not None:
            related_items.append(formatted)

    return {
        "@xmlns:rel": "http://www.crossref.org/relations.xsd",
        "@name": "relations",
        "rel:related_item": related_items,
    }
904
+
905
+
906
def get_subjects(obj) -> Optional[list]:
    """Return the subjects as a flat list.

    Dict subjects contribute their "subject" value, any other subject is
    used as is. Returns None when the record has no subjects.
    """
    subjects = py_.get(obj, "subjects")
    if subjects is None:
        return None

    return [
        entry.get("subject", None) if isinstance(entry, dict) else entry
        for entry in subjects
    ]
917
+
918
+
919
def get_doi_data(obj) -> Optional[dict]:
    """Build the doi_data element: DOI, landing page and text-mining items.

    The collection always starts with the landing page as text/html,
    followed by every file that has both a mimeType and a url. Returns None
    when the id contains no DOI or the record has no url.
    """
    doi = doi_from_url(py_.get(obj, "id"))
    url = py_.get(obj, "url")
    if doi is None or url is None:
        return None

    def resource(mime_type, location):
        return {"resource": {"@mime_type": mime_type, "#text": location}}

    items = [resource("text/html", url)]
    items.extend(
        resource(file.get("mimeType"), file.get("url"))
        for file in wrap(py_.get(obj, "files"))
        if file.get("mimeType", None) is not None and file.get("url", None) is not None
    )

    doi_data = {
        "doi": doi,
        "resource": url,
        "collection": {
            "@property": "text-mining",
            "item": items,
        },
    }
    return compact(doi_data)
953
+
954
+
955
def get_isbn(obj):
    """Return the container identifier when it is typed as ISBN, else None."""
    if py_.get(obj, "container.identifierType") == "ISBN":
        return py_.get(obj, "container.identifier")
    return None
960
+
961
+
962
def get_issn(obj):
    """Return the container identifier when it is typed as ISSN, else None."""
    if py_.get(obj, "container.identifierType") == "ISSN":
        return py_.get(obj, "container.identifier")
    return None
967
+
968
+
969
+ """Errors for the Crossref XML API.
970
+
971
+ Error responses will be converted into an exception from this module.
972
+ """
973
+
974
+
975
class HttpError(Exception):
    """Exception raised when a connection problem happens."""


class CrossrefError(Exception):
    """Exception raised when the server returns a known HTTP error code.

    Known HTTP error codes include:

    * 204 No Content
    * 400 Bad Request
    * 401 Unauthorized
    * 403 Forbidden
    * 404 Not Found
    * 410 Gone (deleted)
    """

    @staticmethod
    def factory(err_code, *args):
        """Create exceptions through a Factory based on the HTTP error code.

        Codes with a dedicated class (204, 400, 401, 403, 404) return that
        class. Any other 4XX code, including 410, returns the generic
        CrossrefRequestError; everything else returns CrossrefServerError.
        ``args`` are passed on to the exception constructor.
        """
        # built here because the subclasses are defined after this class
        dedicated = {
            204: CrossrefNoContentError,
            400: CrossrefBadRequestError,
            401: CrossrefUnauthorizedError,
            403: CrossrefForbiddenError,
            404: CrossrefNotFoundError,
        }
        error_class = dedicated.get(err_code)
        if error_class is None:
            # Other 4XX codes used to be reported as server errors although
            # CrossrefServerError is documented as the 5XX base class.
            is_request_error = isinstance(err_code, int) and 400 <= err_code < 500
            error_class = CrossrefRequestError if is_request_error else CrossrefServerError
        return error_class(*args)


class CrossrefServerError(CrossrefError):
    """An internal server error happened on the Crossref end. Try later.

    Base class for all 5XX-related HTTP error codes.
    """


class CrossrefRequestError(CrossrefError):
    """A Crossref request error. You made an invalid request.

    Base class for all 4XX-related HTTP error codes as well as 204.
    """


class CrossrefNoContentError(CrossrefRequestError):
    """DOI is known to Crossref, but not resolvable.

    This might be due to handle's latency.
    """


class CrossrefBadRequestError(CrossrefRequestError):
    """Bad request error.

    Bad requests can include e.g. invalid XML, wrong domain, wrong prefix.
    """


class CrossrefUnauthorizedError(CrossrefRequestError):
    """Bad username or password."""


class CrossrefForbiddenError(CrossrefRequestError):
    """Login problem, record belongs to another party or quota exceeded."""


class CrossrefNotFoundError(CrossrefRequestError):
    """DOI does not exist in the database."""