commonmeta-py 0.23__py3-none-any.whl → 0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/cff_reader.py +199 -0
  13. commonmeta/readers/codemeta_reader.py +112 -0
  14. commonmeta/readers/commonmeta_reader.py +13 -0
  15. commonmeta/readers/crossref_reader.py +409 -0
  16. commonmeta/readers/crossref_xml_reader.py +505 -0
  17. commonmeta/readers/csl_reader.py +98 -0
  18. commonmeta/readers/datacite_reader.py +390 -0
  19. commonmeta/readers/datacite_xml_reader.py +359 -0
  20. commonmeta/readers/inveniordm_reader.py +218 -0
  21. commonmeta/readers/json_feed_reader.py +420 -0
  22. commonmeta/readers/kbase_reader.py +205 -0
  23. commonmeta/readers/ris_reader.py +103 -0
  24. commonmeta/readers/schema_org_reader.py +506 -0
  25. commonmeta/resources/cff_v1.2.0.json +1827 -0
  26. commonmeta/resources/commonmeta_v0.12.json +601 -0
  27. commonmeta/resources/commonmeta_v0.13.json +559 -0
  28. commonmeta/resources/commonmeta_v0.14.json +573 -0
  29. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  30. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  36. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  37. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  38. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  39. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  40. commonmeta/resources/crossref/fundref.xsd +49 -0
  41. commonmeta/resources/crossref/module-ali.xsd +39 -0
  42. commonmeta/resources/crossref/relations.xsd +444 -0
  43. commonmeta/resources/crossref-v0.2.json +60 -0
  44. commonmeta/resources/csl-data.json +538 -0
  45. commonmeta/resources/datacite-v4.5.json +829 -0
  46. commonmeta/resources/datacite-v4.5pr.json +608 -0
  47. commonmeta/resources/ietf-bcp-47.json +3025 -0
  48. commonmeta/resources/iso-8601.json +3182 -0
  49. commonmeta/resources/spdx/licenses.json +4851 -0
  50. commonmeta/resources/spdx-schema..json +903 -0
  51. commonmeta/resources/styles/apa.csl +1697 -0
  52. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  53. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  54. commonmeta/resources/styles/ieee.csl +468 -0
  55. commonmeta/resources/styles/modern-language-association.csl +341 -0
  56. commonmeta/resources/styles/vancouver.csl +376 -0
  57. commonmeta/schema_utils.py +27 -0
  58. commonmeta/translators.py +47 -0
  59. commonmeta/utils.py +1108 -0
  60. commonmeta/writers/__init__.py +1 -0
  61. commonmeta/writers/bibtex_writer.py +149 -0
  62. commonmeta/writers/citation_writer.py +70 -0
  63. commonmeta/writers/commonmeta_writer.py +68 -0
  64. commonmeta/writers/crossref_xml_writer.py +17 -0
  65. commonmeta/writers/csl_writer.py +79 -0
  66. commonmeta/writers/datacite_writer.py +193 -0
  67. commonmeta/writers/inveniordm_writer.py +94 -0
  68. commonmeta/writers/ris_writer.py +58 -0
  69. commonmeta/writers/schema_org_writer.py +146 -0
  70. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/METADATA +56 -45
  71. commonmeta_py-0.25.dist-info/RECORD +75 -0
  72. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/WHEEL +1 -1
  73. commonmeta_py-0.25.dist-info/entry_points.txt +3 -0
  74. commonmeta_py-0.23.dist-info/RECORD +0 -5
  75. /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
  76. {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.25.dist-info}/LICENSE +0 -0
@@ -0,0 +1,505 @@
1
+ """crossref_xml reader for commonmeta-py"""
2
+
3
+ from typing import Optional
4
+ from collections import defaultdict
5
+ import httpx
6
+ from pydash import py_
7
+
8
+ from ..utils import (
9
+ doi_from_url,
10
+ dict_to_spdx,
11
+ from_crossref_xml,
12
+ normalize_cc_url,
13
+ normalize_issn,
14
+ normalize_url,
15
+ )
16
+ from ..base_utils import (
17
+ compact,
18
+ wrap,
19
+ presence,
20
+ sanitize,
21
+ parse_attributes,
22
+ parse_xml,
23
+ )
24
+ from ..author_utils import get_authors
25
+ from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
26
+ from ..doi_utils import get_doi_ra, crossref_xml_api_url, normalize_doi
27
+ from ..constants import (
28
+ Commonmeta,
29
+ CR_TO_CM_TRANSLATIONS,
30
+ CROSSREF_CONTAINER_TYPES,
31
+ CR_TO_CM_CONTAINER_TRANSLATIONS,
32
+ )
33
+
34
+
35
def get_crossref_xml(pid: str, **kwargs) -> dict:
    """Fetch the Crossref XML record for a DOI and return it parsed into a dict.

    `pid` is reduced to a DOI with `doi_from_url`. Extra keyword arguments
    are forwarded unchanged to `httpx.get`. Returns `{"state": "not_found"}`
    when no DOI can be extracted or the response status is not 200;
    otherwise the parsed XML merged with `{"via": "crossref_xml"}`.
    """
    doi = doi_from_url(pid)
    if doi is not None:
        response = httpx.get(
            crossref_xml_api_url(doi),
            headers={"Accept": "text/xml;charset=utf-8"},
            timeout=10,
            **kwargs,
        )
        if response.status_code == 200:
            record = parse_xml(response.text, dialect="crossref")
            return record | {"via": "crossref_xml"}
    # Either the identifier was not a DOI or Crossref did not answer with 200.
    return {"state": "not_found"}
48
+
49
+
50
# Ordered probes used to pick the bibliographic part of a Crossref record.
# Each entry is (path that must be truthy in the record, path of the
# bibliographic metadata, Crossref resource type, path of the language or None).
# Order matters: specific paths are listed before the containers they live in.
_CROSSREF_TYPE_PROBES = (
    (
        "journal.journal_article",
        "journal.journal_article",
        "journal-article",
        "journal.journal_metadata.language",
    ),
    (
        "journal.journal_issue",
        "journal.journal_issue",
        "journal-issue",
        "journal.journal_metadata.language",
    ),
    ("journal", "journal", "journal", "journal.journal_metadata.language"),
    (
        "posted_content",
        "posted_content",
        "posted-content",
        "posted_content.language",
    ),
    (
        "book.content_item",
        "book.content_item",
        "book-chapter",
        "book.book_metadata.language",
    ),
    (
        "book.book_series_metadata",
        "book.book_series_metadata",
        "book-series",
        "book.book_series_metadata.language",
    ),
    (
        "book.book_set_metadata",
        "book.book_set_metadata",
        "book-set",
        "book.book_set_metadata.language",
    ),
    (
        "book.book_metadata",
        "book.book_metadata",
        "book",
        "book.book_metadata.language",
    ),
    (
        "conference",
        "conference.conference_paper",
        "proceedings-article",
        "conference.conference_paper.language",
    ),
    (
        "sa_component",
        "sa_component.component_list.component",
        "component",
        None,
    ),
    (
        "database",
        "database.dataset",
        "dataset",
        "database.database_metadata.language",
    ),
    (
        "report_paper",
        "report_paper.report_paper_metadata",
        "report",
        "report_paper.report_paper_metadata.language",
    ),
    ("peer_review", "peer_review", "peer-review", "peer_review.language"),
    ("dissertation", "dissertation", "dissertation", "dissertation.language"),
)


def _crm_item_text(query: dict, name: str) -> Optional[str]:
    """Return the `#text` of the first `crm-item` in `query` named `name`."""
    item = next(
        (i for i in wrap(query.get("crm-item", None)) if i.get("name", None) == name),
        {},
    )
    return item.get("#text", None)


def _crossref_bibmeta(meta: dict) -> tuple:
    """Return `(bibmeta, resource_type, language)` for the first matching probe."""
    for probe, bibmeta_path, resource_type, language_path in _CROSSREF_TYPE_PROBES:
        if py_.get(meta, probe, None):
            bibmeta = py_.get(meta, bibmeta_path, {})
            language = py_.get(meta, language_path) if language_path else None
            return bibmeta, resource_type, language
    # Unknown or empty record: nothing to read bibliographic data from.
    return {}, "", None


def read_crossref_xml(data: dict, **kwargs) -> Commonmeta:
    """Convert a parsed Crossref XML query result into commonmeta metadata.

    `data` is the dict produced by parsing the Crossref XML response; `None`
    yields `{"state": "not_found"}`. Keyword arguments are treated as read
    options: `doi` / `id` override the identifier found in the record, and
    all of them are merged over the returned dict, so they win on key clashes.
    """
    if data is None:
        return {"state": "not_found"}
    meta = py_.get(
        data, "crossref_result.query_result.body.query.doi_record.crossref", {}
    )

    # query contains information from outside metadata schema, e.g. publisher name
    query = py_.get(data, "crossref_result.query_result.body.query", {})

    read_options = kwargs or {}

    member_id = _crm_item_text(query, "member-id")
    publisher_id = (
        "https://api.crossref.org/members/" + member_id if member_id else None
    )
    publisher = compact(
        {
            "id": publisher_id,
            "name": _crm_item_text(query, "publisher-name"),
        }
    )

    # fetch metadata depending on the Crossref type
    bibmeta, resource_type, language = _crossref_bibmeta(meta)
    if resource_type == "posted-content" and publisher.get("name", None) is None:
        # posted content may only name the hosting institution
        publisher = {"name": py_.get(bibmeta, "institution.institution_name", None)}

    _id = normalize_doi(
        kwargs.get("doi", None)
        or kwargs.get("id", None)
        or py_.get(bibmeta, "doi_data.doi")
    )
    _type = CR_TO_CM_TRANSLATIONS.get(resource_type, "Other")
    url = normalize_url(parse_attributes(py_.get(bibmeta, "doi_data.resource")))
    titles = crossref_titles(bibmeta)
    contributors = crossref_people(bibmeta)

    created = _crm_item_text(query, "created")
    date = {
        "created": created,
        "published": (
            get_date_from_crossref_parts(bibmeta.get("publication_date", {}))
            or get_date_from_crossref_parts(bibmeta.get("review_date", {}))
            or created
        ),
        "updated": _crm_item_text(query, "last-update"),
    }

    # TODO: fix timestamp. Until then, remove time as this is not always stable
    # with Crossref (different server timezones)
    date = {k: get_iso8601_date(v) for k, v in date.items()}

    descriptions = crossref_description(bibmeta)
    funding = (
        py_.get(bibmeta, "program.0")
        or py_.get(bibmeta, "program.0.assertion")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.0.assertion")
    )
    funding_references = crossref_funding(wrap(funding))

    license_ = (
        py_.get(bibmeta, "program.0.license_ref")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.0.license_ref")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.1.license_ref")
    )
    license_ = crossref_license(wrap(license_))

    container = crossref_container(meta, resource_type=resource_type)

    # crossref_reference returns None for citations that are not dicts;
    # keep those placeholders out of the reference list.
    references = [
        ref
        for ref in (
            crossref_reference(i)
            for i in wrap(py_.get(bibmeta, "citation_list.citation"))
        )
        if ref is not None
    ]
    files = presence(meta.get("contentUrl", None))

    # prefer the registration agency named in the record, else look it up
    reg_agency = bibmeta.get("reg-agency", None)
    provider = reg_agency.capitalize() if reg_agency else get_doi_ra(_id)
    state = "findable" if meta or read_options else "not_found"

    return {
        # required properties
        "id": _id,
        "type": _type,
        "url": url,
        "contributors": presence(contributors),
        "titles": presence(titles),
        "publisher": publisher,
        "date": compact(date),
        # recommended and optional properties
        "subjects": presence(None),
        "language": language,
        "alternate_identifiers": None,
        "sizes": None,
        "formats": None,
        "version": None,
        "license": presence(license_),
        "descriptions": presence(descriptions),
        "geo_locations": None,
        "funding_references": presence(funding_references),
        "references": references,
        "relations": None,
        # other properties
        "date_created": None,
        "date_registered": None,
        "date_published": None,
        "date_updated": None,
        "content_url": presence(files),
        "container": presence(container),
        "provider": provider,
        "state": state,
        "schema_version": None,
    } | read_options
282
+
283
+
284
def crossref_titles(bibmeta):
    """Build the list of titles from the first `titles` entry of `bibmeta`.

    An original-language title, when present, is returned on its own
    (with its `language` attribute as `lang`). Otherwise the plain title
    is returned, followed by the subtitle if there is one. Returns None
    when no usable title is found.
    """
    original_node = py_.get(bibmeta, "titles.0.original_language_title")
    main_title = parse_attributes(py_.get(bibmeta, "titles.0.title"))
    sub_title = parse_attributes(py_.get(bibmeta, "titles.0.subtitle"))
    original_title = parse_attributes(original_node)
    original_lang = parse_attributes(original_node, content="language")

    if original_title is None:
        if main_title is None:
            return None
        if main_title and sub_title is None:
            return [{"title": sanitize(main_title)}]
    if original_title:
        original_entry = {
            "title": sanitize(original_title),
            "lang": original_lang,
        }
        return [compact(original_entry)]
    if sub_title:
        subtitle_entry = {
            "title": sanitize(sub_title),
            "titleType": "Subtitle",
        }
        return [compact({"title": sanitize(main_title)}), subtitle_entry]
    return None
315
+
316
+
317
def crossref_description(bibmeta):
    """Turn the `abstract` element(s) of `bibmeta` into description dicts."""

    def to_description(abstract):
        """Convert one abstract element; its `p` value is flattened in place."""
        paragraph = abstract.get("p", None)
        if isinstance(paragraph, list):
            # only the first paragraph is kept
            paragraph = abstract["p"] = paragraph[0]
        if isinstance(paragraph, dict):
            abstract["p"] = paragraph["#text"]
        is_abstract = abstract.get("abstract-type", None) == "abstract"
        text = parse_attributes(abstract, content="p", first=True)
        return compact(
            {
                "descriptionType": "Abstract" if is_abstract else "Other",
                "description": sanitize(text),
            }
        )

    abstracts = wrap(bibmeta.get("abstract", None))
    return list(map(to_description, abstracts))
339
+
340
+
341
def crossref_people(bibmeta):
    """Collect persons and organizations from `bibmeta` as commonmeta authors.

    Persons are read from `contributors.person_name`, falling back to a
    top-level `person_name`; organizations from `contributors.organization`.
    Persons come first in the combined list handed to `get_authors`.
    """
    people = wrap(
        py_.get(bibmeta, "contributors.person_name")
        or bibmeta.get("person_name", None)
    )
    organizations = wrap(py_.get(bibmeta, "contributors.organization"))
    return get_authors(from_crossref_xml(people + organizations))
350
+
351
+ # (Array.wrap(person) + Array.wrap(organization)).select do |a|
352
+ # a['contributor_role'] == contributor_role
353
+ # end.map do |a|
354
+ # name_identifiers = if normalize_orcid(parse_attributes(a['ORCID'])).present?
355
+ # [{
356
+ # 'nameIdentifier' => normalize_orcid(parse_attributes(a['ORCID'])), 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org'
357
+ # }]
358
+ # end
359
+ # if a['surname'].present? || a['given_name'].present? || name_identifiers.present?
360
+ # given_name = parse_attributes(a['given_name'])
361
+ # family_name = parse_attributes(a['surname'])
362
+ # affiliation = Array.wrap(a['affiliation']).map do |a|
363
+ # if a.is_a?(Hash)
364
+ # a
365
+ # elsif a.is_a?(Hash) && a.key?('#text') && a[#text'].strip.blank?
366
+ # nil
367
+ # elsif a.is_a?(Hash) && a.key?('_#text_')
368
+ # { 'name' => a['#text'] }
369
+ # elsif a.strip.blank?
370
+ # nil
371
+ # elsif a.is_a?(String)
372
+ # { 'name' => a }
373
+ # end
374
+ # end.compact
375
+
376
+ # { 'nameType' => 'Personal',
377
+ # 'nameIdentifiers' => name_identifiers,
378
+ # 'name' => [family_name, given_name].compact.join(', '),
379
+ # 'givenName' => given_name,
380
+ # 'familyName' => family_name,
381
+ # 'affiliation' => affiliation.presence,
382
+ # 'contributorType' => contributor_role == 'editor' ? 'Editor' : nil }.compact
383
+ # else
384
+ # { 'nameType' => 'Organizational',
385
+ # 'name' => a['name'] || a['#text'] }
386
+
387
+
388
def crossref_reference(reference: Optional[dict]) -> Optional[dict]:
    """Map one Crossref citation element onto a compacted reference dict.

    Returns None when `reference` is not a dict.
    """
    if not isinstance(reference, dict):
        return None

    doi = parse_attributes(reference.get("doi", None))

    # The unstructured citation is either plain text or a dict whose text
    # sits under "font" or "#text".
    citation = reference.get("unstructured_citation", None)
    if isinstance(citation, dict):
        citation = citation.get("font", None) or citation.get("#text", None)

    metadata = {
        "key": reference.get("key", None),
        "id": normalize_doi(doi) if doi else None,
    }
    # output key -> Crossref citation key, copied over unchanged
    copied_fields = {
        "contributor": "author",
        "title": "article_title",
        "publisher": "publisher",
        "publicationYear": "cYear",
        "volume": "volume",
        "issue": "issue",
        "firstPage": "first_page",
        "lastPage": "last_page",
        "containerTitle": "journal_title",
    }
    for target, source in copied_fields.items():
        metadata[target] = reference.get(source, None)
    metadata["edition"] = None
    metadata["unstructured"] = sanitize(citation) if citation else None
    return compact(metadata)
414
+
415
+
416
def _crossref_issn(issns: list, media_type: str):
    """Return the first ISSN entry in `issns` for `media_type`, or None.

    Entries with attributes are dicts carrying `media_type`. An `<issn>`
    without attributes is parsed to a bare string; it is treated as a
    print ISSN, which is the Crossref schema default for `media_type`.
    """
    for entry in issns:
        if isinstance(entry, dict):
            if entry.get("media_type", None) == media_type:
                return entry
        elif isinstance(entry, str) and media_type == "print":
            return entry
    return None


def crossref_container(meta: dict, resource_type: str = "JournalArticle") -> dict:
    """Build the container (journal, book, proceedings, ...) of a Crossref record.

    `meta` is the `crossref` element of the record and `resource_type` the
    Crossref type, which is mapped to a container element name through
    `CROSSREF_CONTAINER_TYPES`. The electronic ISSN is preferred over the
    print ISSN; an ISBN is only used when no ISSN is found. Keys without a
    value are dropped by `compact`.
    """
    container_type = CROSSREF_CONTAINER_TYPES.get(resource_type, None)

    # ISSNs can sit on the container itself or on its series metadata
    issns = wrap(
        py_.get(meta, f"{container_type}.{container_type}_metadata.issn")
    ) + wrap(
        py_.get(
            meta,
            f"{container_type}.{container_type}_series_metadata.series_metadata.issn",
        )
    )
    issn_entry = _crossref_issn(issns, "electronic") or _crossref_issn(issns, "print")
    # NOTE(review): assumes normalize_issn accepts a plain string as well as
    # an attribute dict - confirm against ..utils.
    issn = normalize_issn(issn_entry) if issn_entry else None
    isbn = py_.get(meta, f"conference.{container_type}_metadata.isbn.#text")

    container_title = (
        py_.get(meta, f"{container_type}.{container_type}_metadata.full_title")
        or py_.get(meta, f"{container_type}.{container_type}_metadata.titles.0.title")
        or py_.get(meta, f"conference.{container_type}_metadata.{container_type}_title")
        or py_.get(
            meta,
            f"{container_type}.{container_type}_series_metadata.series_metadata.titles.0.title",
        )
    )
    volume = py_.get(
        meta,
        f"{container_type}.{container_type}_issue.{container_type}_volume.volume",
    )
    issue = py_.get(meta, f"{container_type}.{container_type}_issue.issue")

    # pages live on the article, the book content item or the conference paper
    first_page = (
        py_.get(meta, f"{container_type}.{container_type}_article.pages.first_page")
        or py_.get(meta, f"{container_type}.content_item.pages.first_page")
        or py_.get(meta, "conference.conference_paper.pages.first_page")
    )
    last_page = (
        py_.get(meta, f"{container_type}.{container_type}_article.pages.last_page")
        or py_.get(meta, f"{container_type}.content_item.pages.last_page")
        or py_.get(meta, "conference.conference_paper.pages.last_page")
    )

    return compact(
        {
            "type": CR_TO_CM_CONTAINER_TRANSLATIONS.get(container_type, None),
            "identifier": issn or isbn,
            "identifierType": "ISSN" if issn else "ISBN" if isbn else None,
            "title": container_title,
            "volume": volume,
            "issue": issue,
            "firstPage": first_page,
            "lastPage": last_page,
            "location": py_.get(meta, "conference.event_metadata.conference_location"),
            "series": py_.get(meta, "conference.event_metadata.conference_acronym"),
        }
    )
488
+
489
+
490
def crossref_funding(funding: list) -> list:
    """Placeholder for funding extraction from Crossref assertions.

    The `funding` argument is currently ignored and an empty list is
    always returned.
    """
    no_funding: list = []
    return no_funding
493
+
494
+
495
def crossref_license(licenses: list) -> dict:
    """Convert the first Crossref license entry via `dict_to_spdx`.

    Returns None when `licenses` is empty; any further entries are ignored.
    """
    for entry in licenses:
        # only the first license found is used, so return on the first pass
        license_url = normalize_cc_url(parse_attributes(entry))
        return dict_to_spdx({"url": license_url})
    return None
@@ -0,0 +1,98 @@
1
+ """CSL-JSON reader for commonmeta-py"""
2
+ from ..utils import dict_to_spdx, from_csl, normalize_id, name_to_fos, encode_doi
3
+ from ..base_utils import wrap, compact, sanitize, presence
4
+ from ..author_utils import get_authors
5
+ from ..date_utils import get_date_from_date_parts
6
+ from ..doi_utils import get_doi_ra, doi_from_url
7
+ from ..constants import (
8
+ CSL_TO_CM_TRANSLATIONS,
9
+ Commonmeta,
10
+ )
11
+
12
+
13
def read_csl(data: dict, **kwargs) -> Commonmeta:
    """Convert a CSL-JSON item into commonmeta metadata.

    `data` is the CSL-JSON item as a dict; `None` yields
    `{"state": "not_found"}`. Keyword arguments are treated as read
    options: `prefix` triggers generation of a DOI when the item has none,
    and all options are merged over the returned dict, so they win on key
    clashes.
    """
    # `issn_as_url` is needed for the ISSN relation below but is missing from
    # this module's top-level imports, so bring it into scope here.
    from ..utils import issn_as_url

    if data is None:
        return {"state": "not_found"}
    meta = data

    read_options = kwargs or {}

    _id = normalize_id(meta.get("id", None) or meta.get("DOI", None))
    _type = CSL_TO_CM_TRANSLATIONS.get(meta.get("type", None), "Other")

    # optionally generate a DOI if missing but a DOI prefix is provided
    prefix = read_options.get("prefix", None)
    if doi_from_url(_id) is None and prefix is not None:
        _id = encode_doi(prefix)

    # authors first, then editors
    contributors = get_authors(from_csl(wrap(meta.get("author", None))))
    editors = get_authors(from_csl(wrap(meta.get("editor", None))))
    if editors:
        contributors = (contributors or []) + editors

    date = {"published": get_date_from_date_parts(meta.get("issued", None))}

    license_ = meta.get("copyright", None)
    if license_ is not None:
        license_ = dict_to_spdx({"url": meta.get("copyright")})

    # "page" may be absent, None or a number; normalise to text before
    # splitting, and map empty parts to None so compact() drops them.
    pages = str(meta.get("page", None) or "").split("-")
    first_page = pages[0] or None
    last_page = (pages[1] or None) if len(pages) > 1 else None

    publisher = meta.get("publisher", None)
    if isinstance(publisher, str):
        publisher = {"name": publisher}

    relations = []
    issn = meta.get("ISSN", None)
    if issn is not None:
        relations.append(
            {
                "id": issn_as_url(issn),
                "type": "IsPartOf",
            }
        )
    container = compact(
        {
            "type": "Periodical",
            "title": meta.get("container-title", None),
            "identifier": issn,
            "identifierType": "ISSN" if meta.get("ISSN", None) else None,
            "volume": meta.get("volume", None),
            "issue": meta.get("issue", None),
            "firstPage": first_page,
            "lastPage": last_page,
        }
    )

    state = "findable" if _id or read_options else "not_found"
    subjects = [name_to_fos(i) for i in wrap(meta.get("keywords", None))]

    if meta.get("abstract", None):
        descriptions = [
            {
                "description": sanitize(str(meta.get("abstract"))),
                "type": "Abstract",
            }
        ]
    else:
        descriptions = None

    provider = get_doi_ra(_id)

    return {
        "id": _id,
        "type": _type,
        "url": normalize_id(meta.get("URL", None)),
        "titles": [{"title": meta.get("title", None)}],
        "contributors": presence(contributors),
        "publisher": presence(publisher),
        "date": compact(date),
        "container": container,
        "references": None,
        "relations": presence(relations),
        "descriptions": descriptions,
        "license": license_,
        "version": meta.get("version", None),
        "subjects": subjects,
        "provider": provider,
        "state": state,
    } | read_options