commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/bibtex_reader.py +0 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
- commonmeta_py-0.24.dist-info/RECORD +75 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
- commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
- commonmeta_py/__init__.py +0 -2
- commonmeta_py-0.22.dist-info/RECORD +0 -5
- {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,505 @@
|
|
1
|
+
"""crossref_xml reader for commonmeta-py"""
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
from collections import defaultdict
|
5
|
+
import httpx
|
6
|
+
from pydash import py_
|
7
|
+
|
8
|
+
from ..utils import (
|
9
|
+
doi_from_url,
|
10
|
+
dict_to_spdx,
|
11
|
+
from_crossref_xml,
|
12
|
+
normalize_cc_url,
|
13
|
+
normalize_issn,
|
14
|
+
normalize_url,
|
15
|
+
)
|
16
|
+
from ..base_utils import (
|
17
|
+
compact,
|
18
|
+
wrap,
|
19
|
+
presence,
|
20
|
+
sanitize,
|
21
|
+
parse_attributes,
|
22
|
+
parse_xml,
|
23
|
+
)
|
24
|
+
from ..author_utils import get_authors
|
25
|
+
from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
|
26
|
+
from ..doi_utils import get_doi_ra, crossref_xml_api_url, normalize_doi
|
27
|
+
from ..constants import (
|
28
|
+
Commonmeta,
|
29
|
+
CR_TO_CM_TRANSLATIONS,
|
30
|
+
CROSSREF_CONTAINER_TYPES,
|
31
|
+
CR_TO_CM_CONTAINER_TRANSLATIONS,
|
32
|
+
)
|
33
|
+
|
34
|
+
|
35
|
+
def get_crossref_xml(pid: str, **kwargs) -> dict:
    """Fetch the Crossref XML record for a DOI and return it parsed.

    `pid` is reduced to a DOI with `doi_from_url`. Any extra keyword
    arguments are forwarded to `httpx.get`. The result is
    `{"state": "not_found"}` when no DOI can be extracted or the API does not
    answer with HTTP 200; otherwise it is the parsed XML merged with
    `{"via": "crossref_xml"}`.
    """
    doi = doi_from_url(pid)
    if doi is None:
        return {"state": "not_found"}

    api_url = crossref_xml_api_url(doi)
    accept_xml = {"Accept": "text/xml;charset=utf-8"}
    response = httpx.get(api_url, headers=accept_xml, timeout=10, **kwargs)

    if response.status_code == 200:
        record = parse_xml(response.text, dialect="crossref")
        return record | {"via": "crossref_xml"}
    return {"state": "not_found"}
|
48
|
+
|
49
|
+
|
50
|
+
def read_crossref_xml(data: dict, **kwargs) -> Commonmeta:
    """Map a parsed Crossref XML query result to commonmeta properties.

    Args:
        data: Parsed Crossref XML response. The work metadata is read from
            `crossref_result.query_result.body.query.doi_record.crossref`,
            the registry-level `crm-item` entries from
            `crossref_result.query_result.body.query`.
        **kwargs: Read options. `doi` or `id` take precedence over the DOI
            found in the record, and all options are merged over the result,
            so they override any key computed here.

    Returns:
        `{"state": "not_found"}` when `data` is None, otherwise a dict of
        commonmeta properties.
    """
    if data is None:
        return {"state": "not_found"}
    meta = py_.get(
        data, "crossref_result.query_result.body.query.doi_record.crossref", {}
    )

    # query contains information from outside metadata schema, e.g. publisher name
    query = py_.get(data, "crossref_result.query_result.body.query", {})

    # read_options = ActiveSupport::HashWithIndifferentAccess.
    # new(options.except(:doi, :id, :url,
    # :sandbox, :validate, :ra))
    read_options = kwargs or {}

    # The first crm-item named "member-id" carries the Crossref member number
    member_id = next(
        (
            i
            for i in wrap(query.get("crm-item", None))
            if i.get("name", None) == "member-id"
        ),
        {},
    ).get("#text", None)
    publisher_id = (
        "https://api.crossref.org/members/" + member_id if member_id else None
    )
    # Publisher name comes from the crm-item named "publisher-name"
    publisher = compact(
        {
            "id": publisher_id,
            "name": next(
                (
                    i
                    for i in wrap(query.get("crm-item", None))
                    if i.get("name", None) == "publisher-name"
                ),
                {},
            ).get("#text", None),
        }
    )

    # fetch metadata depending of Crossref type
    # Branch order matters: the more specific node (e.g. journal_article) is
    # tested before its container (journal).
    if py_.get(meta, "journal.journal_article", None):
        bibmeta = py_.get(meta, "journal.journal_article", {})
        resource_type = "journal-article"
        language = py_.get(meta, "journal.journal_metadata.language")
    elif py_.get(meta, "journal.journal_issue", None):
        bibmeta = py_.get(meta, "journal.journal_issue", {})
        resource_type = "journal-issue"
        language = py_.get(meta, "journal.journal_metadata.language")
    elif py_.get(meta, "journal", None):
        bibmeta = py_.get(meta, "journal", {})
        resource_type = "journal"
        language = py_.get(meta, "journal.journal_metadata.language")
    elif py_.get(meta, "posted_content", None):
        bibmeta = meta.get("posted_content", {})
        # Fall back to the institution name when no publisher-name crm-item exists
        if publisher.get("name", None) is None:
            publisher = {"name": py_.get(bibmeta, "institution.institution_name", None)}
        resource_type = "posted-content"
        language = py_.get(meta, "posted_content.language")
    elif py_.get(meta, "book.content_item"):
        bibmeta = py_.get(meta, "book.content_item")
        resource_type = "book-chapter"
        language = py_.get(meta, "book.book_metadata.language")
    elif py_.get(meta, "book.book_series_metadata"):
        bibmeta = py_.get(meta, "book.book_series_metadata")
        resource_type = "book-series"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "book.book_set_metadata"):
        bibmeta = py_.get(meta, "book.book_set_metadata")
        resource_type = "book-set"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "book.book_metadata"):
        bibmeta = py_.get(meta, "book.book_metadata")
        resource_type = "book"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "conference", None):
        bibmeta = py_.get(meta, "conference.conference_paper", {})
        resource_type = "proceedings-article"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "sa_component", None):
        bibmeta = py_.get(meta, "sa_component.component_list.component", {})
        resource_type = "component"
        language = None
    elif py_.get(meta, "database", None):
        bibmeta = py_.get(meta, "database.dataset", {})
        resource_type = "dataset"
        language = py_.get(meta, "database.database_metadata.language")
    elif py_.get(meta, "report_paper", None):
        bibmeta = py_.get(meta, "report_paper.report_paper_metadata", {})
        resource_type = "report"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "peer_review", None):
        bibmeta = py_.get(meta, "peer_review", {})
        resource_type = "peer-review"
        language = bibmeta.get("language", None)
    elif py_.get(meta, "dissertation", None):
        bibmeta = py_.get(meta, "dissertation", {})
        resource_type = "dissertation"
        language = bibmeta.get("language", None)
    else:
        # Unknown record type: nothing to read, type resolves to "Other" below
        bibmeta = {}
        resource_type = ""
        language = None

    # Explicit doi/id options win over the DOI stored in the record
    _id = normalize_doi(
        kwargs.get("doi", None)
        or kwargs.get("id", None)
        or py_.get(bibmeta, "doi_data.doi")
    )
    _type = CR_TO_CM_TRANSLATIONS.get(resource_type, "Other")
    url = parse_attributes(py_.get(bibmeta, "doi_data.resource"))
    url = normalize_url(url)
    titles = crossref_titles(bibmeta)
    contributors = crossref_people(bibmeta)

    date: dict = defaultdict(list)
    date["created"] = next(
        (
            i
            for i in wrap(query.get("crm-item", None))
            if i.get("name", None) == "created"
        ),
        {},
    ).get("#text", None)
    # Published date falls back to the review date, then to the created date
    date["published"] = (
        get_date_from_crossref_parts(bibmeta.get("publication_date", {}))
        or get_date_from_crossref_parts(bibmeta.get("review_date", {}))
        or date["created"]
    )
    date["updated"] = next(
        (
            i
            for i in wrap(query.get("crm-item", None))
            if i.get("name", None) == "last-update"
        ),
        {},
    ).get("#text", None)

    # TODO: fix timestamp. Until then, remove time as this is not always stable with Crossref (different server timezones)
    date = {k: get_iso8601_date(v) for k, v in date.items()}

    descriptions = crossref_description(bibmeta)
    # NOTE(review): crossref_funding currently returns an empty list, so the
    # value selected here has no effect on the result.
    funding = (
        py_.get(bibmeta, "program.0")
        or py_.get(bibmeta, "program.0.assertion")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.0.assertion")
    )
    funding_references = crossref_funding(wrap(funding))

    # License references are looked up in the record's program element first,
    # then in the first two crossmark custom_metadata programs
    license_ = (
        py_.get(bibmeta, "program.0.license_ref")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.0.license_ref")
        or py_.get(bibmeta, "crossmark.custom_metadata.program.1.license_ref")
    )
    license_ = crossref_license(wrap(license_))

    # By using book_metadata, we can account for where resource_type is `BookChapter` and not assume its a whole book
    # if book_metadata:
    #     # identifiers = crossref_alternate_identifiers(book_metadata)
    #     container = compact(
    #         {
    #             "type": "Book",
    #             "title": py_.get(book_metadata, "titles.title"),
    #             "firstPage": py_.get(bibmeta, "pages.first_page"),
    #             "lastPage": py_.get(bibmeta, "pages.last_page"),
    #             #'identifiers' => identifiers
    #         }
    #     )

    # elif book_series_metadata.get("series_metadata", None):
    #     issn = normalize_issn(
    #         py_.get(book_series_metadata, "series_metadata.issn.0.#text")
    #     )
    #     container = compact(
    #         {
    #             "type": "Book Series",
    #             "identifier": issn,
    #             "identifierType": "ISSN" if issn else None,
    #             "title": py_.get(book_series_metadata, "series_metadata.titles.title"),
    #             "volume": bibmeta.get("volume", None),
    #         }
    #     )
    # else:
    #     container = None
    container = crossref_container(meta, resource_type=resource_type)
    references = [
        crossref_reference(i) for i in wrap(py_.get(bibmeta, "citation_list.citation"))
    ]
    files = presence(meta.get("contentUrl", None))
    # Prefer the registration agency named in the record; otherwise look it up by DOI
    provider = (
        bibmeta.get("reg-agency").capitalize()
        if bibmeta.get("reg-agency", None)
        else None
    )
    if provider is None:
        provider = get_doi_ra(_id)
    # Findable as soon as either record metadata or read options are present
    state = "findable" if meta or read_options else "not_found"

    return {
        # required properties
        "id": _id,
        "type": _type,
        "url": url,
        "contributors": presence(contributors),
        "titles": presence(titles),
        "publisher": publisher,
        "date": compact(date),
        # recommended and optional properties
        "subjects": presence(None),
        "language": language,
        "alternate_identifiers": None,
        "sizes": None,
        "formats": None,
        "version": None,
        "license": presence(license_),
        "descriptions": presence(descriptions),
        "geo_locations": None,
        "funding_references": presence(funding_references),
        "references": references,
        "relations": None,
        # other properties
        "date_created": None,
        "date_registered": None,
        "date_published": None,
        "date_updated": None,
        "content_url": presence(files),
        "container": presence(container),
        "provider": provider,
        "state": state,
        "schema_version": None,
    } | read_options
|
282
|
+
|
283
|
+
|
284
|
+
def crossref_titles(bibmeta):
    """Build the commonmeta title list from the first Crossref `titles` entry.

    An original-language title, when present, is returned alone together with
    its language. Otherwise the main title is returned, followed by a
    "Subtitle" entry when a subtitle exists. Returns None when neither a
    title nor an original-language title is available.
    """
    main_title = parse_attributes(py_.get(bibmeta, "titles.0.title"))
    sub_title = parse_attributes(py_.get(bibmeta, "titles.0.subtitle"))
    original_node = py_.get(bibmeta, "titles.0.original_language_title")
    original_title = parse_attributes(original_node)
    original_lang = parse_attributes(original_node, content="language")

    if main_title is None and original_title is None:
        return None

    # Plain title only
    if main_title and original_title is None and sub_title is None:
        return [{"title": sanitize(main_title)}]

    if original_title:
        entry = {"title": sanitize(original_title), "lang": original_lang}
        return [compact(entry)]

    if sub_title:
        title_entry = compact({"title": sanitize(main_title)})
        subtitle_entry = {"title": sanitize(sub_title), "titleType": "Subtitle"}
        return [title_entry, subtitle_entry]

    return None
|
315
|
+
|
316
|
+
|
317
|
+
def crossref_description(bibmeta):
    """Build the commonmeta description list from Crossref `abstract` elements.

    Each abstract becomes a dict with a `descriptionType` ("Abstract" when the
    element's `abstract-type` is "abstract", otherwise "Other") and a
    sanitized `description` taken from the element's `p` content.
    The input record is left unmodified.
    """

    def format_abstract(element):
        """Format a single abstract element."""
        # Work on a shallow copy so the caller's parsed record is not altered
        # by the paragraph normalization below.
        element = element.copy()
        # Only the first paragraph of a multi-paragraph abstract is used
        if isinstance(element.get("p", None), list):
            element["p"] = element["p"][0]
        # A paragraph with attributes is a dict holding its text under "#text"
        if isinstance(element.get("p", None), dict):
            element["p"] = element["p"]["#text"]
        description_type = (
            "Abstract" if element.get("abstract-type", None) == "abstract" else "Other"
        )
        return compact(
            {
                "descriptionType": description_type,
                "description": sanitize(
                    parse_attributes(element, content="p", first=True)
                ),
            }
        )

    return [format_abstract(i) for i in wrap(bibmeta.get("abstract", None))]
|
339
|
+
|
340
|
+
|
341
|
+
def crossref_people(bibmeta):
    """Collect persons and organizations from Crossref metadata as authors.

    Persons are read from `contributors.person_name`, falling back to a
    top-level `person_name`; organizations from `contributors.organization`.
    """
    persons = py_.get(bibmeta, "contributors.person_name")
    if not persons:
        persons = bibmeta.get("person_name", None)
    organizations = wrap(py_.get(bibmeta, "contributors.organization"))

    everyone = wrap(persons) + wrap(organizations)
    return get_authors(from_crossref_xml(everyone))
|
350
|
+
|
351
|
+
# (Array.wrap(person) + Array.wrap(organization)).select do |a|
|
352
|
+
# a['contributor_role'] == contributor_role
|
353
|
+
# end.map do |a|
|
354
|
+
# name_identifiers = if normalize_orcid(parse_attributes(a['ORCID'])).present?
|
355
|
+
# [{
|
356
|
+
# 'nameIdentifier' => normalize_orcid(parse_attributes(a['ORCID'])), 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org'
|
357
|
+
# }]
|
358
|
+
# end
|
359
|
+
# if a['surname'].present? || a['given_name'].present? || name_identifiers.present?
|
360
|
+
# given_name = parse_attributes(a['given_name'])
|
361
|
+
# family_name = parse_attributes(a['surname'])
|
362
|
+
# affiliation = Array.wrap(a['affiliation']).map do |a|
|
363
|
+
# if a.is_a?(Hash)
|
364
|
+
# a
|
365
|
+
# elsif a.is_a?(Hash) && a.key?('#text') && a[#text'].strip.blank?
|
366
|
+
# nil
|
367
|
+
# elsif a.is_a?(Hash) && a.key?('_#text_')
|
368
|
+
# { 'name' => a['#text'] }
|
369
|
+
# elsif a.strip.blank?
|
370
|
+
# nil
|
371
|
+
# elsif a.is_a?(String)
|
372
|
+
# { 'name' => a }
|
373
|
+
# end
|
374
|
+
# end.compact
|
375
|
+
|
376
|
+
# { 'nameType' => 'Personal',
|
377
|
+
# 'nameIdentifiers' => name_identifiers,
|
378
|
+
# 'name' => [family_name, given_name].compact.join(', '),
|
379
|
+
# 'givenName' => given_name,
|
380
|
+
# 'familyName' => family_name,
|
381
|
+
# 'affiliation' => affiliation.presence,
|
382
|
+
# 'contributorType' => contributor_role == 'editor' ? 'Editor' : nil }.compact
|
383
|
+
# else
|
384
|
+
# { 'nameType' => 'Organizational',
|
385
|
+
# 'name' => a['name'] || a['#text'] }
|
386
|
+
|
387
|
+
|
388
|
+
def crossref_reference(reference: Optional[dict]) -> Optional[dict]:
    """Convert one Crossref citation into a compacted reference dict.

    Returns None when `reference` is not a dict (including None).
    """
    if not isinstance(reference, dict):
        return None

    doi = parse_attributes(reference.get("doi", None))

    # The unstructured citation is either plain text or a dict holding the
    # text under "font" or "#text".
    citation = reference.get("unstructured_citation", None)
    if isinstance(citation, dict):
        text = citation.get("font", None) or citation.get("#text", None)
    else:
        text = citation

    return compact(
        {
            "key": reference.get("key", None),
            "id": normalize_doi(doi) if doi else None,
            "contributor": reference.get("author", None),
            "title": reference.get("article_title", None),
            "publisher": reference.get("publisher", None),
            "publicationYear": reference.get("cYear", None),
            "volume": reference.get("volume", None),
            "issue": reference.get("issue", None),
            "firstPage": reference.get("first_page", None),
            "lastPage": reference.get("last_page", None),
            "containerTitle": reference.get("journal_title", None),
            "edition": None,
            "unstructured": sanitize(text) if text else None,
        }
    )
|
414
|
+
|
415
|
+
|
416
|
+
def crossref_container(meta: dict, resource_type: str = "JournalArticle") -> dict:
    """Build the commonmeta container for a Crossref record.

    The container type is looked up from `resource_type` and used to build
    the paths under which identifier, title, volume, issue and pages are
    read. An electronic ISSN is preferred over a print ISSN, and an ISSN over
    an ISBN. Empty values are removed by `compact`.
    """
    container_type = CROSSREF_CONTAINER_TYPES.get(resource_type, None)

    # ISSNs may sit on the container itself or on its series metadata
    issn_candidates = wrap(
        py_.get(meta, f"{container_type}.{container_type}_metadata.issn")
    ) + wrap(
        py_.get(
            meta,
            f"{container_type}.{container_type}_series_metadata.series_metadata.issn",
        )
    )

    def first_issn(media_type):
        """Return the first ISSN entry of the given media type, or {}."""
        for candidate in issn_candidates:
            if candidate.get("media_type", None) == media_type:
                return candidate
        return {}

    issn = first_issn("electronic") or first_issn("print")
    issn = normalize_issn(issn) if issn else None
    isbn = py_.get(meta, f"conference.{container_type}_metadata.isbn.#text")

    container_title = py_.get(
        meta, f"{container_type}.{container_type}_metadata.full_title"
    )
    if not container_title:
        container_title = py_.get(
            meta, f"{container_type}.{container_type}_metadata.titles.0.title"
        )
    if not container_title:
        container_title = py_.get(
            meta, f"conference.{container_type}_metadata.{container_type}_title"
        )
    if not container_title:
        container_title = py_.get(
            meta,
            f"{container_type}.{container_type}_series_metadata.series_metadata.titles.0.title",
        )

    volume = py_.get(
        meta,
        f"{container_type}.{container_type}_issue.{container_type}_volume.volume",
    )
    issue = py_.get(meta, f"{container_type}.{container_type}_issue.issue")

    first_page = (
        py_.get(meta, f"{container_type}.{container_type}_article.pages.first_page")
        or py_.get(meta, f"{container_type}.content_item.pages.first_page")
        or py_.get(meta, "conference.conference_paper.pages.first_page")
    )
    last_page = (
        py_.get(meta, f"{container_type}.{container_type}_article.pages.last_page")
        or py_.get(meta, f"{container_type}.content_item.pages.last_page")
        or py_.get(meta, "conference.conference_paper.pages.last_page")
    )

    if issn:
        identifier_type = "ISSN"
    elif isbn:
        identifier_type = "ISBN"
    else:
        identifier_type = None

    return compact(
        {
            "type": CR_TO_CM_CONTAINER_TRANSLATIONS.get(container_type, None),
            "identifier": issn or isbn,
            "identifierType": identifier_type,
            "title": container_title,
            "volume": volume,
            "issue": issue,
            "firstPage": first_page,
            "lastPage": last_page,
            "location": py_.get(meta, "conference.event_metadata.conference_location"),
            "series": py_.get(meta, "conference.event_metadata.conference_acronym"),
        }
    )
|
488
|
+
|
489
|
+
|
490
|
+
def crossref_funding(funding: list) -> list:
    """Return funding references for Crossref assertions.

    Placeholder: the input is not inspected and an empty list is returned.
    """
    funding_references: list = []
    return funding_references
|
493
|
+
|
494
|
+
|
495
|
+
def crossref_license(licenses: list) -> dict:
    """Return the SPDX mapping of the first Crossref license reference.

    Returns None when `licenses` is empty.
    """
    for element in licenses:
        # Only the first license found is used, so return on the first pass
        license_url = normalize_cc_url(parse_attributes(element))
        return dict_to_spdx({"url": license_url})
    return None
|
@@ -0,0 +1,98 @@
|
|
1
|
+
"""CSL-JSON reader for commonmeta-py"""
|
2
|
+
from ..utils import dict_to_spdx, from_csl, normalize_id, name_to_fos, encode_doi
|
3
|
+
from ..base_utils import wrap, compact, sanitize, presence
|
4
|
+
from ..author_utils import get_authors
|
5
|
+
from ..date_utils import get_date_from_date_parts
|
6
|
+
from ..doi_utils import get_doi_ra, doi_from_url
|
7
|
+
from ..constants import (
|
8
|
+
CSL_TO_CM_TRANSLATIONS,
|
9
|
+
Commonmeta,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
def read_csl(data: dict, **kwargs) -> Commonmeta:
    """Map a CSL-JSON item to commonmeta properties.

    Args:
        data: CSL-JSON item as a dict.
        **kwargs: Read options. `prefix` is used to generate a DOI when the
            item has none. All options are merged over the result, so they
            override any key computed here.

    Returns:
        `{"state": "not_found"}` when `data` is None, otherwise a dict of
        commonmeta properties.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data

    read_options = kwargs or {}

    _id = normalize_id(meta.get("id", None) or meta.get("DOI", None))
    _type = CSL_TO_CM_TRANSLATIONS.get(meta.get("type", None), "Other")

    # optionally generate a DOI if missing but a DOI prefix is provided
    prefix = read_options.get("prefix", None)
    if doi_from_url(_id) is None and prefix is not None:
        _id = encode_doi(prefix)

    # Editors are appended after authors in the same contributors list
    contributors = get_authors(from_csl(wrap(meta.get("author", None))))
    contrib = get_authors(from_csl(wrap(meta.get("editor", None))))
    if contrib:
        contributors += contrib

    date = {"published": get_date_from_date_parts(meta.get("issued", None))}

    license_ = meta.get("copyright", None)
    if license_ is not None:
        license_ = dict_to_spdx({"url": meta.get("copyright")})

    # "page" may be absent, None or a number; normalize to a string before
    # splitting a "first-last" range.
    pages = str(meta.get("page", None) or "").split("-")
    publisher = meta.get("publisher", None)
    if isinstance(publisher, str):
        publisher = {"name": publisher}
    relations = []
    issn = meta.get("ISSN", None)
    if issn is not None:
        # issn_as_url is not among the module-level imports, so calling it
        # raised NameError for every item with an ISSN. Imported here, inside
        # the branch that needs it.
        # NOTE(review): assumes issn_as_url lives in ..utils — confirm.
        from ..utils import issn_as_url

        relations.append(
            {
                "id": issn_as_url(issn),
                "type": "IsPartOf",
            }
        )
    container = compact(
        {
            "type": "Periodical",
            "title": meta.get("container-title", None),
            "identifier": issn,
            "identifierType": "ISSN" if meta.get("ISSN", None) else None,
            "volume": meta.get("volume", None),
            "issue": meta.get("issue", None),
            "firstPage": pages[0],
            "lastPage": pages[1] if len(pages) > 1 else None,
        }
    )

    # Findable as soon as either an id or read options are present
    state = "findable" if _id or read_options else "not_found"
    subjects = [name_to_fos(i) for i in wrap(meta.get("keywords", None))]

    if meta.get("abstract", None):
        descriptions = [
            {
                "description": sanitize(str(meta.get("abstract"))),
                "type": "Abstract",
            }
        ]
    else:
        descriptions = None

    provider = get_doi_ra(_id)

    return {
        "id": _id,
        "type": _type,
        "url": normalize_id(meta.get("URL", None)),
        "titles": [{"title": meta.get("title", None)}],
        "contributors": presence(contributors),
        "publisher": presence(publisher),
        "date": compact(date),
        "container": container,
        "references": None,
        "relations": presence(relations),
        "descriptions": descriptions,
        "license": license_,
        "version": meta.get("version", None),
        "subjects": subjects,
        "provider": provider,
        "state": state,
    } | read_options
|