commonmeta-py 0.23__py3-none-any.whl → 0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/METADATA +56 -45
- commonmeta_py-0.25.dist-info/RECORD +75 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/WHEEL +1 -1
- commonmeta_py-0.25.dist-info/entry_points.txt +3 -0
- commonmeta_py-0.23.dist-info/RECORD +0 -5
- /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.25.dist-info}/LICENSE +0 -0
@@ -0,0 +1,409 @@
|
|
1
|
+
"""crossref reader for commonmeta-py"""
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
import httpx
|
5
|
+
from pydash import py_
|
6
|
+
|
7
|
+
from ..utils import (
|
8
|
+
dict_to_spdx,
|
9
|
+
normalize_cc_url,
|
10
|
+
normalize_url,
|
11
|
+
normalize_doi,
|
12
|
+
normalize_issn,
|
13
|
+
issn_as_url,
|
14
|
+
)
|
15
|
+
from ..base_utils import wrap, compact, presence, sanitize, parse_attributes
|
16
|
+
from ..author_utils import get_authors
|
17
|
+
from ..date_utils import get_date_from_date_parts
|
18
|
+
from ..doi_utils import (
|
19
|
+
doi_as_url,
|
20
|
+
doi_from_url,
|
21
|
+
crossref_api_url,
|
22
|
+
crossref_api_query_url,
|
23
|
+
crossref_api_sample_url,
|
24
|
+
)
|
25
|
+
from ..constants import (
|
26
|
+
CR_TO_CM_TRANSLATIONS,
|
27
|
+
CR_TO_CM_CONTAINER_TRANSLATIONS,
|
28
|
+
CROSSREF_CONTAINER_TYPES,
|
29
|
+
Commonmeta,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
def get_crossref_list(query: dict, **kwargs) -> list[dict]:
    """Fetch a list of works from the Crossref API.

    Args:
        query: Query parameters, handed to ``crossref_api_query_url`` to build
            the request URL.
        **kwargs: Forwarded unchanged to both ``crossref_api_query_url`` and
            ``httpx.get``.

    Returns:
        The ``message.items`` list of the JSON response. An empty list is
        returned when the request times out, the connection fails, the
        status code is not 200, or the response carries no items.
    """
    url = crossref_api_query_url(query, **kwargs)
    try:
        response = httpx.get(url, timeout=30, **kwargs)
    except (httpx.ReadTimeout, httpx.ConnectError):
        # Network failures are treated like a non-200 answer: best effort,
        # no results.
        return []
    if response.status_code != 200:
        return []
    # Guard against an explicit null for "message" or "items".
    message = response.json().get("message") or {}
    return message.get("items") or []
|
40
|
+
|
41
|
+
|
42
|
+
def get_crossref(pid: str, **kwargs) -> dict:
    """Fetch the Crossref record for a single identifier.

    Args:
        pid: Identifier from which ``doi_from_url`` extracts the DOI.
        **kwargs: Forwarded unchanged to ``httpx.get``.

    Returns:
        The ``message`` part of the JSON response with ``"via": "crossref"``
        merged in, or ``{"state": "not_found"}`` when no DOI could be
        extracted or the API did not answer with status 200.
    """
    not_found = {"state": "not_found"}

    doi = doi_from_url(pid)
    if doi is None:
        return not_found

    response = httpx.get(crossref_api_url(doi), timeout=10, **kwargs)
    if response.status_code == 200:
        message = response.json().get("message", {})
        return message | {"via": "crossref"}
    return not_found
|
52
|
+
|
53
|
+
|
54
|
+
def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
    """Convert a Crossref work record into commonmeta metadata.

    Args:
        data: The Crossref record as a dict (for example the value returned
            by ``get_crossref``). ``None`` yields a not-found result.
        **kwargs: Extra key/value pairs merged over the generated metadata,
            so they override any property read from the record.

    Returns:
        A dict with the commonmeta properties, or ``{"state": "not_found"}``
        when ``data`` is ``None``.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data
    # Keyword arguments win over values derived from the record (merged last).
    read_options = kwargs or {}

    doi = meta.get("DOI", None)
    _id = doi_as_url(doi)
    _type = CR_TO_CM_TRANSLATIONS.get(meta.get("type", None)) or "Other"

    archive_locations = wrap(meta.get("archive", None))

    if meta.get("author", None):
        contributors = get_authors(wrap(meta.get("author")), via="crossref")
    else:
        contributors = []

    # Tag editors on copies so the caller's record is left untouched.
    editors = [
        {**i, "contributorType": "Editor"} for i in wrap(meta.get("editor", None))
    ]
    if editors:
        contributors += get_authors(editors)

    url = normalize_url(py_.get(meta, "resource.primary.URL"))
    titles = get_titles(meta)
    publisher = compact({"name": meta.get("publisher", None)})

    date = compact(
        {
            "published": py_.get(meta, "issued.date-time")
            or get_date_from_date_parts(meta.get("issued", None))
            or py_.get(meta, "created.date-time")
        }
    )
    identifiers = [
        compact(
            {
                "identifier": _id,
                "identifierType": "DOI",
            }
        )
    ]

    # Only the first license entry is used; an empty list means no license
    # rather than an IndexError.
    license_ = None
    licenses = wrap(meta.get("license", None))
    if licenses:
        license_url = normalize_cc_url(licenses[0].get("URL", None))
        license_ = dict_to_spdx({"url": license_url}) if license_url else None

    issn = get_issn(meta)
    container = get_container(meta, issn=issn)
    relations = get_relations(meta.get("relation", None))
    if issn is not None:
        # The container ISSN is also expressed as an IsPartOf relation.
        relations.append(
            {
                "id": issn_as_url(issn),
                "type": "IsPartOf",
            }
        )
        relations = py_.uniq(relations)
    references = py_.uniq([get_reference(i) for i in wrap(meta.get("reference", None))])
    funding_references = from_crossref_funding(wrap(meta.get("funder", None)))

    description = meta.get("abstract", None)
    if description is not None:
        descriptions = [{"description": sanitize(description), "type": "Abstract"}]
    else:
        descriptions = None

    subjects = py_.uniq(
        [
            {"subject": i}
            for i in wrap(meta.get("subject", None) or meta.get("group-title", None))
        ]
    )
    # Links without a "content-type" key are kept (no KeyError); only links
    # explicitly marked "unspecified" are skipped.
    files = py_.uniq(
        [
            get_file(i)
            for i in wrap(meta.get("link", None))
            if i.get("content-type", None) != "unspecified"
        ]
    )

    return {
        # required properties
        "id": _id,
        "type": _type,
        # recommended and optional properties
        "additionalType": None,
        "archiveLocations": presence(archive_locations),
        "container": presence(container),
        "contributors": presence(contributors),
        "date": presence(date),
        "descriptions": presence(descriptions),
        "files": presence(files),
        "fundingReferences": presence(funding_references),
        "geoLocations": None,
        "identifiers": identifiers,
        "language": meta.get("language", None),
        "license": license_,
        "provider": "Crossref",
        "publisher": presence(publisher),
        "references": presence(references),
        "relations": presence(relations),
        "subjects": presence(subjects),
        "titles": presence(titles),
        "url": url,
        "version": meta.get("version", None),
    } | read_options
|
167
|
+
|
168
|
+
|
169
|
+
def get_titles(meta):
    """Build the commonmeta title list from a Crossref record.

    Reads the ``title``, ``subtitle`` and ``original_language_title`` entries
    of ``meta``. Plain titles come first, then subtitles (``titleType``
    "Subtitle"), then original-language titles (``titleType``
    "TranslatedTitle"). Every title string is passed through ``sanitize``.
    """

    def parsed(key):
        # Normalise the raw entry into a list of title values.
        return wrap(parse_attributes(meta.get(key, None)))

    main_titles = parsed("title")
    sub_titles = parsed("subtitle")
    original_titles = parsed("original_language_title")
    # No language information is read from the record.
    language = None

    result = [{"title": sanitize(title)} for title in main_titles]
    for subtitle in sub_titles:
        result.append(compact({"title": sanitize(subtitle), "titleType": "Subtitle"}))
    for original in original_titles:
        result.append(
            compact(
                {
                    "title": sanitize(original),
                    "titleType": "TranslatedTitle",
                    "lang": language,
                }
            )
        )
    return result
|
199
|
+
|
200
|
+
|
201
|
+
def get_reference(reference: Optional[dict]) -> Optional[dict]:
    """Map one Crossref reference entry onto commonmeta reference keys.

    Returns ``None`` when ``reference`` is not a dict. Otherwise the mapped
    fields are passed through ``compact`` and returned.
    """
    if not isinstance(reference, dict):
        return None

    # commonmeta key -> Crossref key, in output order
    copied_fields = {
        "contributor": "author",
        "title": "article-title",
        "publisher": "publisher",
        "publicationYear": "year",
        "volume": "volume",
        "issue": "issue",
        "firstPage": "first-page",
        "lastPage": "last-page",
        "containerTitle": "journal-title",
    }

    doi = reference.get("DOI", None)
    metadata = {
        "key": reference.get("key", None),
        "id": normalize_doi(doi) if doi else None,
    }
    for target, source in copied_fields.items():
        metadata[target] = reference.get(source, None)
    # Edition is never populated from the Crossref reference.
    metadata["edition"] = None
    metadata["unstructured"] = reference.get("unstructured", None)
    return compact(metadata)
|
222
|
+
|
223
|
+
|
224
|
+
def get_relations(relations: Optional[dict]) -> list:
    """Convert the Crossref ``relation`` object into commonmeta relations.

    Args:
        relations: Mapping of Crossref relation name (e.g. ``"is-part-of"``)
            to the related item or list of related items. Each item is a
            dict read via its ``"id"`` and ``"id-type"`` keys.

    Returns:
        A de-duplicated list of ``{"type": ..., "id": ...}`` dicts. Relation
        names whose PascalCase form is not a supported type are skipped.
        Empty or non-mapping input yields an empty list.
    """
    supported_types = (
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "HasReview",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
        "IsSupplementedBy",
    )

    # Anything that is not a non-empty mapping has no relations to offer;
    # this also avoids calling .items() on a list.
    if not relations or not isinstance(relations, dict):
        return []

    formatted = []
    for key, values in relations.items():
        _type = py_.pascal_case(key)
        if _type not in supported_types:
            continue
        # wrap() tolerates a single relation object that is not in a list.
        for value in wrap(values):
            id_type = value.get("id-type", None)
            identifier = value.get("id", None)
            if id_type == "doi":
                _id = doi_as_url(identifier)
            elif id_type == "issn":
                _id = issn_as_url(identifier)
            else:
                _id = identifier
            formatted.append({"type": _type, "id": _id})

    return py_.uniq(formatted)
|
269
|
+
|
270
|
+
|
271
|
+
def get_file(file: dict) -> dict:
    """Map a Crossref link entry onto a commonmeta file entry.

    Copies ``URL`` to ``url`` and ``content-type`` to ``mimeType`` and passes
    the result through ``compact``.
    """
    entry = {}
    entry["url"] = file.get("URL", None)
    entry["mimeType"] = file.get("content-type", None)
    return compact(entry)
|
279
|
+
|
280
|
+
|
281
|
+
def get_issn(meta: dict) -> Optional[str]:
    """Pick the ISSN of a Crossref record.

    Candidates are tried in this order: the ``issn-type`` entry of type
    "electronic", the ``issn-type`` entry of type "print", then the first
    ``relation.is-part-of`` entry whose ``id-type`` is "issn".

    Args:
        meta: The Crossref record.

    Returns:
        The candidate's ``value`` (or, failing that, ``id``) passed through
        ``normalize_issn``, or ``None`` when no candidate exists.
    """

    def matches(item, key, expected):
        # Entries lacking the key (or not being dicts) simply do not match
        # instead of raising.
        return isinstance(item, dict) and item.get(key, None) == expected

    issn_types = wrap(meta.get("issn-type", None))
    part_of = wrap(py_.get(meta, "relation.is-part-of", []))

    # Candidates in order of preference; the first one wins.
    candidates = (
        [item for item in issn_types if matches(item, "type", "electronic")]
        + [item for item in issn_types if matches(item, "type", "print")]
        + [item for item in part_of if matches(item, "id-type", "issn")]
    )
    if not candidates:
        return None

    issn = candidates[0]
    return normalize_issn(issn.get("value", None) or issn.get("id", None))
|
315
|
+
|
316
|
+
|
317
|
+
def get_container(meta: dict, issn: Optional[str]) -> dict:
    """Build the commonmeta container from a Crossref record.

    Args:
        meta: The Crossref record.
        issn: ISSN of the container, or ``None``. When given it is used as
            the container identifier in preference to an ISBN.

    Returns:
        The container dict (type, identifier, identifierType, title, volume,
        issue, firstPage, lastPage) after ``compact``.
    """
    # Crossref work type -> Crossref container type -> commonmeta container type
    container_type = CROSSREF_CONTAINER_TYPES.get(meta.get("type", None))
    container_type = CR_TO_CM_CONTAINER_TRANSLATIONS.get(container_type, None)

    # Prefer the electronic ISBN over the print ISBN. Entries without a
    # "type" or "value" key are tolerated instead of raising KeyError.
    isbn_types = [i for i in wrap(meta.get("isbn-type", None)) if isinstance(i, dict)]
    isbn_entry = next(
        (item for item in isbn_types if item.get("type", None) == "electronic"),
        None,
    ) or next(
        (item for item in isbn_types if item.get("type", None) == "print"),
        None,
    )
    isbn = isbn_entry.get("value", None) if isbn_entry else None

    container_title = parse_attributes(meta.get("container-title", None), first=True)
    volume = meta.get("volume", None)
    issue = py_.get(meta, "journal-issue.issue")

    # "page" is a range such as "12-34"; a single page has no last page.
    first_page = None
    last_page = None
    page = meta.get("page", None)
    if page:
        pages = page.split("-")
        first_page = pages[0]
        last_page = pages[1] if len(pages) > 1 else None

    # TODO: add support for series, location, missing in Crossref JSON

    return compact(
        {
            "type": container_type,
            "identifier": issn or isbn,
            "identifierType": "ISSN" if issn else "ISBN" if isbn else None,
            "title": container_title,
            "volume": volume,
            "issue": issue,
            "firstPage": first_page,
            "lastPage": last_page,
        }
    )
|
366
|
+
|
367
|
+
|
368
|
+
def from_crossref_funding(funding_references: list) -> list:
    """Convert Crossref ``funder`` entries into commonmeta funding references.

    Args:
        funding_references: List of Crossref funder dicts, read via their
            ``name``, ``DOI`` and ``award`` keys. ``None`` or an empty list
            yields an empty result.

    Returns:
        A de-duplicated list of funding reference dicts. A named funder with
        awards produces one entry per award (``awardNumber``). A funder
        without awards, including an empty award list, produces a single
        entry.
    """
    if not funding_references:
        return []

    formatted_funding_references = []
    for funding in funding_references:
        name = funding.get("name", None)
        doi = funding.get("DOI", None)
        f = compact(
            {
                "funderName": name,
                "funderIdentifier": doi_as_url(doi) if doi is not None else None,
                # DOIs with the 10.13039 prefix are labelled Crossref Funder ID.
                "funderIdentifierType": "Crossref Funder ID"
                if (doi or "").startswith("10.13039")
                else None,
            }
        )
        awards = wrap(funding.get("award", None))
        if name is not None and awards:
            for award in awards:
                fund_ref = f.copy()
                fund_ref["awardNumber"] = award
                formatted_funding_references.append(fund_ref)
        elif f != {}:
            # Also reached for an empty award list, so the funder itself is
            # kept instead of being dropped.
            formatted_funding_references.append(f)
    return py_.uniq(formatted_funding_references)
|
395
|
+
|
396
|
+
|
397
|
+
def get_random_crossref_id(number: int = 1, **kwargs) -> list:
    """Get random DOIs from the Crossref API.

    Args:
        number: How many DOIs to request; values above 20 are capped at 20.
        **kwargs: Forwarded unchanged to ``crossref_api_sample_url``.

    Returns:
        The ``DOI`` value of every item in ``message.items``. An empty list
        is returned when the request times out, the connection fails, the
        status code is not 200, or the response has no items.
    """
    number = min(number, 20)
    url = crossref_api_sample_url(number, **kwargs)
    try:
        response = httpx.get(url, timeout=10)
    except (httpx.ReadTimeout, httpx.ConnectError):
        return []
    if response.status_code != 200:
        return []

    # A response without "message.items" yields no DOIs instead of a
    # TypeError from iterating None.
    items = py_.get(response.json(), "message.items") or []
    return [i.get("DOI") for i in items]
|