commonmeta-py 0.16.9__tar.gz → 0.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/PKG-INFO +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/author_utils.py +1 -2
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/constants.py +1 -2
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/crossref_utils.py +12 -12
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/metadata.py +10 -12
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/cff_reader.py +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/crossref_reader.py +17 -24
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/datacite_reader.py +53 -13
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/datacite_xml_reader.py +7 -21
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/inveniordm_reader.py +4 -11
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/json_feed_reader.py +5 -7
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/kbase_reader.py +4 -8
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/ris_reader.py +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/schema_org_reader.py +26 -17
- commonmeta_py-0.17/commonmeta/resources/commonmeta_v0.13.json +583 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/schema_utils.py +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/utils.py +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/commonmeta_writer.py +9 -2
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/datacite_writer.py +13 -6
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/pyproject.toml +1 -1
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/LICENSE +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/README.md +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/__init__.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/api_utils.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/base_utils.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/cli.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/date_utils.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/doi_utils.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/__init__.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/codemeta_reader.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/commonmeta_reader.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/crossref_xml_reader.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/readers/csl_reader.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/cff_v1.2.0.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/commonmeta_v0.12.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/AccessIndicators.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/clinicaltrials.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/common5.3.1.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/crossref5.3.1.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/fundref.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/module-ali.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref/relations.xsd +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/crossref-v0.2.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/csl-data.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/datacite-v4.5.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/ietf-bcp-47.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/iso-8601.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/spdx/licenses.json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/spdx-schema..json +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/apa.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/chicago-author-date.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/harvard-cite-them-right.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/ieee.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/modern-language-association.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/resources/styles/vancouver.csl +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/translators.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/__init__.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/bibtex_writer.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/citation_writer.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/crossref_xml_writer.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/csl_writer.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/ris_writer.py +0 -0
- {commonmeta_py-0.16.9 → commonmeta_py-0.17}/commonmeta/writers/schema_org_writer.py +0 -0
@@ -119,8 +119,7 @@ def get_one_author(author, **kwargs):
|
|
119
119
|
"name": name if _type == "Organization" else None,
|
120
120
|
"givenName": given_name if _type == "Person" else None,
|
121
121
|
"familyName": family_name if _type == "Person" else None,
|
122
|
-
"affiliation": presence(
|
123
|
-
get_affiliations(wrap(author.get("affiliation", None)))
|
122
|
+
"affiliation": presence(get_affiliations(wrap(author.get("affiliation", None)))
|
124
123
|
),
|
125
124
|
}
|
126
125
|
)
|
@@ -16,7 +16,7 @@ class Commonmeta(TypedDict):
|
|
16
16
|
subjects: Optional[List[dict]]
|
17
17
|
contributors: Optional[List[dict]]
|
18
18
|
language: Optional[str]
|
19
|
-
|
19
|
+
identifiers: Optional[List[dict]]
|
20
20
|
relations: Optional[List[dict]]
|
21
21
|
sizes: Optional[List[dict]]
|
22
22
|
formats: Optional[List[dict]]
|
@@ -30,7 +30,6 @@ class Commonmeta(TypedDict):
|
|
30
30
|
files: Optional[List[dict]]
|
31
31
|
agency: Optional[str]
|
32
32
|
state: str
|
33
|
-
schema_version: Optional[str]
|
34
33
|
|
35
34
|
|
36
35
|
# source: https://www.bibtex.com/e/entry-types/
|
@@ -430,29 +430,29 @@ def insert_institution(metadata, xml):
|
|
430
430
|
|
431
431
|
def insert_item_number(metadata, xml):
|
432
432
|
"""Insert item number"""
|
433
|
-
if metadata.
|
433
|
+
if metadata.identifiers is None:
|
434
434
|
return xml
|
435
|
-
for
|
436
|
-
if
|
435
|
+
for identifier in metadata.identifiers:
|
436
|
+
if identifier.get("identifier", None) is None:
|
437
437
|
continue
|
438
|
-
if
|
438
|
+
if identifier.get("identifierType", None) is not None:
|
439
439
|
# strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
|
440
|
-
if
|
441
|
-
|
442
|
-
"
|
440
|
+
if identifier.get("identifierType", None) == "UUID":
|
441
|
+
identifier["identifier"] = identifier.get(
|
442
|
+
"identifier", ""
|
443
443
|
).replace("-", "")
|
444
444
|
etree.SubElement(
|
445
445
|
xml,
|
446
446
|
"item_number",
|
447
447
|
{
|
448
|
-
"item_number_type":
|
449
|
-
"
|
448
|
+
"item_number_type": identifier.get(
|
449
|
+
"identifierType", ""
|
450
450
|
).lower()
|
451
451
|
},
|
452
|
-
).text =
|
452
|
+
).text = identifier.get("identifier", None)
|
453
453
|
else:
|
454
|
-
etree.SubElement(xml, "item_number").text =
|
455
|
-
"
|
454
|
+
etree.SubElement(xml, "item_number").text = identifier.get(
|
455
|
+
"identifier", None
|
456
456
|
)
|
457
457
|
return xml
|
458
458
|
|
@@ -79,35 +79,32 @@ class Metadata:
|
|
79
79
|
# required properties
|
80
80
|
self.id = meta.get("id") # pylint: disable=C0103
|
81
81
|
self.type = meta.get("type")
|
82
|
+
# recommended and optional properties
|
82
83
|
self.url = meta.get("url")
|
83
84
|
self.contributors = meta.get("contributors")
|
84
85
|
self.titles = meta.get("titles")
|
85
86
|
self.publisher = meta.get("publisher")
|
86
87
|
self.date = meta.get("date")
|
87
|
-
|
88
|
-
self.additional_type = meta.get("additional_type")
|
88
|
+
self.additional_type = meta.get("additionalType")
|
89
89
|
self.subjects = meta.get("subjects")
|
90
90
|
self.language = meta.get("language")
|
91
|
-
self.
|
91
|
+
self.identifiers = meta.get("identifiers")
|
92
92
|
self.relations = meta.get("relations")
|
93
|
-
self.sizes = meta.get("sizes")
|
94
|
-
self.formats = meta.get("formats")
|
95
93
|
self.version = meta.get("version")
|
96
94
|
self.license = meta.get("license")
|
97
95
|
self.descriptions = meta.get("descriptions")
|
98
|
-
self.geo_locations = meta.get("
|
99
|
-
self.funding_references = meta.get("
|
96
|
+
self.geo_locations = meta.get("geoLocations")
|
97
|
+
self.funding_references = meta.get("fundingReferences")
|
100
98
|
self.references = meta.get("references")
|
99
|
+
self.files = meta.get("files")
|
100
|
+
self.container = meta.get("container")
|
101
|
+
self.provider = meta.get("provider")
|
102
|
+
self.archive_locations = meta.get("archiveLocations")
|
101
103
|
# other properties
|
102
104
|
self.date_created = meta.get("date_created")
|
103
105
|
self.date_registered = meta.get("date_registered")
|
104
106
|
self.date_published = meta.get("date_published")
|
105
107
|
self.date_updated = meta.get("date_updated")
|
106
|
-
self.files = meta.get("files")
|
107
|
-
self.container = meta.get("container")
|
108
|
-
self.provider = meta.get("provider")
|
109
|
-
self.schema_version = meta.get("schema_version")
|
110
|
-
self.archive_locations = meta.get("archive_locations")
|
111
108
|
self.state = meta.get("state")
|
112
109
|
|
113
110
|
# Catch errors in the reader, then validate against JSON schema for Commonmeta
|
@@ -222,6 +219,7 @@ class Metadata:
|
|
222
219
|
elif to == "datacite":
|
223
220
|
instance = json.loads(write_datacite(self))
|
224
221
|
self.write_errors = json_schema_errors(instance, schema="datacite")
|
222
|
+
print(self.write_errors)
|
225
223
|
return write_datacite(self)
|
226
224
|
elif to == "crossref_xml":
|
227
225
|
doi = doi_from_url(self.id)
|
@@ -18,8 +18,6 @@ from ..date_utils import get_date_from_date_parts
|
|
18
18
|
from ..doi_utils import (
|
19
19
|
doi_as_url,
|
20
20
|
doi_from_url,
|
21
|
-
# get_doi_ra,
|
22
|
-
# get_crossref_member,
|
23
21
|
crossref_api_url,
|
24
22
|
crossref_api_query_url,
|
25
23
|
crossref_api_sample_url,
|
@@ -113,7 +111,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
113
111
|
description = meta.get("abstract", None)
|
114
112
|
if description is not None:
|
115
113
|
descriptions = [
|
116
|
-
{"description": sanitize(description), "
|
114
|
+
{"description": sanitize(description), "type": "Abstract"}
|
117
115
|
]
|
118
116
|
else:
|
119
117
|
descriptions = None
|
@@ -124,43 +122,38 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
124
122
|
for i in wrap(meta.get("subject", None) or meta.get("group-title", None))
|
125
123
|
]
|
126
124
|
)
|
127
|
-
files = [
|
125
|
+
files = py_.uniq([
|
128
126
|
get_file(i)
|
129
127
|
for i in wrap(meta.get("link", None))
|
130
128
|
if i["content-type"] != "unspecified"
|
131
|
-
]
|
132
|
-
|
133
|
-
state = "findable" if meta or read_options else "not_found"
|
129
|
+
])
|
134
130
|
|
135
131
|
return {
|
136
132
|
# required properties
|
137
133
|
"id": _id,
|
138
134
|
"type": _type,
|
135
|
+
# recommended and optional properties
|
139
136
|
"url": url,
|
140
137
|
"contributors": presence(contributors),
|
141
138
|
"titles": presence(titles),
|
142
139
|
"publisher": presence(publisher),
|
143
140
|
"date": presence(date),
|
144
|
-
|
145
|
-
"additional_type": None,
|
141
|
+
"additionalType": None,
|
146
142
|
"subjects": presence(subjects),
|
147
143
|
"language": meta.get("language", None),
|
148
|
-
"
|
144
|
+
"identifiers": None,
|
149
145
|
"sizes": None,
|
150
146
|
"formats": None,
|
151
147
|
"version": meta.get("version", None),
|
152
148
|
"license": license_,
|
153
149
|
"descriptions": descriptions,
|
154
|
-
"
|
155
|
-
"
|
150
|
+
"geoLocations": None,
|
151
|
+
"fundingReferences": presence(funding_references),
|
156
152
|
"references": presence(references),
|
157
153
|
"relations": presence(relations),
|
158
|
-
# other properties
|
159
154
|
"files": presence(files),
|
160
155
|
"container": presence(container),
|
161
|
-
"provider": "Crossref",
|
162
|
-
"state": state,
|
163
|
-
"schema_version": None,
|
156
|
+
"provider": "Crossref",
|
164
157
|
} | read_options
|
165
158
|
|
166
159
|
|
@@ -203,7 +196,7 @@ def get_reference(reference: Optional[dict]) -> Optional[dict]:
|
|
203
196
|
doi = reference.get("DOI", None)
|
204
197
|
metadata = {
|
205
198
|
"key": reference.get("key", None),
|
206
|
-
"
|
199
|
+
"id": normalize_doi(doi) if doi else None,
|
207
200
|
"contributor": reference.get("author", None),
|
208
201
|
"title": reference.get("article-title", None),
|
209
202
|
"publisher": reference.get("publisher", None),
|
@@ -214,7 +207,7 @@ def get_reference(reference: Optional[dict]) -> Optional[dict]:
|
|
214
207
|
"lastPage": reference.get("last-page", None),
|
215
208
|
"containerTitle": reference.get("journal-title", None),
|
216
209
|
"edition": None,
|
217
|
-
"unstructured": reference.get("unstructured", None)
|
210
|
+
"unstructured": reference.get("unstructured", None),
|
218
211
|
}
|
219
212
|
return compact(metadata)
|
220
213
|
|
@@ -367,7 +360,7 @@ def from_crossref_funding(funding_references: list) -> list:
|
|
367
360
|
"""Get funding references from Crossref"""
|
368
361
|
formatted_funding_references = []
|
369
362
|
for funding in funding_references:
|
370
|
-
|
363
|
+
f = compact(
|
371
364
|
{
|
372
365
|
"funderName": funding.get("name", None),
|
373
366
|
"funderIdentifier": doi_as_url(funding["DOI"])
|
@@ -378,18 +371,18 @@ def from_crossref_funding(funding_references: list) -> list:
|
|
378
371
|
else None,
|
379
372
|
}
|
380
373
|
)
|
381
|
-
|
374
|
+
f = py_.omit(f, "DOI", "doi-asserted-by")
|
382
375
|
if (
|
383
376
|
funding.get("name", None) is not None
|
384
377
|
and funding.get("award", None) is not None
|
385
378
|
):
|
386
379
|
for award in wrap(funding["award"]):
|
387
|
-
fund_ref =
|
380
|
+
fund_ref = f.copy()
|
388
381
|
fund_ref["awardNumber"] = award
|
389
382
|
formatted_funding_references.append(fund_ref)
|
390
|
-
elif
|
391
|
-
formatted_funding_references.append(
|
392
|
-
return formatted_funding_references
|
383
|
+
elif f != {}:
|
384
|
+
formatted_funding_references.append(f)
|
385
|
+
return py_.uniq(formatted_funding_references)
|
393
386
|
|
394
387
|
|
395
388
|
def get_random_crossref_id(number: int = 1, **kwargs) -> list:
|
@@ -82,13 +82,23 @@ def read_datacite(data: dict, **kwargs) -> Commonmeta:
|
|
82
82
|
license_ = dict_to_spdx({"url": license_}) if license_ else None
|
83
83
|
|
84
84
|
files = [get_file(i) for i in wrap(meta.get("content_url"))]
|
85
|
+
|
86
|
+
identifiers = get_identifiers(wrap(meta.get("alternateIdentifiers", None)))
|
85
87
|
references = get_references(
|
86
88
|
wrap(meta.get("relatedItems", None) or meta.get("relatedIdentifiers", None))
|
87
89
|
)
|
88
90
|
relations = get_relations(wrap(meta.get("relatedIdentifiers", None)))
|
89
91
|
descriptions = get_descriptions(wrap(meta.get("descriptions", None)))
|
90
92
|
geo_locations = get_geolocation(wrap(meta.get("geoLocations", None)))
|
91
|
-
|
93
|
+
|
94
|
+
def format_subject(subject):
|
95
|
+
"""format_subject"""
|
96
|
+
return compact({
|
97
|
+
"subject": subject.get("subject", None),
|
98
|
+
"language": subject.get("lang", None),
|
99
|
+
})
|
100
|
+
|
101
|
+
subjects = py_.uniq([format_subject(i) for i in wrap(meta.get("subjects", None))])
|
92
102
|
|
93
103
|
return {
|
94
104
|
# required properties
|
@@ -101,28 +111,60 @@ def read_datacite(data: dict, **kwargs) -> Commonmeta:
|
|
101
111
|
"publisher": publisher,
|
102
112
|
"date": compact(date),
|
103
113
|
# recommended and optional properties
|
104
|
-
"
|
105
|
-
"subjects": presence(
|
114
|
+
"additionalType": additional_type,
|
115
|
+
"subjects": presence(subjects),
|
106
116
|
"language": meta.get("language", None),
|
107
|
-
"
|
108
|
-
"sizes": presence(meta.get("sizes", None)),
|
109
|
-
"formats": presence(formats),
|
117
|
+
"identifiers": presence(identifiers),
|
110
118
|
"version": meta.get("version", None),
|
111
119
|
"license": presence(license_),
|
112
120
|
"descriptions": descriptions,
|
113
|
-
"
|
114
|
-
"
|
121
|
+
"geoLocations": presence(geo_locations),
|
122
|
+
"fundingReferences": presence(meta.get("fundingReferences", None)),
|
115
123
|
"references": presence(references),
|
116
124
|
"relations": presence(relations),
|
117
125
|
# other properties
|
118
126
|
"files": presence(files),
|
119
127
|
"container": presence(container),
|
120
128
|
"provider": "DataCite",
|
121
|
-
"state": "findable",
|
122
|
-
"schema_version": meta.get("schemaVersion", None),
|
123
129
|
} | read_options
|
124
130
|
|
125
131
|
|
132
|
+
def get_identifiers(identifiers: list) -> list:
|
133
|
+
"""get_identifiers"""
|
134
|
+
|
135
|
+
def is_identifier(identifier):
|
136
|
+
"""supported identifier types"""
|
137
|
+
return identifier.get("identifierType", None) in [
|
138
|
+
"ARK",
|
139
|
+
"arXiv",
|
140
|
+
"Bibcode",
|
141
|
+
"DOI",
|
142
|
+
"Handle",
|
143
|
+
"ISBN",
|
144
|
+
"ISSN",
|
145
|
+
"PMID",
|
146
|
+
"PMCID",
|
147
|
+
"PURL",
|
148
|
+
"URL",
|
149
|
+
"URN",
|
150
|
+
"Other"
|
151
|
+
]
|
152
|
+
|
153
|
+
def format_identifier(identifier):
|
154
|
+
"""format_identifier"""
|
155
|
+
if is_identifier(identifier):
|
156
|
+
type_ = identifier.get("identifierType")
|
157
|
+
else:
|
158
|
+
type_ = "Other"
|
159
|
+
|
160
|
+
return compact(
|
161
|
+
{
|
162
|
+
"identifier": identifier.get("alternateIdentifier", None),
|
163
|
+
"identifierType": type_,
|
164
|
+
}
|
165
|
+
)
|
166
|
+
return [format_identifier(i) for i in wrap(identifiers)]
|
167
|
+
|
126
168
|
def get_references(references: list) -> list:
|
127
169
|
"""get_references"""
|
128
170
|
|
@@ -181,9 +223,7 @@ def get_relations(relations: list) -> list:
|
|
181
223
|
}
|
182
224
|
)
|
183
225
|
|
184
|
-
return [
|
185
|
-
map_relation(i) for i in relations if is_relation(i)
|
186
|
-
]
|
226
|
+
return [map_relation(i) for i in relations if is_relation(i)]
|
187
227
|
|
188
228
|
|
189
229
|
def get_file(file: str) -> dict:
|
@@ -72,13 +72,13 @@ def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
|
|
72
72
|
def format_description(description):
|
73
73
|
"""format_description"""
|
74
74
|
if isinstance(description, str):
|
75
|
-
return {"description": description, "
|
75
|
+
return {"description": description, "type": "Abstract"}
|
76
76
|
if isinstance(description, dict):
|
77
77
|
return compact(
|
78
78
|
{
|
79
79
|
"description": sanitize(description.get("#text", None)),
|
80
|
-
"
|
81
|
-
"
|
80
|
+
"type": description.get("descriptionType", "Abstract"),
|
81
|
+
"language": description.get("xml:lang", None),
|
82
82
|
}
|
83
83
|
)
|
84
84
|
return None
|
@@ -167,18 +167,6 @@ def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
|
|
167
167
|
|
168
168
|
geo_locations = [] # [format_geo_location(i) for i in wrap(py_.get(meta, "geoLocations.geoLocation")) if i]
|
169
169
|
|
170
|
-
def map_size(size):
|
171
|
-
"""map_size"""
|
172
|
-
return size.get("#text")
|
173
|
-
|
174
|
-
sizes = [map_size(i) for i in wrap(meta.get("sizes", None))]
|
175
|
-
|
176
|
-
def map_format(format_):
|
177
|
-
"""map_format"""
|
178
|
-
return format_.get("#text")
|
179
|
-
|
180
|
-
formats = [map_format(i) for i in wrap(meta.get("formats", None))]
|
181
|
-
|
182
170
|
def map_rights(rights):
|
183
171
|
"""map_rights"""
|
184
172
|
return compact(
|
@@ -227,17 +215,15 @@ def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
|
|
227
215
|
"publisher": publisher,
|
228
216
|
"date": date,
|
229
217
|
# recommended and optional properties
|
230
|
-
"
|
218
|
+
"additionalType": presence(additional_type),
|
231
219
|
"subjects": presence(subjects),
|
232
220
|
"language": meta.get("language", None),
|
233
|
-
"
|
234
|
-
"sizes": presence(sizes),
|
235
|
-
"formats": presence(formats),
|
221
|
+
"identifiers": presence(meta.get("alternateIdentifiers", None)),
|
236
222
|
"version": meta.get("version", None),
|
237
223
|
"license": presence(license_),
|
238
224
|
"descriptions": presence(descriptions),
|
239
|
-
"
|
240
|
-
"
|
225
|
+
"geoLocations": presence(geo_locations),
|
226
|
+
"fundingReferences": presence(funding_references),
|
241
227
|
"references": presence(references),
|
242
228
|
"relations": presence(relations),
|
243
229
|
# other properties
|
@@ -87,8 +87,6 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
|
|
87
87
|
)
|
88
88
|
files = [get_file(i) for i in wrap(meta.get("files"))]
|
89
89
|
|
90
|
-
state = "findable" if meta or read_options else "not_found"
|
91
|
-
|
92
90
|
return {
|
93
91
|
# required properties
|
94
92
|
"id": _id,
|
@@ -103,22 +101,17 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
|
|
103
101
|
# "additional_type": additional_type,
|
104
102
|
"subjects": presence(subjects),
|
105
103
|
"language": language,
|
106
|
-
# "alternate_identifiers": presence(meta.get("alternateIdentifiers", None)),
|
107
|
-
"sizes": None,
|
108
|
-
"formats": None,
|
109
104
|
"version": py_.get(meta, "metadata.version"),
|
110
105
|
"license": presence(license_),
|
111
106
|
"descriptions": descriptions,
|
112
|
-
"
|
107
|
+
"geoLocations": None,
|
113
108
|
# "funding_references": presence(meta.get("fundingReferences", None)),
|
114
|
-
|
109
|
+
"references": presence(references),
|
115
110
|
"relations": presence(relations),
|
116
111
|
# other properties
|
117
112
|
"files": files,
|
118
113
|
"container": container,
|
119
|
-
"provider": "
|
120
|
-
"state": state,
|
121
|
-
# "schema_version": meta.get("schemaVersion", None),
|
114
|
+
"provider": "DataCite",
|
122
115
|
} | read_options
|
123
116
|
|
124
117
|
|
@@ -199,7 +192,7 @@ def format_descriptions(descriptions: list) -> list:
|
|
199
192
|
return [
|
200
193
|
{
|
201
194
|
"description": sanitize(i),
|
202
|
-
"
|
195
|
+
"type": "Abstract" if index == 0 else "Other",
|
203
196
|
}
|
204
197
|
for index, i in enumerate(descriptions)
|
205
198
|
if i
|
@@ -121,8 +121,8 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
121
121
|
"type": "IsPartOf",
|
122
122
|
}
|
123
123
|
)
|
124
|
-
|
125
|
-
{"
|
124
|
+
identifiers = [
|
125
|
+
{"identifier": meta.get("id"), "identifierType": "UUID"}
|
126
126
|
]
|
127
127
|
files = get_files(_id)
|
128
128
|
state = "findable" if meta or read_options else "not_found"
|
@@ -140,14 +140,12 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
140
140
|
"additional_type": None,
|
141
141
|
"subjects": presence(subjects),
|
142
142
|
"language": meta.get("language", None),
|
143
|
-
"
|
144
|
-
"sizes": None,
|
145
|
-
"formats": None,
|
143
|
+
"identifiers": identifiers,
|
146
144
|
"version": None,
|
147
145
|
"license": license_,
|
148
146
|
"descriptions": descriptions,
|
149
|
-
"
|
150
|
-
"
|
147
|
+
"geoLocations": None,
|
148
|
+
"fundingReferences": presence(funding_references),
|
151
149
|
"references": presence(references),
|
152
150
|
"relations": presence(relations),
|
153
151
|
"files": files,
|
@@ -82,22 +82,18 @@ def read_kbase(data: dict, **kwargs) -> Commonmeta:
|
|
82
82
|
"additional_type": None,
|
83
83
|
"subjects": None,
|
84
84
|
"language": language,
|
85
|
-
"
|
86
|
-
"sizes": None,
|
87
|
-
"formats": None,
|
85
|
+
"identifiers": None,
|
88
86
|
"version": py_.get(meta, "metadata.version"),
|
89
87
|
"license": presence(license_),
|
90
88
|
"descriptions": descriptions,
|
91
89
|
"geo_locations": None,
|
92
|
-
"
|
90
|
+
"fundingReferences": presence(funding_references),
|
93
91
|
"references": presence(references),
|
94
92
|
"relations": presence(relations),
|
95
93
|
# other properties
|
96
94
|
"files": presence(files),
|
97
95
|
"container": container,
|
98
|
-
"provider": "
|
99
|
-
"state": state,
|
100
|
-
"schema_version": py_.get(data, "credit_metadata_schema_version"),
|
96
|
+
"provider": "DataCite",
|
101
97
|
} | read_options
|
102
98
|
|
103
99
|
|
@@ -202,7 +198,7 @@ def format_descriptions(descriptions: list) -> list:
|
|
202
198
|
return [
|
203
199
|
{
|
204
200
|
"description": sanitize(i),
|
205
|
-
"
|
201
|
+
"type": "Abstract" if index == 0 else "Other",
|
206
202
|
}
|
207
203
|
for index, i in enumerate(descriptions)
|
208
204
|
if i
|
@@ -44,7 +44,7 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
|
|
44
44
|
# end
|
45
45
|
descriptions = None
|
46
46
|
if meta.get("AB", None) is not None:
|
47
|
-
descriptions = [{"description": meta.get("AB"), "
|
47
|
+
descriptions = [{"description": meta.get("AB"), "type": "Abstract"}]
|
48
48
|
if meta.get("T2", None) is not None:
|
49
49
|
container = compact(
|
50
50
|
{
|
@@ -64,20 +64,29 @@ def get_schema_org(pid: str, **kwargs) -> dict:
|
|
64
64
|
state = "bad_request"
|
65
65
|
return {"@id": url, "@type": "WebPage", "state": state, "via": "schema_org"}
|
66
66
|
elif response.headers.get("content-type") == "application/pdf":
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
67
|
+
try:
|
68
|
+
pdf = pikepdf.Pdf.open(io.BytesIO(response.content))
|
69
|
+
meta = pdf.docinfo if pdf.docinfo else {}
|
70
|
+
if meta.get("/doi", None) is not None:
|
71
|
+
return get_doi_meta(meta.get("/doi"))
|
72
|
+
date_modified = get_datetime_from_pdf_time(meta.get("/ModDate")) if meta.get("/ModDate", None) else None
|
73
|
+
name = meta.get("/Title", None)
|
74
|
+
return compact({
|
75
|
+
"@id": url,
|
76
|
+
"@type": "DigitalDocument",
|
77
|
+
"via": "schema_org",
|
78
|
+
"name": str(name),
|
79
|
+
"datePublished": date_modified,
|
80
|
+
"dateAccessed": datetime.now().isoformat("T", "seconds") if date_modified is None else None,
|
81
|
+
})
|
82
|
+
except Exception as error:
|
83
|
+
print(error)
|
84
|
+
return {
|
85
|
+
"@id": url,
|
86
|
+
"@type": "WebPage",
|
87
|
+
"state": "bad_request",
|
88
|
+
"via": "schema_org",
|
89
|
+
}
|
81
90
|
|
82
91
|
soup = BeautifulSoup(response.text, "html.parser")
|
83
92
|
|
@@ -226,7 +235,7 @@ def read_schema_org(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
226
235
|
descriptions = [
|
227
236
|
{
|
228
237
|
"description": sanitize(i),
|
229
|
-
"
|
238
|
+
"type": "Abstract",
|
230
239
|
}
|
231
240
|
for i in wrap(meta.get("description"))
|
232
241
|
]
|
@@ -251,7 +260,7 @@ def read_schema_org(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
251
260
|
geo_locations = [
|
252
261
|
schema_org_geolocation(i) for i in wrap(meta.get("spatialCoverage", None))
|
253
262
|
]
|
254
|
-
|
263
|
+
identifiers = None
|
255
264
|
provider = (
|
256
265
|
get_doi_ra(_id)
|
257
266
|
if doi_from_url(_id)
|
@@ -272,7 +281,7 @@ def read_schema_org(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
272
281
|
"additional_type": additional_type,
|
273
282
|
"subjects": presence(subjects),
|
274
283
|
"language": language,
|
275
|
-
"
|
284
|
+
"identifiers": identifiers,
|
276
285
|
"sizes": None,
|
277
286
|
"formats": None,
|
278
287
|
"version": meta.get("version", None),
|