commonmeta-py 0.123__py3-none-any.whl → 0.124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +1 -1
- commonmeta/api_utils.py +2 -2
- commonmeta/metadata.py +34 -48
- commonmeta/readers/cff_reader.py +19 -16
- commonmeta/readers/codemeta_reader.py +18 -15
- commonmeta/readers/crossref_reader.py +31 -28
- commonmeta/readers/datacite_reader.py +28 -25
- commonmeta/readers/datacite_xml_reader.py +36 -33
- commonmeta/readers/inveniordm_reader.py +28 -25
- commonmeta/readers/kbase_reader.py +33 -30
- commonmeta/readers/ris_reader.py +18 -15
- commonmeta/schema_utils.py +5 -4
- commonmeta/writers/inveniordm_writer.py +1 -1
- {commonmeta_py-0.123.dist-info → commonmeta_py-0.124.dist-info}/METADATA +1 -1
- {commonmeta_py-0.123.dist-info → commonmeta_py-0.124.dist-info}/RECORD +18 -18
- {commonmeta_py-0.123.dist-info → commonmeta_py-0.124.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.123.dist-info → commonmeta_py-0.124.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.123.dist-info → commonmeta_py-0.124.dist-info}/licenses/LICENSE +0 -0
commonmeta/__init__.py
CHANGED
commonmeta/api_utils.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""API Utils module for commonmeta-py"""
|
2
2
|
|
3
3
|
from datetime import datetime as date
|
4
|
-
from typing import Optional
|
4
|
+
from typing import Dict, Optional
|
5
5
|
|
6
6
|
import jwt
|
7
7
|
import requests
|
@@ -31,7 +31,7 @@ def generate_ghost_token(key: str) -> str:
|
|
31
31
|
|
32
32
|
def update_ghost_post_via_api(
|
33
33
|
_id: str, api_key: Optional[str] = None, api_url: Optional[str] = None
|
34
|
-
) ->
|
34
|
+
) -> Dict[str, str]:
|
35
35
|
"""Update Ghost post via API"""
|
36
36
|
# get post doi and url from Rogue Scholar API
|
37
37
|
# post url is needed to find post via Ghost API
|
commonmeta/metadata.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Metadata"""
|
2
2
|
|
3
3
|
from os import path
|
4
|
-
from typing import Optional, Union
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
5
5
|
|
6
6
|
import orjson as json
|
7
7
|
import yaml
|
@@ -69,7 +69,7 @@ from .writers.schema_org_writer import write_schema_org, write_schema_org_list
|
|
69
69
|
class Metadata:
|
70
70
|
"""Metadata"""
|
71
71
|
|
72
|
-
def __init__(self, string: Optional[Union[str,
|
72
|
+
def __init__(self, string: Optional[Union[str, Dict[str, Any]]], **kwargs):
|
73
73
|
if string is None or not isinstance(string, (str, dict)):
|
74
74
|
raise ValueError("No input found")
|
75
75
|
self.via = kwargs.get("via", None)
|
@@ -147,7 +147,7 @@ class Metadata:
|
|
147
147
|
# Default fallback
|
148
148
|
raise ValueError("No metadata found")
|
149
149
|
|
150
|
-
def _get_metadata_from_pid(self, pid, via) ->
|
150
|
+
def _get_metadata_from_pid(self, pid, via) -> Dict[str, Any]:
|
151
151
|
"""Helper method to get metadata from a PID."""
|
152
152
|
if via == "schema_org":
|
153
153
|
return get_schema_org(pid)
|
@@ -170,7 +170,7 @@ class Metadata:
|
|
170
170
|
else:
|
171
171
|
return {"pid": pid}
|
172
172
|
|
173
|
-
def _get_metadata_from_string(self, string, via) ->
|
173
|
+
def _get_metadata_from_string(self, string, via) -> Dict[str, Any]:
|
174
174
|
"""Helper method to get metadata from a string."""
|
175
175
|
try:
|
176
176
|
# XML formats
|
@@ -214,7 +214,7 @@ class Metadata:
|
|
214
214
|
except (TypeError, json.JSONDecodeError) as error:
|
215
215
|
return {"error": str(error)}
|
216
216
|
|
217
|
-
def read_metadata(self, data:
|
217
|
+
def read_metadata(self, data: Dict[str, Any], **kwargs) -> Dict[str, Any]:
|
218
218
|
"""Read and parse metadata from various formats."""
|
219
219
|
via = (isinstance(data, dict) and data.get("via")) or self.via
|
220
220
|
|
@@ -251,8 +251,8 @@ class Metadata:
|
|
251
251
|
else:
|
252
252
|
raise ValueError("No input format found")
|
253
253
|
|
254
|
-
def write(self, to: str = "commonmeta", **kwargs) -> str:
|
255
|
-
"""
|
254
|
+
def write(self, to: str = "commonmeta", **kwargs) -> Union[str, bytes]:
|
255
|
+
"""convert metadata list into different formats"""
|
256
256
|
try:
|
257
257
|
result = self._write_format(to, **kwargs)
|
258
258
|
if result is None or result == "":
|
@@ -262,20 +262,9 @@ class Metadata:
|
|
262
262
|
# More specific error message including the original JSONDecodeError details
|
263
263
|
raise ValueError(f"Invalid JSON: {str(e)}")
|
264
264
|
|
265
|
-
def _write_format(self, to: str, **kwargs) -> str:
|
265
|
+
def _write_format(self, to: str, **kwargs) -> Union[str, bytes]:
|
266
266
|
"""Helper method to handle writing to different formats."""
|
267
|
-
#
|
268
|
-
if to in ["commonmeta", "datacite", "inveniordm", "schema_org"]:
|
269
|
-
return self._write_json_format(to)
|
270
|
-
elif to in ["bibtex", "csl", "citation", "ris"]:
|
271
|
-
return self._write_text_format(to, **kwargs)
|
272
|
-
elif to in ["crossref_xml"]:
|
273
|
-
return self._write_xml_format(to, **kwargs)
|
274
|
-
else:
|
275
|
-
raise ValueError("No output format found")
|
276
|
-
|
277
|
-
def _write_json_format(self, to: str) -> str:
|
278
|
-
"""Handle JSON-based output formats."""
|
267
|
+
# JSON-based output formats
|
279
268
|
if to == "commonmeta":
|
280
269
|
result = json.dumps(write_commonmeta(self))
|
281
270
|
elif to == "datacite":
|
@@ -284,8 +273,22 @@ class Metadata:
|
|
284
273
|
result = json.dumps(write_inveniordm(self))
|
285
274
|
elif to == "schema_org":
|
286
275
|
result = json.dumps(write_schema_org(self))
|
276
|
+
# Text-based output formats
|
277
|
+
elif to == "bibtex":
|
278
|
+
return write_bibtex(self)
|
279
|
+
elif to == "csl":
|
280
|
+
return self._write_csl(**kwargs)
|
281
|
+
elif to == "citation":
|
282
|
+
self.style = kwargs.get("style", "apa")
|
283
|
+
self.locale = kwargs.get("locale", "en-US")
|
284
|
+
return write_citation(self)
|
285
|
+
elif to == "ris":
|
286
|
+
return write_ris(self)
|
287
|
+
# XML-based output formats
|
288
|
+
elif to == "crossref_xml":
|
289
|
+
return self._write_crossref_xml(**kwargs)
|
287
290
|
else:
|
288
|
-
|
291
|
+
raise ValueError("No output format found")
|
289
292
|
|
290
293
|
if isinstance(result, str):
|
291
294
|
# Verify it's valid JSON
|
@@ -304,26 +307,6 @@ class Metadata:
|
|
304
307
|
return "{}"
|
305
308
|
return "{}"
|
306
309
|
|
307
|
-
def _write_text_format(self, to: str, **kwargs) -> str:
|
308
|
-
"""Handle text-based output formats."""
|
309
|
-
if to == "bibtex":
|
310
|
-
return write_bibtex(self)
|
311
|
-
elif to == "csl":
|
312
|
-
return self._write_csl(**kwargs)
|
313
|
-
elif to == "citation":
|
314
|
-
self.style = kwargs.get("style", "apa")
|
315
|
-
self.locale = kwargs.get("locale", "en-US")
|
316
|
-
return write_citation(self)
|
317
|
-
elif to == "ris":
|
318
|
-
return write_ris(self)
|
319
|
-
return ""
|
320
|
-
|
321
|
-
def _write_xml_format(self, to: str, **kwargs) -> str:
|
322
|
-
"""Handle XML-based output formats."""
|
323
|
-
if to == "crossref_xml":
|
324
|
-
return self._write_crossref_xml(**kwargs)
|
325
|
-
return ""
|
326
|
-
|
327
310
|
def _write_csl(self, **kwargs) -> str:
|
328
311
|
"""Write in CSL format with error checking."""
|
329
312
|
csl_output = write_csl(self)
|
@@ -366,8 +349,8 @@ class MetadataList:
|
|
366
349
|
"""MetadataList"""
|
367
350
|
|
368
351
|
def __init__(
|
369
|
-
self, dct: Optional[Union[str,
|
370
|
-
) ->
|
352
|
+
self, dct: Optional[Union[str, Dict[str, Any]]] = None, **kwargs
|
353
|
+
) -> None:
|
371
354
|
if dct is None or not isinstance(dct, (str, bytes, dict)):
|
372
355
|
raise ValueError("No input found")
|
373
356
|
if isinstance(dct, dict):
|
@@ -423,12 +406,12 @@ class MetadataList:
|
|
423
406
|
else:
|
424
407
|
raise ValueError("No input format found")
|
425
408
|
|
426
|
-
def read_metadata_list(self,
|
409
|
+
def read_metadata_list(self, items, **kwargs) -> List[Metadata]:
|
427
410
|
"""read_metadata_list"""
|
428
411
|
kwargs["via"] = kwargs.get("via", None) or self.via
|
429
|
-
return [Metadata(i, **kwargs) for i in
|
412
|
+
return [Metadata(i, **kwargs) for i in items]
|
430
413
|
|
431
|
-
def write(self, to: str = "commonmeta", **kwargs) -> str:
|
414
|
+
def write(self, to: str = "commonmeta", **kwargs) -> Union[str, bytes]:
|
432
415
|
"""convert metadata list into different formats"""
|
433
416
|
if to == "bibtex":
|
434
417
|
output = write_bibtex_list(self)
|
@@ -479,12 +462,15 @@ class MetadataList:
|
|
479
462
|
else:
|
480
463
|
raise ValueError("No valid output format found")
|
481
464
|
|
482
|
-
def push(self, to: str = "commonmeta", **kwargs) -> str:
|
465
|
+
def push(self, to: str = "commonmeta", **kwargs) -> Union[str, bytes]:
|
483
466
|
"""push metadata list to external APIs"""
|
484
467
|
|
485
468
|
if to == "crossref_xml":
|
486
469
|
response = push_crossref_xml_list(
|
487
|
-
self,
|
470
|
+
self,
|
471
|
+
login_id=self.login_id,
|
472
|
+
login_passwd=self.login_passwd,
|
473
|
+
legacy_key=self.legacy_key,
|
488
474
|
)
|
489
475
|
return response
|
490
476
|
elif to == "datacite":
|
commonmeta/readers/cff_reader.py
CHANGED
@@ -99,22 +99,25 @@ def read_cff(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
99
99
|
state = "findable" if meta or read_options else "not_found"
|
100
100
|
|
101
101
|
return {
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
102
|
+
**{
|
103
|
+
"id": _id,
|
104
|
+
"type": _type,
|
105
|
+
# 'identifiers' => identifiers,
|
106
|
+
"url": url,
|
107
|
+
"titles": titles,
|
108
|
+
"contributors": presence(contributors),
|
109
|
+
"publisher": publisher,
|
110
|
+
"references": presence(references),
|
111
|
+
"date": date,
|
112
|
+
"descriptions": presence(descriptions),
|
113
|
+
"license": license_,
|
114
|
+
"version": meta.get("version", None),
|
115
|
+
"subjects": presence(subjects),
|
116
|
+
"provider": "DataCite" if _id else "GitHub",
|
117
|
+
"state": state,
|
118
|
+
},
|
119
|
+
**read_options,
|
120
|
+
}
|
118
121
|
|
119
122
|
|
120
123
|
def cff_contributors(contributors):
|
@@ -97,18 +97,21 @@ def read_codemeta(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
97
97
|
state = "findable" if meta or read_options else "not_found"
|
98
98
|
|
99
99
|
return {
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
100
|
+
**{
|
101
|
+
"id": _id,
|
102
|
+
"type": _type,
|
103
|
+
"url": normalize_id(meta.get("codeRepository", None)),
|
104
|
+
"identifiers": None,
|
105
|
+
"titles": titles,
|
106
|
+
"contributors": presence(contributors),
|
107
|
+
"publisher": publisher,
|
108
|
+
"date": compact(date),
|
109
|
+
"descriptions": descriptions,
|
110
|
+
"license": license_,
|
111
|
+
"version": meta.get("version", None),
|
112
|
+
"subjects": presence(subjects),
|
113
|
+
"provider": provider,
|
114
|
+
"state": state,
|
115
|
+
},
|
116
|
+
**read_options,
|
117
|
+
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
"""crossref reader for commonmeta-py"""
|
2
2
|
|
3
|
-
from typing import Optional
|
3
|
+
from typing import List, Optional
|
4
4
|
from xml.parsers.expat import ExpatError
|
5
5
|
|
6
6
|
import requests
|
@@ -33,7 +33,7 @@ from ..utils import (
|
|
33
33
|
)
|
34
34
|
|
35
35
|
|
36
|
-
def get_crossref_list(query: dict, **kwargs) ->
|
36
|
+
def get_crossref_list(query: dict, **kwargs) -> List[dict]:
|
37
37
|
"""get_crossref list from Crossref API."""
|
38
38
|
url = crossref_api_query_url(query, **kwargs)
|
39
39
|
response = requests.get(url, timeout=30, **kwargs)
|
@@ -51,7 +51,7 @@ def get_crossref(pid: str, **kwargs) -> dict:
|
|
51
51
|
response = requests.get(url, timeout=10, **kwargs)
|
52
52
|
if response.status_code != 200:
|
53
53
|
return {"state": "not_found"}
|
54
|
-
return response.json().get("message", {})
|
54
|
+
return {**response.json().get("message", {}), "via": "crossref"}
|
55
55
|
|
56
56
|
|
57
57
|
def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
@@ -138,31 +138,34 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
138
138
|
)
|
139
139
|
|
140
140
|
return {
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
141
|
+
**{
|
142
|
+
# required properties
|
143
|
+
"id": _id,
|
144
|
+
"type": _type,
|
145
|
+
# recommended and optional properties
|
146
|
+
"additionalType": additional_type,
|
147
|
+
"archiveLocations": presence(archive_locations),
|
148
|
+
"container": presence(container),
|
149
|
+
"contributors": presence(contributors),
|
150
|
+
"date": presence(date),
|
151
|
+
"descriptions": presence(descriptions),
|
152
|
+
"files": presence(files),
|
153
|
+
"fundingReferences": presence(funding_references),
|
154
|
+
"geoLocations": None,
|
155
|
+
"identifiers": identifiers,
|
156
|
+
"language": meta.get("language", None),
|
157
|
+
"license": license_,
|
158
|
+
"provider": "Crossref",
|
159
|
+
"publisher": presence(publisher),
|
160
|
+
"references": presence(references),
|
161
|
+
"relations": presence(relations),
|
162
|
+
"subjects": presence(subjects),
|
163
|
+
"titles": presence(titles),
|
164
|
+
"url": url,
|
165
|
+
"version": meta.get("version", None),
|
166
|
+
},
|
167
|
+
**read_options,
|
168
|
+
}
|
166
169
|
|
167
170
|
|
168
171
|
def get_titles(meta):
|
@@ -40,7 +40,7 @@ def get_datacite(pid: str, **kwargs) -> dict:
|
|
40
40
|
response = requests.get(url, timeout=10, **kwargs)
|
41
41
|
if response.status_code != 200:
|
42
42
|
return {"state": "not_found"}
|
43
|
-
return py_.get(response.json(), "data.attributes", {})
|
43
|
+
return {**py_.get(response.json(), "data.attributes", {}), "via": "datacite"}
|
44
44
|
except ReadTimeout:
|
45
45
|
return {"state": "timeout"}
|
46
46
|
|
@@ -114,30 +114,33 @@ def read_datacite(data: dict, **kwargs) -> Commonmeta:
|
|
114
114
|
subjects = py_.uniq([format_subject(i) for i in wrap(meta.get("subjects", None))])
|
115
115
|
|
116
116
|
return {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
117
|
+
**{
|
118
|
+
# required properties
|
119
|
+
"id": _id,
|
120
|
+
"type": _type,
|
121
|
+
# recommended and optional properties
|
122
|
+
"additionalType": additional_type,
|
123
|
+
"container": presence(container),
|
124
|
+
"contributors": presence(contributors),
|
125
|
+
"date": compact(date),
|
126
|
+
"descriptions": presence(descriptions),
|
127
|
+
"files": presence(files),
|
128
|
+
"fundingReferences": presence(meta.get("fundingReferences", None)),
|
129
|
+
"geoLocations": presence(geo_locations),
|
130
|
+
"identifiers": presence(identifiers),
|
131
|
+
"language": meta.get("language", None),
|
132
|
+
"license": presence(license_),
|
133
|
+
"provider": "DataCite",
|
134
|
+
"publisher": publisher,
|
135
|
+
"references": presence(references),
|
136
|
+
"relations": presence(relations),
|
137
|
+
"subjects": presence(subjects),
|
138
|
+
"titles": presence(titles),
|
139
|
+
"url": normalize_url(meta.get("url", None)),
|
140
|
+
"version": meta.get("version", None),
|
141
|
+
},
|
142
|
+
**read_options,
|
143
|
+
}
|
141
144
|
|
142
145
|
|
143
146
|
def get_identifiers(identifiers: list) -> list:
|
@@ -22,7 +22,7 @@ def get_datacite_xml(pid: str, **kwargs) -> dict:
|
|
22
22
|
response = requests.get(url, timeout=10, **kwargs)
|
23
23
|
if response.status_code != 200:
|
24
24
|
return {"state": "not_found"}
|
25
|
-
return py_.get(response.json(), "data.attributes", {})
|
25
|
+
return {**py_.get(response.json(), "data.attributes", {}), "via": "datacite_xml"}
|
26
26
|
|
27
27
|
|
28
28
|
def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
|
@@ -203,38 +203,41 @@ def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
|
|
203
203
|
state = "findable" if _id or read_options else "not_found"
|
204
204
|
|
205
205
|
return {
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
206
|
+
**{
|
207
|
+
# required properties
|
208
|
+
"id": _id,
|
209
|
+
"type": _type,
|
210
|
+
"doi": doi_from_url(_id),
|
211
|
+
"url": normalize_url(meta.get("url", None)),
|
212
|
+
"contributors": presence(contributors),
|
213
|
+
"titles": compact(titles),
|
214
|
+
"publisher": publisher,
|
215
|
+
"date": date,
|
216
|
+
# recommended and optional properties
|
217
|
+
"additionalType": presence(additional_type),
|
218
|
+
"subjects": presence(subjects),
|
219
|
+
"language": meta.get("language", None),
|
220
|
+
"identifiers": identifiers,
|
221
|
+
"version": meta.get("version", None),
|
222
|
+
"license": presence(license_),
|
223
|
+
"descriptions": presence(descriptions),
|
224
|
+
"geoLocations": presence(geo_locations),
|
225
|
+
"fundingReferences": presence(funding_references),
|
226
|
+
"references": presence(references),
|
227
|
+
"relations": presence(relations),
|
228
|
+
# other properties
|
229
|
+
"date_created": strip_milliseconds(meta.get("created", None)),
|
230
|
+
"date_registered": strip_milliseconds(meta.get("registered", None)),
|
231
|
+
"date_published": strip_milliseconds(meta.get("published", None)),
|
232
|
+
"date_updated": strip_milliseconds(meta.get("updated", None)),
|
233
|
+
"files": presence(files),
|
234
|
+
"container": presence(meta.get("container", None)),
|
235
|
+
"provider": "DataCite",
|
236
|
+
"state": state,
|
237
|
+
"schema_version": meta.get("xmlns", None),
|
238
|
+
},
|
239
|
+
**read_options,
|
240
|
+
}
|
238
241
|
|
239
242
|
|
240
243
|
def get_xml_identifiers(identifiers: list) -> list:
|
@@ -122,31 +122,34 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
|
|
122
122
|
files = [get_file(i) for i in wrap(meta.get("files"))]
|
123
123
|
|
124
124
|
return {
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
125
|
+
**{
|
126
|
+
# required properties
|
127
|
+
"id": _id,
|
128
|
+
"type": _type,
|
129
|
+
"doi": doi_from_url(_id),
|
130
|
+
"url": url,
|
131
|
+
"contributors": presence(contributors),
|
132
|
+
"titles": titles,
|
133
|
+
"publisher": publisher,
|
134
|
+
"date": compact(date),
|
135
|
+
# recommended and optional properties
|
136
|
+
# "additional_type": additional_type,
|
137
|
+
"subjects": presence(subjects),
|
138
|
+
"language": get_language(language),
|
139
|
+
"version": py_.get(meta, "metadata.version"),
|
140
|
+
"license": presence(license_),
|
141
|
+
"descriptions": descriptions,
|
142
|
+
"geoLocations": None,
|
143
|
+
"fundingReferences": presence(funding_references),
|
144
|
+
"references": presence(references),
|
145
|
+
"relations": presence(relations),
|
146
|
+
# other properties
|
147
|
+
"files": files,
|
148
|
+
"container": container,
|
149
|
+
"provider": "DataCite",
|
150
|
+
},
|
151
|
+
**read_options,
|
152
|
+
}
|
150
153
|
|
151
154
|
|
152
155
|
def get_references(references: list) -> list:
|
@@ -2,15 +2,15 @@
|
|
2
2
|
|
3
3
|
from pydash import py_
|
4
4
|
|
5
|
-
from ..utils import normalize_url, normalize_doi, from_curie, from_kbase
|
6
|
-
from ..base_utils import compact, wrap, presence, sanitize
|
7
5
|
from ..author_utils import get_authors
|
8
|
-
from ..
|
9
|
-
from ..doi_utils import doi_from_url, validate_doi
|
6
|
+
from ..base_utils import compact, presence, sanitize, wrap
|
10
7
|
from ..constants import (
|
11
8
|
COMMONMETA_RELATION_TYPES,
|
12
9
|
Commonmeta,
|
13
10
|
)
|
11
|
+
from ..date_utils import normalize_date_dict
|
12
|
+
from ..doi_utils import doi_from_url, validate_doi
|
13
|
+
from ..utils import from_curie, from_kbase, normalize_doi, normalize_url
|
14
14
|
|
15
15
|
|
16
16
|
def read_kbase(data: dict, **kwargs) -> Commonmeta:
|
@@ -70,32 +70,35 @@ def read_kbase(data: dict, **kwargs) -> Commonmeta:
|
|
70
70
|
state = "findable" if meta or read_options else "not_found"
|
71
71
|
|
72
72
|
return {
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
73
|
+
**{
|
74
|
+
# required properties
|
75
|
+
"id": _id,
|
76
|
+
"type": _type,
|
77
|
+
"doi": doi_from_url(_id),
|
78
|
+
"url": normalize_url(meta.get("url", None)),
|
79
|
+
"contributors": presence(contributors),
|
80
|
+
"titles": titles,
|
81
|
+
"publisher": publisher,
|
82
|
+
"date": compact(date),
|
83
|
+
# recommended and optional properties
|
84
|
+
"additional_type": None,
|
85
|
+
"subjects": None,
|
86
|
+
"language": language,
|
87
|
+
"identifiers": None,
|
88
|
+
"version": py_.get(meta, "metadata.version"),
|
89
|
+
"license": presence(license_),
|
90
|
+
"descriptions": descriptions,
|
91
|
+
"geo_locations": None,
|
92
|
+
"fundingReferences": presence(funding_references),
|
93
|
+
"references": presence(references),
|
94
|
+
"relations": presence(relations),
|
95
|
+
# other properties
|
96
|
+
"files": presence(files),
|
97
|
+
"container": container,
|
98
|
+
"provider": "DataCite",
|
99
|
+
},
|
100
|
+
**read_options,
|
101
|
+
}
|
99
102
|
|
100
103
|
|
101
104
|
def format_title(title: dict) -> dict:
|
commonmeta/readers/ris_reader.py
CHANGED
@@ -67,21 +67,24 @@ def read_ris(data: Optional[str], **kwargs) -> Commonmeta:
|
|
67
67
|
state = "findable" if meta.get("DO", None) or read_options else "not_found"
|
68
68
|
|
69
69
|
return {
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
70
|
+
**{
|
71
|
+
"id": _id,
|
72
|
+
"type": _type,
|
73
|
+
"doi": doi_from_url(_id),
|
74
|
+
"url": normalize_url(meta.get("UR", None)),
|
75
|
+
"titles": [{"title": meta.get("T1", None)}],
|
76
|
+
"descriptions": descriptions,
|
77
|
+
"contributors": presence(contributors),
|
78
|
+
"publisher": presence(publisher),
|
79
|
+
"container": container,
|
80
|
+
# 'related_identifiers': related_identifiers,
|
81
|
+
"date": date,
|
82
|
+
"subjects": subjects,
|
83
|
+
"language": meta.get("LA", None),
|
84
|
+
"state": state,
|
85
|
+
},
|
86
|
+
**read_options,
|
87
|
+
}
|
85
88
|
|
86
89
|
|
87
90
|
def ris_meta(data):
|
commonmeta/schema_utils.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
"""Schema utils for commonmeta-py"""
|
2
2
|
|
3
3
|
from os import path
|
4
|
+
from typing import Any, Dict, Optional, Union
|
4
5
|
|
5
6
|
import orjson as json
|
6
7
|
import xmlschema
|
7
8
|
from jsonschema import Draft202012Validator, ValidationError
|
8
9
|
|
9
10
|
|
10
|
-
def json_schema_errors(instance, schema: str = "commonmeta"):
|
11
|
+
def json_schema_errors(instance: Dict[str, Any], schema: str = "commonmeta") -> Optional[str]:
|
11
12
|
"""validate against JSON schema"""
|
12
13
|
schema_map = {
|
13
14
|
"commonmeta": "commonmeta_v0.16",
|
@@ -30,7 +31,7 @@ def json_schema_errors(instance, schema: str = "commonmeta"):
|
|
30
31
|
return error.message
|
31
32
|
|
32
33
|
|
33
|
-
def xml_schema_errors(instance, schema: str = "crossref_xml"):
|
34
|
+
def xml_schema_errors(instance: Union[str, bytes], schema: str = "crossref_xml") -> Optional[Union[bool, Exception]]:
|
34
35
|
"""validate against XML schema"""
|
35
36
|
schema_map = {
|
36
37
|
"crossref_xml": "crossref5.4.0",
|
@@ -40,8 +41,8 @@ def xml_schema_errors(instance, schema: str = "crossref_xml"):
|
|
40
41
|
raise ValueError("No schema found")
|
41
42
|
base_dir = path.join(path.dirname(__file__), "resources", "crossref")
|
42
43
|
schema_path = path.join(base_dir, "crossref5.4.0.xsd")
|
43
|
-
|
44
|
-
return
|
44
|
+
schema_obj = xmlschema.XMLSchema(schema_path)
|
45
|
+
return schema_obj.validate(instance)
|
45
46
|
except xmlschema.validators.exceptions.XMLSchemaValidationError as error:
|
46
47
|
print(error)
|
47
48
|
return error
|
@@ -677,7 +677,7 @@ def update_legacy_record(record, legacy_key: str, field:str=None) -> dict:
|
|
677
677
|
"archived": "true",
|
678
678
|
}
|
679
679
|
else:
|
680
|
-
print(f"nothing to update for id {record.get(
|
680
|
+
print(f"nothing to update for id {record.get('uuid')}")
|
681
681
|
return record # nothing to update
|
682
682
|
|
683
683
|
request_url = f"https://{legacy_host}/rest/v1/posts?id=eq.{record['uuid']}"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: commonmeta-py
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.124
|
4
4
|
Summary: Library for conversions to/from the Commonmeta scholarly metadata format
|
5
5
|
Project-URL: Homepage, https://python.commonmeta.org
|
6
6
|
Project-URL: Repository, https://github.com/front-matter/commonmeta-py
|
@@ -1,5 +1,5 @@
|
|
1
|
-
commonmeta/__init__.py,sha256=
|
2
|
-
commonmeta/api_utils.py,sha256=
|
1
|
+
commonmeta/__init__.py,sha256=pp3RcQ6mEgusaIHYnKMwYYMDh-6dGBRUhu2jP0ecdHw,2098
|
2
|
+
commonmeta/api_utils.py,sha256=P8LMHHYiF4OTi97_5k4KstcBreooMkOAKZ4ebxsAv4o,2691
|
3
3
|
commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
|
4
4
|
commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
|
5
5
|
commonmeta/cli.py,sha256=pdBpBosLNq3RS9buO-Voqawc9Ay1eSt-xP5O97iOft4,8480
|
@@ -7,25 +7,25 @@ commonmeta/constants.py,sha256=wSTEUiHeRdXLwjXEQD9AU2hxFyEKi5OTX2iHOKO6nF0,19844
|
|
7
7
|
commonmeta/date_utils.py,sha256=H2cCobX0JREIUOT_cCigGd3MG7prGiQpXk1m4ZNrFwU,6318
|
8
8
|
commonmeta/doi_utils.py,sha256=cOogLatKg6qea2jgMd3yLALSTfaTNUgr-IkBXIK4xZw,11498
|
9
9
|
commonmeta/file_utils.py,sha256=eFYDWyR8Gr722nvFmp542hCm-TGmO_q4ciZ85IPHpjA,2893
|
10
|
-
commonmeta/metadata.py,sha256=
|
11
|
-
commonmeta/schema_utils.py,sha256=
|
10
|
+
commonmeta/metadata.py,sha256=_N9suKA10uiWU5-3a349x-H9VbhzEZ6rQ1PqXOCiinA,18408
|
11
|
+
commonmeta/schema_utils.py,sha256=O6OxiySUrvnNJvslJLY86RFu2HldrZFi21OaLXnV3mA,1912
|
12
12
|
commonmeta/translators.py,sha256=CBMK4jrXRmGZiAhCh6wsJjhbDJWbcsda8UvXFXxccAw,1363
|
13
13
|
commonmeta/utils.py,sha256=pJnh3EzOU1E2nutnAZsopY_NsUX6zYmxoj5bIYqqWvE,50574
|
14
14
|
commonmeta/readers/__init__.py,sha256=vOf7UsOKNoh_ZCuyexxhAmPMt8wjB-pF_CfpWRaN8pk,45
|
15
15
|
commonmeta/readers/bibtex_reader.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
commonmeta/readers/cff_reader.py,sha256=
|
17
|
-
commonmeta/readers/codemeta_reader.py,sha256=
|
16
|
+
commonmeta/readers/cff_reader.py,sha256=HMFK6QIg_XIlhmYIWL4EfIyuidYl5L_0TAwyG78XPlU,6244
|
17
|
+
commonmeta/readers/codemeta_reader.py,sha256=QmJjh9f6kLxOWCBaZqdqY-LPlA0g2cX90yvPt0p1Nw8,3743
|
18
18
|
commonmeta/readers/commonmeta_reader.py,sha256=46XrCr2whkUP4uiaNiFXS7ABwowcRdWcLG-3OcfhVdk,303
|
19
|
-
commonmeta/readers/crossref_reader.py,sha256=
|
19
|
+
commonmeta/readers/crossref_reader.py,sha256=ydyiFw3m4P8LgMuornQrwJPds8bWPBGR03wtUG3Or-Y,13273
|
20
20
|
commonmeta/readers/crossref_xml_reader.py,sha256=A2iAFT2IbhTNcfL-Dx4xO2SFSNOu_HNyPisus0u4UJI,18719
|
21
21
|
commonmeta/readers/csl_reader.py,sha256=OxzC2AZKfv43BCah4XGYvlK_LUK-5mxXFcjdzB5vv_o,3216
|
22
|
-
commonmeta/readers/datacite_reader.py,sha256=
|
23
|
-
commonmeta/readers/datacite_xml_reader.py,sha256=
|
24
|
-
commonmeta/readers/inveniordm_reader.py,sha256=
|
22
|
+
commonmeta/readers/datacite_reader.py,sha256=4b_AP8m_aOUNVnVB0nU9j4-a8WTpmYJA8QPr5v35qyw,12219
|
23
|
+
commonmeta/readers/datacite_xml_reader.py,sha256=nsPc7JBbIKgx6Yaauq8vmhUE-o6K0t_MAuExv6FO2AU,13205
|
24
|
+
commonmeta/readers/inveniordm_reader.py,sha256=DtSloEZDu8bL-QLQAqAW1aDsS6ESTDZyhme379IekjY,8502
|
25
25
|
commonmeta/readers/jsonfeed_reader.py,sha256=zcPxxuyAGW8W7w0-VwP9AhpX97qVWHQJUIJ5p4bBbfE,15655
|
26
|
-
commonmeta/readers/kbase_reader.py,sha256=
|
26
|
+
commonmeta/readers/kbase_reader.py,sha256=0Y9cHRNs_7kHyocN4IESXbgmXJiq4TXoxvGeUYGml1s,6896
|
27
27
|
commonmeta/readers/openalex_reader.py,sha256=4HUkBsut_iUjhUcC5c1GHgxnKsYQc-fgY43QILgVZEg,12826
|
28
|
-
commonmeta/readers/ris_reader.py,sha256=
|
28
|
+
commonmeta/readers/ris_reader.py,sha256=nwK8Eux0wPjwKqXFWS4Cfd6FAY7Id4Mi_hgkTwPntHs,3766
|
29
29
|
commonmeta/readers/schema_org_reader.py,sha256=AlFMmuUovqlMYkwL9F1Um6bX5vIWzhqmreHCrzsC3rU,17275
|
30
30
|
commonmeta/resources/cff_v1.2.0.json,sha256=MpfjDYgX7fN9PLiG54ISZ2uu9WItNqfh-yaRuTf6Ptg,46691
|
31
31
|
commonmeta/resources/commonmeta_v0.12.json,sha256=HUSNReXh2JN3Q6YWSt7CE69js8dh50OlpMYGTyU98oU,16762
|
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
|
|
80
80
|
commonmeta/writers/crossref_xml_writer.py,sha256=cGrX9vacNwDdK7TbH1RWig68SH8hg5VusuqkI9gU1DY,33931
|
81
81
|
commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
|
82
82
|
commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
|
83
|
-
commonmeta/writers/inveniordm_writer.py,sha256=
|
83
|
+
commonmeta/writers/inveniordm_writer.py,sha256=2CD2XYThHgZhUUeVVzZU6A_p6lOz7Xdt0Ij4Zy_E1PM,25772
|
84
84
|
commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
|
85
85
|
commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
|
86
|
-
commonmeta_py-0.
|
87
|
-
commonmeta_py-0.
|
88
|
-
commonmeta_py-0.
|
89
|
-
commonmeta_py-0.
|
90
|
-
commonmeta_py-0.
|
86
|
+
commonmeta_py-0.124.dist-info/METADATA,sha256=t0hoWqdbSD8HDjdOpJtoxhjb9_coYI_z9wBMgN5VZIU,7656
|
87
|
+
commonmeta_py-0.124.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
88
|
+
commonmeta_py-0.124.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
|
89
|
+
commonmeta_py-0.124.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
|
90
|
+
commonmeta_py-0.124.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|