commonmeta-py 0.15.2__tar.gz → 0.65__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/LICENSE +1 -1
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/PKG-INFO +10 -11
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/README.md +2 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/__init__.py +4 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/author_utils.py +6 -6
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/base_utils.py +1 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/cli.py +1 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/constants.py +86 -3
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/crossref_utils.py +46 -38
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/date_utils.py +26 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/doi_utils.py +82 -5
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/metadata.py +25 -54
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/cff_reader.py +1 -1
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/crossref_reader.py +51 -42
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/crossref_xml_reader.py +5 -7
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/csl_reader.py +4 -4
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/datacite_reader.py +87 -28
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/datacite_xml_reader.py +54 -29
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/inveniordm_reader.py +62 -37
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/json_feed_reader.py +100 -87
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/kbase_reader.py +5 -9
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/ris_reader.py +1 -1
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/schema_org_reader.py +180 -50
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/commonmeta_v0.12.json +6 -1
- commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.15.json +575 -0
- commonmeta_py-0.65/commonmeta/resources/datacite-v4.5pr.json +608 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/schema_utils.py +1 -1
- commonmeta_py-0.65/commonmeta/translators.py +47 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/utils.py +221 -78
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/bibtex_writer.py +2 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/citation_writer.py +1 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/commonmeta_writer.py +10 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/csl_writer.py +10 -3
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/datacite_writer.py +29 -10
- commonmeta_py-0.65/commonmeta/writers/inveniordm_writer.py +358 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/schema_org_writer.py +2 -2
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/pyproject.toml +17 -10
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/api_utils.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/__init__.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/codemeta_reader.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/commonmeta_reader.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/cff_v1.2.0.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/AccessIndicators.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/clinicaltrials.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/common5.3.1.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/crossref5.3.1.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/fundref.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/module-ali.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/relations.xsd +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref-v0.2.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/csl-data.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/datacite-v4.5.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/ietf-bcp-47.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/iso-8601.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/spdx/licenses.json +0 -0
- /commonmeta_py-0.15.2/commonmeta/resources/spdx-schema..json → /commonmeta_py-0.65/commonmeta/resources/spdx-schema.json +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/apa.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/chicago-author-date.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/harvard-cite-them-right.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/ieee.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/modern-language-association.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/vancouver.csl +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/__init__.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/crossref_xml_writer.py +0 -0
- {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/ris_writer.py +0 -0
@@ -1,19 +1,20 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: commonmeta-py
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.65
|
4
4
|
Summary: Library for conversions to/from the Commonmeta scholarly metadata format
|
5
5
|
Home-page: https://python.commonmeta.org
|
6
6
|
License: MIT
|
7
7
|
Keywords: science,metadata,commonmeta,bibtex,csl,crossref,datacite
|
8
8
|
Author: Martin Fenner
|
9
9
|
Author-email: martin@front-matter.io
|
10
|
-
Requires-Python: >=3.9,<4.0
|
10
|
+
Requires-Python: >=3.9,<4.0
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
13
13
|
Classifier: Programming Language :: Python :: 3.9
|
14
14
|
Classifier: Programming Language :: Python :: 3.10
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
17
18
|
Requires-Dist: PyYAML (>=6.0,<7.0)
|
18
19
|
Requires-Dist: anyio (>=4.2.0,<5.0.0)
|
19
20
|
Requires-Dist: asyncclick (>=8.1.7.1,<9.0.0.0)
|
@@ -27,23 +28,22 @@ Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
27
28
|
Requires-Dist: datacite (>=1.1,<2.0)
|
28
29
|
Requires-Dist: dateparser (>=1.1.7,<2.0.0)
|
29
30
|
Requires-Dist: docutils (>=0.19,<0.20)
|
31
|
+
Requires-Dist: edtf (>=5.0.0,<6.0.0)
|
30
32
|
Requires-Dist: furl (>=2.1.3,<3.0.0)
|
31
|
-
Requires-Dist: httpx (>=0.
|
33
|
+
Requires-Dist: httpx (>=0.27,<0.28)
|
32
34
|
Requires-Dist: jsonschema (>=4.21,<5.0)
|
33
|
-
Requires-Dist:
|
34
|
-
Requires-Dist: jupyterlab-quarto (>=0.2.8,<0.3.0)
|
35
|
-
Requires-Dist: lxml (>=5.1.0,<6.0.0)
|
35
|
+
Requires-Dist: lxml (>=4.8)
|
36
36
|
Requires-Dist: nameparser (>=1.1.2,<2.0.0)
|
37
37
|
Requires-Dist: nbstripout (>=0.6,<0.7)
|
38
38
|
Requires-Dist: nh3 (>=0.2.14,<0.3.0)
|
39
39
|
Requires-Dist: orjson (>=3.9.14,<4.0.0)
|
40
40
|
Requires-Dist: orjsonl (>=1.0.0,<2.0.0)
|
41
|
+
Requires-Dist: pikepdf (>=8.14,<10.0)
|
41
42
|
Requires-Dist: pycountry (>=23.12.11,<24.0.0)
|
42
43
|
Requires-Dist: pydash (>=7.0,<8.0)
|
43
44
|
Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
|
44
45
|
Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
|
45
|
-
Requires-Dist:
|
46
|
-
Requires-Dist: setuptools (>=69.0,<70.0)
|
46
|
+
Requires-Dist: setuptools (>=70.0,<71.0)
|
47
47
|
Requires-Dist: simplejson (>=3.18,<4.0)
|
48
48
|
Requires-Dist: sphinx-autodoc-typehints (>=1.19,<2.0)
|
49
49
|
Requires-Dist: sphinxcontrib-issuetracker (>=0.11,<0.12)
|
@@ -51,7 +51,6 @@ Requires-Dist: types-PyYAML (>=6.0,<7.0)
|
|
51
51
|
Requires-Dist: types-beautifulsoup4 (>=4.11,<5.0)
|
52
52
|
Requires-Dist: types-dateparser (>=1.1,<2.0)
|
53
53
|
Requires-Dist: types-xmltodict (>=0.13,<0.14)
|
54
|
-
Requires-Dist: vcrpy (>=5.1.0,<6.0.0)
|
55
54
|
Requires-Dist: xmltodict (>=0.12,<0.13)
|
56
55
|
Project-URL: Documentation, https://python.commonmeta.org
|
57
56
|
Project-URL: Repository, https://github.com/front-matter/commonmeta-py
|
@@ -102,7 +101,7 @@ Commometa-py reads and/or writes these metadata formats:
|
|
102
101
|
| [CSV](ttps://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
|
103
102
|
| [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
|
104
103
|
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
|
105
|
-
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json |
|
104
|
+
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
|
106
105
|
| [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
|
107
106
|
|
108
107
|
_commonmeta_: the Commonmeta format is the native format for the library and used internally.
|
@@ -111,7 +110,7 @@ _Later_: we plan to implement this format in a later release.
|
|
111
110
|
|
112
111
|
## Documentation
|
113
112
|
|
114
|
-
Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://commonmeta
|
113
|
+
Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://python.commonmeta.org/) website and includes several interactive Jupyter Notebooks .
|
115
114
|
|
116
115
|
## Meta
|
117
116
|
|
@@ -43,7 +43,7 @@ Commometa-py reads and/or writes these metadata formats:
|
|
43
43
|
| [CSV](ttps://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
|
44
44
|
| [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
|
45
45
|
| [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
|
46
|
-
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json |
|
46
|
+
| [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
|
47
47
|
| [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
|
48
48
|
|
49
49
|
_commonmeta_: the Commonmeta format is the native format for the library and used internally.
|
@@ -52,7 +52,7 @@ _Later_: we plan to implement this format in a later release.
|
|
52
52
|
|
53
53
|
## Documentation
|
54
54
|
|
55
|
-
Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://commonmeta
|
55
|
+
Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://python.commonmeta.org/) website and includes several interactive Jupyter Notebooks .
|
56
56
|
|
57
57
|
## Meta
|
58
58
|
|
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.
|
13
|
+
__version__ = "0.65"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
@@ -54,7 +54,6 @@ from .utils import (
|
|
54
54
|
validate_orcid,
|
55
55
|
validate_url,
|
56
56
|
get_language,
|
57
|
-
encode_doi,
|
58
57
|
name_to_fos,
|
59
58
|
from_json_feed,
|
60
59
|
)
|
@@ -88,9 +87,12 @@ from .doi_utils import (
|
|
88
87
|
doi_from_url,
|
89
88
|
doi_as_url,
|
90
89
|
doi_resolver,
|
90
|
+
decode_doi,
|
91
|
+
encode_doi,
|
91
92
|
datacite_api_url,
|
92
93
|
get_doi_ra,
|
93
94
|
normalize_doi,
|
94
95
|
validate_doi,
|
95
96
|
validate_prefix,
|
97
|
+
is_rogue_scholar_doi,
|
96
98
|
)
|
@@ -63,7 +63,6 @@ def get_one_author(author, **kwargs):
|
|
63
63
|
) or parse_attributes(
|
64
64
|
author.get("contributorName", None), content="type", first=True
|
65
65
|
)
|
66
|
-
|
67
66
|
# also handle Crossref, JSON Feed, or DataCite metadata
|
68
67
|
_id = (
|
69
68
|
author.get("id", None)
|
@@ -72,7 +71,7 @@ def get_one_author(author, **kwargs):
|
|
72
71
|
or next(
|
73
72
|
(
|
74
73
|
format_name_identifier(i)
|
75
|
-
for i in wrap(author.get("nameIdentifiers", None))
|
74
|
+
for i in wrap(author.get("nameIdentifiers", None or author.get("identifiers", None)))
|
76
75
|
),
|
77
76
|
None,
|
78
77
|
)
|
@@ -109,6 +108,9 @@ def get_one_author(author, **kwargs):
|
|
109
108
|
given_name = None
|
110
109
|
family_name = None
|
111
110
|
|
111
|
+
# support various keys for affiliations
|
112
|
+
affiliations = author.get("affiliation", None) or author.get("affiliations", None)
|
113
|
+
|
112
114
|
# return author in commonmeta format, using name vs. given/family name
|
113
115
|
# depending on type
|
114
116
|
return compact(
|
@@ -119,9 +121,7 @@ def get_one_author(author, **kwargs):
|
|
119
121
|
"name": name if _type == "Organization" else None,
|
120
122
|
"givenName": given_name if _type == "Person" else None,
|
121
123
|
"familyName": family_name if _type == "Person" else None,
|
122
|
-
"
|
123
|
-
get_affiliations(wrap(author.get("affiliation", None)))
|
124
|
-
),
|
124
|
+
"affiliations": presence(get_affiliations(wrap(affiliations))),
|
125
125
|
}
|
126
126
|
)
|
127
127
|
|
@@ -184,7 +184,7 @@ def cleanup_author(author):
|
|
184
184
|
"""clean up author string"""
|
185
185
|
if author is None:
|
186
186
|
return None
|
187
|
-
|
187
|
+
|
188
188
|
if author.startswith(","):
|
189
189
|
return None
|
190
190
|
|
@@ -5,8 +5,7 @@ import orjson as json
|
|
5
5
|
|
6
6
|
from commonmeta import Metadata, MetadataList # __version__
|
7
7
|
from commonmeta.api_utils import update_ghost_post_via_api
|
8
|
-
from commonmeta.doi_utils import validate_prefix
|
9
|
-
from commonmeta.utils import encode_doi, decode_doi
|
8
|
+
from commonmeta.doi_utils import validate_prefix, encode_doi, decode_doi
|
10
9
|
from commonmeta.readers.json_feed_reader import (
|
11
10
|
get_json_feed_item_uuid,
|
12
11
|
)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Constants for commonmeta-py"""
|
2
|
+
|
2
3
|
from typing import Optional, TypedDict, List
|
3
4
|
|
4
5
|
|
@@ -16,7 +17,7 @@ class Commonmeta(TypedDict):
|
|
16
17
|
subjects: Optional[List[dict]]
|
17
18
|
contributors: Optional[List[dict]]
|
18
19
|
language: Optional[str]
|
19
|
-
|
20
|
+
identifiers: Optional[List[dict]]
|
20
21
|
relations: Optional[List[dict]]
|
21
22
|
sizes: Optional[List[dict]]
|
22
23
|
formats: Optional[List[dict]]
|
@@ -30,7 +31,6 @@ class Commonmeta(TypedDict):
|
|
30
31
|
files: Optional[List[dict]]
|
31
32
|
agency: Optional[str]
|
32
33
|
state: str
|
33
|
-
schema_version: Optional[str]
|
34
34
|
|
35
35
|
|
36
36
|
# source: https://www.bibtex.com/e/entry-types/
|
@@ -239,9 +239,9 @@ INVENIORDM_TO_CM_TRANSLATIONS = {
|
|
239
239
|
"book": "Book",
|
240
240
|
"section": "BookChapter",
|
241
241
|
"conferencepaper": "ProceedingsArticle",
|
242
|
-
"article": "JournalArticle",
|
243
242
|
"patent": "Patent",
|
244
243
|
"publication": "JournalArticle",
|
244
|
+
"publication-preprint": "Article",
|
245
245
|
"report": "Report",
|
246
246
|
"softwaredocumentation": "Software",
|
247
247
|
"thesis": "Dissertation",
|
@@ -267,6 +267,17 @@ INVENIORDM_TO_CM_TRANSLATIONS = {
|
|
267
267
|
"other": "Other",
|
268
268
|
}
|
269
269
|
|
270
|
+
CM_TO_INVENIORDM_TRANSLATIONS = {
|
271
|
+
"Article": "publication-preprint",
|
272
|
+
"Book": "book",
|
273
|
+
"Dataset": "dataset",
|
274
|
+
"Image": "image-other",
|
275
|
+
"JournalArticle": "publication-article",
|
276
|
+
"Presentation": "presentation",
|
277
|
+
"Software": "software",
|
278
|
+
"Other": "other",
|
279
|
+
}
|
280
|
+
|
270
281
|
CM_TO_DC_TRANSLATIONS = {
|
271
282
|
"Article": "Preprint",
|
272
283
|
"Audiovisual": "Audiovisual",
|
@@ -394,12 +405,35 @@ SO_TO_CM_TRANSLATIONS = {
|
|
394
405
|
"BookChapter": "BookChapter",
|
395
406
|
"CreativeWork": "Other",
|
396
407
|
"Dataset": "Dataset",
|
408
|
+
"DigitalDocument": "Document",
|
397
409
|
"Dissertation": "Dissertation",
|
398
410
|
"Instrument": "Instrument",
|
411
|
+
"MusicRecording": "Audiovisual",
|
412
|
+
"MusicAlbum": "Audiovisual",
|
399
413
|
"NewsArticle": "Article",
|
400
414
|
"Legislation": "LegalDocument",
|
415
|
+
"ProfilePage": "WebPage",
|
416
|
+
"Report": "Report",
|
401
417
|
"ScholarlyArticle": "JournalArticle",
|
402
418
|
"SoftwareSourceCode": "Software",
|
419
|
+
"Video": "Audiovisual",
|
420
|
+
"WebSite": "WebPage",
|
421
|
+
}
|
422
|
+
|
423
|
+
# OpenGraph to schema.org mapping
|
424
|
+
OG_TO_SO_TRANSLATIONS = {
|
425
|
+
"music.song": "MusicRecording",
|
426
|
+
"music.album": "MusicAlbum",
|
427
|
+
"music.playlist": "MusicPlaylist",
|
428
|
+
"music.radio_station": "RadioStation",
|
429
|
+
"video.movie": "Video",
|
430
|
+
"video.episode": "Video",
|
431
|
+
"video.tv_show": "Video",
|
432
|
+
"video.other": "Video",
|
433
|
+
"article": "Article",
|
434
|
+
"book": "Book",
|
435
|
+
"profile": "ProfilePage",
|
436
|
+
"website": "WebSite",
|
403
437
|
}
|
404
438
|
|
405
439
|
CM_TO_SO_TRANSLATIONS = {
|
@@ -573,3 +607,52 @@ COMMONMETA_CONTRIBUTOR_ROLES = [
|
|
573
607
|
"Maintainer",
|
574
608
|
"Other",
|
575
609
|
]
|
610
|
+
|
611
|
+
INVENIORDM_IDENTIFIER_TYPES = {
|
612
|
+
"Ark": "ark",
|
613
|
+
"ArXiv": "arxiv",
|
614
|
+
"Bibcode": "ads",
|
615
|
+
"CrossrefFunderID": "crossreffunderid",
|
616
|
+
"DOI": "doi",
|
617
|
+
"EAN13": "ean13",
|
618
|
+
"EISSN": "eissn",
|
619
|
+
"GRID": "grid",
|
620
|
+
"Handle": "handle",
|
621
|
+
"IGSN": "igsn",
|
622
|
+
"ISBN": "isbn",
|
623
|
+
"ISNI": "isni",
|
624
|
+
"ISSN": "issn",
|
625
|
+
"ISTC": "istc",
|
626
|
+
"LISSN": "lissn",
|
627
|
+
"LSID": "lsid",
|
628
|
+
"PMID": "pmid",
|
629
|
+
"PURL": "purl",
|
630
|
+
"UPC": "upc",
|
631
|
+
"URL": "url",
|
632
|
+
"URN": "urn",
|
633
|
+
"W3ID": "w3id",
|
634
|
+
"GUID": "guid",
|
635
|
+
"UUID": "uuid",
|
636
|
+
"Other": "other",
|
637
|
+
}
|
638
|
+
|
639
|
+
|
640
|
+
CROSSREF_FUNDER_ID_TO_ROR_TRANSLATIONS = {
|
641
|
+
"https://doi.org/10.13039/100000001": "https://ror.org/021nxhr62",
|
642
|
+
"https://doi.org/10.13039/501100000780": "https://ror.org/00k4n6c32",
|
643
|
+
"https://doi.org/10.13039/501100007601": "https://ror.org/00k4n6c32",
|
644
|
+
"https://doi.org/10.13039/501100001659": "https://ror.org/018mejw64",
|
645
|
+
"https://doi.org/10.13039/501100006390": "https://ror.org/019whta54",
|
646
|
+
"https://doi.org/10.13039/501100001711": "https://ror.org/00yjd3n13",
|
647
|
+
"https://doi.org/10.13039/501100003043": "https://ror.org/04wfr2810",
|
648
|
+
}
|
649
|
+
|
650
|
+
|
651
|
+
ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS = {
|
652
|
+
"https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
|
653
|
+
"https://ror.org/00k4n6c32": "https://doi.org/10.13039/501100000780",
|
654
|
+
"https://ror.org/018mejw64": "https://doi.org/10.13039/501100001659",
|
655
|
+
"https://ror.org/019whta54": "https://doi.org/10.13039/501100006390",
|
656
|
+
"https://ror.org/00yjd3n13": "https://doi.org/10.13039/501100001711",
|
657
|
+
"https://ror.org/04wfr2810": "https://doi.org/10.13039/501100003043",
|
658
|
+
}
|
@@ -8,7 +8,7 @@ import uuid
|
|
8
8
|
import pydash as py_
|
9
9
|
from furl import furl
|
10
10
|
|
11
|
-
from .constants import Commonmeta
|
11
|
+
from .constants import Commonmeta, ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS
|
12
12
|
from .utils import wrap, compact, normalize_orcid, normalize_id, validate_url
|
13
13
|
from .doi_utils import doi_from_url, validate_doi
|
14
14
|
|
@@ -112,9 +112,11 @@ def insert_group_title(metadata, xml):
|
|
112
112
|
"""Insert group title"""
|
113
113
|
if metadata.subjects is None or len(metadata.subjects) == 0:
|
114
114
|
return xml
|
115
|
-
|
116
|
-
|
117
|
-
)
|
115
|
+
group_title = metadata.subjects[0].get("subject", None)
|
116
|
+
# strip optional FOS (Field of Science) prefix
|
117
|
+
if group_title.startswith("FOS: "):
|
118
|
+
group_title = group_title[5:]
|
119
|
+
etree.SubElement(xml, "group_title").text = group_title
|
118
120
|
return xml
|
119
121
|
|
120
122
|
|
@@ -131,8 +133,12 @@ def insert_crossref_contributors(metadata, xml):
|
|
131
133
|
]
|
132
134
|
for num, contributor in enumerate(con):
|
133
135
|
contributor_role = (
|
134
|
-
"author" if contributor.get("contributorRoles")
|
136
|
+
"author" if "Author" in contributor.get("contributorRoles") else None
|
135
137
|
)
|
138
|
+
if contributor_role is None:
|
139
|
+
contributor_role = (
|
140
|
+
"editor" if "Editor" in contributor.get("contributorRoles") else None
|
141
|
+
)
|
136
142
|
sequence = "first" if num == 0 else "additional"
|
137
143
|
if (
|
138
144
|
contributor.get("type", None) == "Organization"
|
@@ -153,7 +159,7 @@ def insert_crossref_contributors(metadata, xml):
|
|
153
159
|
{"contributor_role": contributor_role, "sequence": sequence},
|
154
160
|
)
|
155
161
|
person_name = insert_crossref_person(contributor, person_name)
|
156
|
-
elif contributor.get("
|
162
|
+
elif contributor.get("affiliations", None) is not None:
|
157
163
|
anonymous = etree.SubElement(
|
158
164
|
contributors,
|
159
165
|
"anonymous",
|
@@ -176,18 +182,17 @@ def insert_crossref_person(contributor, xml):
|
|
176
182
|
if contributor.get("familyName", None) is not None:
|
177
183
|
etree.SubElement(xml, "surname").text = contributor.get("familyName")
|
178
184
|
|
179
|
-
if contributor.get("
|
185
|
+
if contributor.get("affiliations", None) is not None:
|
180
186
|
affiliations = etree.SubElement(xml, "affiliations")
|
181
187
|
institution = etree.SubElement(affiliations, "institution")
|
182
|
-
if py_.get(contributor, "
|
188
|
+
if py_.get(contributor, "affiliations.0.name") is not None:
|
183
189
|
etree.SubElement(institution, "institution_name").text = py_.get(
|
184
|
-
contributor, "
|
190
|
+
contributor, "affiliations.0.name"
|
185
191
|
)
|
186
|
-
if py_.get(contributor, "
|
192
|
+
if py_.get(contributor, "affiliations.0.id") is not None:
|
187
193
|
etree.SubElement(
|
188
194
|
institution, "institution_id", {"type": "ror"}
|
189
|
-
).text = py_.get(contributor, "
|
190
|
-
|
195
|
+
).text = py_.get(contributor, "affiliations.0.id")
|
191
196
|
orcid = normalize_orcid(contributor.get("id", None))
|
192
197
|
if orcid is not None:
|
193
198
|
etree.SubElement(xml, "ORCID").text = orcid
|
@@ -196,13 +201,13 @@ def insert_crossref_person(contributor, xml):
|
|
196
201
|
|
197
202
|
def insert_crossref_anonymous(contributor, xml):
|
198
203
|
"""Insert crossref anonymous"""
|
199
|
-
if contributor.get("
|
204
|
+
if contributor.get("affiliations", None) is None:
|
200
205
|
return xml
|
201
206
|
affiliations = etree.SubElement(xml, "affiliations")
|
202
207
|
institution = etree.SubElement(affiliations, "institution")
|
203
|
-
if py_.get(contributor, "
|
208
|
+
if py_.get(contributor, "affiliations.0.name") is not None:
|
204
209
|
etree.SubElement(institution, "institution_name").text = py_.get(
|
205
|
-
contributor, "
|
210
|
+
contributor, "affiliations.0.name"
|
206
211
|
)
|
207
212
|
return xml
|
208
213
|
|
@@ -225,6 +230,8 @@ def insert_citation_list(metadata, xml):
|
|
225
230
|
|
226
231
|
citation_list = etree.SubElement(xml, "citation_list")
|
227
232
|
for ref in metadata.references:
|
233
|
+
if ref.get("id", None) is None:
|
234
|
+
continue
|
228
235
|
citation = etree.SubElement(
|
229
236
|
citation_list, "citation", {"key": ref.get("key", None)}
|
230
237
|
)
|
@@ -242,10 +249,12 @@ def insert_citation_list(metadata, xml):
|
|
242
249
|
etree.SubElement(citation, "cYear").text = ref.get("publicationYear")
|
243
250
|
if ref.get("title", None) is not None:
|
244
251
|
etree.SubElement(citation, "article_title").text = ref.get("title")
|
245
|
-
if ref.get("
|
246
|
-
etree.SubElement(citation, "doi").text = doi_from_url(ref.get("
|
247
|
-
if ref.get("
|
248
|
-
etree.SubElement(citation, "unstructured_citation").text = ref.get(
|
252
|
+
if ref.get("id", None) is not None:
|
253
|
+
etree.SubElement(citation, "doi").text = doi_from_url(ref.get("id"))
|
254
|
+
if ref.get("unstructured", None) is not None:
|
255
|
+
etree.SubElement(citation, "unstructured_citation").text = ref.get(
|
256
|
+
"unstructured"
|
257
|
+
)
|
249
258
|
return xml
|
250
259
|
|
251
260
|
|
@@ -353,11 +362,18 @@ def insert_funding_references(metadata, xml):
|
|
353
362
|
{"name": "funder_name"},
|
354
363
|
)
|
355
364
|
if funding_reference.get("funderIdentifier", None) is not None:
|
365
|
+
funder_identifier = funding_reference.get("funderIdentifier", None)
|
366
|
+
|
367
|
+
# translate ROR to Crossref funder ID until Crossref supports ROR
|
368
|
+
funder_identifier = ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS.get(
|
369
|
+
funder_identifier, funder_identifier
|
370
|
+
)
|
371
|
+
|
356
372
|
etree.SubElement(
|
357
373
|
funder_name,
|
358
374
|
"assertion",
|
359
375
|
{"name": "funder_identifier"},
|
360
|
-
).text =
|
376
|
+
).text = funder_identifier
|
361
377
|
if funding_reference.get("awardNumber", None) is not None:
|
362
378
|
etree.SubElement(
|
363
379
|
assertion,
|
@@ -430,30 +446,22 @@ def insert_institution(metadata, xml):
|
|
430
446
|
|
431
447
|
def insert_item_number(metadata, xml):
|
432
448
|
"""Insert item number"""
|
433
|
-
if metadata.
|
449
|
+
if metadata.identifiers is None:
|
434
450
|
return xml
|
435
|
-
for
|
436
|
-
if
|
437
|
-
continue
|
438
|
-
if alternate_identifier.get("alternateIdentifierType", None) is not None:
|
451
|
+
for identifier in metadata.identifiers:
|
452
|
+
if identifier.get("identifierType", None) == "UUID":
|
439
453
|
# strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
|
440
|
-
if
|
441
|
-
|
442
|
-
"
|
443
|
-
)
|
454
|
+
if identifier.get("identifierType", None) == "UUID":
|
455
|
+
identifier["identifier"] = identifier.get("identifier", "").replace(
|
456
|
+
"-", ""
|
457
|
+
)
|
444
458
|
etree.SubElement(
|
445
459
|
xml,
|
446
460
|
"item_number",
|
447
|
-
{
|
448
|
-
|
449
|
-
"alternateIdentifierType", ""
|
450
|
-
).lower()
|
451
|
-
},
|
452
|
-
).text = alternate_identifier.get("alternateIdentifier", None)
|
461
|
+
{"item_number_type": identifier.get("identifierType", "").lower()},
|
462
|
+
).text = identifier.get("identifier", None)
|
453
463
|
else:
|
454
|
-
|
455
|
-
"alternateIdentifier", None
|
456
|
-
)
|
464
|
+
continue
|
457
465
|
return xml
|
458
466
|
|
459
467
|
|
@@ -3,6 +3,8 @@ import datetime
|
|
3
3
|
from datetime import datetime as dt
|
4
4
|
from typing import Optional, Union
|
5
5
|
import dateparser
|
6
|
+
from edtf import parse_edtf, DateAndTime, Date
|
7
|
+
from edtf.parser.edtf_exceptions import EDTFParseException
|
6
8
|
import pydash as py_
|
7
9
|
|
8
10
|
from .base_utils import compact
|
@@ -158,6 +160,16 @@ def get_datetime_from_time(time: str) -> Optional[str]:
|
|
158
160
|
return None
|
159
161
|
|
160
162
|
|
163
|
+
def get_datetime_from_pdf_time(time: str) -> Optional[str]:
|
164
|
+
"""iso8601 datetime in slightly different format, used in PDF metadata"""
|
165
|
+
try:
|
166
|
+
time = str(time).replace("D:", "").replace("'", "")
|
167
|
+
return dt.strptime(time, "%Y%m%d%H%M%S%z").strftime("%Y-%m-%dT%H:%M:%SZ")
|
168
|
+
except ValueError as e:
|
169
|
+
print(e)
|
170
|
+
return None
|
171
|
+
|
172
|
+
|
161
173
|
def normalize_date_dict(data: dict) -> dict:
|
162
174
|
"""Normalize date dict
|
163
175
|
|
@@ -181,3 +193,17 @@ def normalize_date_dict(data: dict) -> dict:
|
|
181
193
|
"withdrawn": data.get("Withdrawn", None),
|
182
194
|
}
|
183
195
|
)
|
196
|
+
|
197
|
+
|
198
|
+
def validate_edtf(iso8601_time: Optional[str]) -> Optional[str]:
|
199
|
+
"""Validate EDTF string using edtf. Return None if invalid"""
|
200
|
+
if iso8601_time is None:
|
201
|
+
return None
|
202
|
+
try:
|
203
|
+
edtf = parse_edtf(iso8601_time)
|
204
|
+
except EDTFParseException as e:
|
205
|
+
print(e)
|
206
|
+
return None
|
207
|
+
if not isinstance(edtf, (DateAndTime, Date)):
|
208
|
+
return None
|
209
|
+
return edtf.isoformat()
|