commonmeta-py 0.15.2__tar.gz → 0.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/LICENSE +1 -1
  2. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/PKG-INFO +10 -11
  3. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/README.md +2 -2
  4. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/__init__.py +4 -2
  5. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/author_utils.py +6 -6
  6. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/base_utils.py +1 -0
  7. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/cli.py +1 -2
  8. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/constants.py +86 -3
  9. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/crossref_utils.py +46 -38
  10. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/date_utils.py +26 -0
  11. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/doi_utils.py +82 -5
  12. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/metadata.py +25 -54
  13. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/cff_reader.py +1 -1
  14. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/crossref_reader.py +51 -42
  15. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/crossref_xml_reader.py +5 -7
  16. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/csl_reader.py +4 -4
  17. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/datacite_reader.py +87 -28
  18. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/datacite_xml_reader.py +54 -29
  19. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/inveniordm_reader.py +62 -37
  20. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/json_feed_reader.py +100 -87
  21. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/kbase_reader.py +5 -9
  22. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/ris_reader.py +1 -1
  23. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/schema_org_reader.py +180 -50
  24. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/commonmeta_v0.12.json +6 -1
  25. commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.13.json +559 -0
  26. commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.14.json +573 -0
  27. commonmeta_py-0.65/commonmeta/resources/commonmeta_v0.15.json +575 -0
  28. commonmeta_py-0.65/commonmeta/resources/datacite-v4.5pr.json +608 -0
  29. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/schema_utils.py +1 -1
  30. commonmeta_py-0.65/commonmeta/translators.py +47 -0
  31. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/utils.py +221 -78
  32. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/bibtex_writer.py +2 -2
  33. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/citation_writer.py +1 -2
  34. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/commonmeta_writer.py +10 -2
  35. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/csl_writer.py +10 -3
  36. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/datacite_writer.py +29 -10
  37. commonmeta_py-0.65/commonmeta/writers/inveniordm_writer.py +358 -0
  38. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/schema_org_writer.py +2 -2
  39. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/pyproject.toml +17 -10
  40. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/api_utils.py +0 -0
  41. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/__init__.py +0 -0
  42. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/bibtex_reader.py +0 -0
  43. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/codemeta_reader.py +0 -0
  44. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/readers/commonmeta_reader.py +0 -0
  45. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/cff_v1.2.0.json +0 -0
  46. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/AccessIndicators.xsd +0 -0
  47. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -0
  48. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -0
  49. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -0
  50. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -0
  51. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -0
  52. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -0
  53. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/clinicaltrials.xsd +0 -0
  54. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/common5.3.1.xsd +0 -0
  55. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/crossref5.3.1.xsd +0 -0
  56. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -0
  57. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/fundref.xsd +0 -0
  58. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/module-ali.xsd +0 -0
  59. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref/relations.xsd +0 -0
  60. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/crossref-v0.2.json +0 -0
  61. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/csl-data.json +0 -0
  62. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/datacite-v4.5.json +0 -0
  63. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/ietf-bcp-47.json +0 -0
  64. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/iso-8601.json +0 -0
  65. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/spdx/licenses.json +0 -0
  66. /commonmeta_py-0.15.2/commonmeta/resources/spdx-schema..json → /commonmeta_py-0.65/commonmeta/resources/spdx-schema.json +0 -0
  67. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/apa.csl +0 -0
  68. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/chicago-author-date.csl +0 -0
  69. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/harvard-cite-them-right.csl +0 -0
  70. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/ieee.csl +0 -0
  71. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/modern-language-association.csl +0 -0
  72. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/resources/styles/vancouver.csl +0 -0
  73. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/__init__.py +0 -0
  74. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/crossref_xml_writer.py +0 -0
  75. {commonmeta_py-0.15.2 → commonmeta_py-0.65}/commonmeta/writers/ris_writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2022-2023 Front Matter
3
+ Copyright (c) 2022-2024 Front Matter
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,19 +1,20 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: commonmeta-py
3
- Version: 0.15.2
3
+ Version: 0.65
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Home-page: https://python.commonmeta.org
6
6
  License: MIT
7
7
  Keywords: science,metadata,commonmeta,bibtex,csl,crossref,datacite
8
8
  Author: Martin Fenner
9
9
  Author-email: martin@front-matter.io
10
- Requires-Python: >=3.9,<4.0.0
10
+ Requires-Python: >=3.9,<4.0
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.9
14
14
  Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
17
18
  Requires-Dist: PyYAML (>=6.0,<7.0)
18
19
  Requires-Dist: anyio (>=4.2.0,<5.0.0)
19
20
  Requires-Dist: asyncclick (>=8.1.7.1,<9.0.0.0)
@@ -27,23 +28,22 @@ Requires-Dist: click (>=8.1.7,<9.0.0)
27
28
  Requires-Dist: datacite (>=1.1,<2.0)
28
29
  Requires-Dist: dateparser (>=1.1.7,<2.0.0)
29
30
  Requires-Dist: docutils (>=0.19,<0.20)
31
+ Requires-Dist: edtf (>=5.0.0,<6.0.0)
30
32
  Requires-Dist: furl (>=2.1.3,<3.0.0)
31
- Requires-Dist: httpx (>=0.25,<0.26)
33
+ Requires-Dist: httpx (>=0.27,<0.28)
32
34
  Requires-Dist: jsonschema (>=4.21,<5.0)
33
- Requires-Dist: jupyterlab (>=4.0.9,<5.0.0)
34
- Requires-Dist: jupyterlab-quarto (>=0.2.8,<0.3.0)
35
- Requires-Dist: lxml (>=5.1.0,<6.0.0)
35
+ Requires-Dist: lxml (>=4.8)
36
36
  Requires-Dist: nameparser (>=1.1.2,<2.0.0)
37
37
  Requires-Dist: nbstripout (>=0.6,<0.7)
38
38
  Requires-Dist: nh3 (>=0.2.14,<0.3.0)
39
39
  Requires-Dist: orjson (>=3.9.14,<4.0.0)
40
40
  Requires-Dist: orjsonl (>=1.0.0,<2.0.0)
41
+ Requires-Dist: pikepdf (>=8.14,<10.0)
41
42
  Requires-Dist: pycountry (>=23.12.11,<24.0.0)
42
43
  Requires-Dist: pydash (>=7.0,<8.0)
43
44
  Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
44
45
  Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
45
- Requires-Dist: quartodoc (>=0.7.1,<0.8.0)
46
- Requires-Dist: setuptools (>=69.0,<70.0)
46
+ Requires-Dist: setuptools (>=70.0,<71.0)
47
47
  Requires-Dist: simplejson (>=3.18,<4.0)
48
48
  Requires-Dist: sphinx-autodoc-typehints (>=1.19,<2.0)
49
49
  Requires-Dist: sphinxcontrib-issuetracker (>=0.11,<0.12)
@@ -51,7 +51,6 @@ Requires-Dist: types-PyYAML (>=6.0,<7.0)
51
51
  Requires-Dist: types-beautifulsoup4 (>=4.11,<5.0)
52
52
  Requires-Dist: types-dateparser (>=1.1,<2.0)
53
53
  Requires-Dist: types-xmltodict (>=0.13,<0.14)
54
- Requires-Dist: vcrpy (>=5.1.0,<6.0.0)
55
54
  Requires-Dist: xmltodict (>=0.12,<0.13)
56
55
  Project-URL: Documentation, https://python.commonmeta.org
57
56
  Project-URL: Repository, https://github.com/front-matter/commonmeta-py
@@ -102,7 +101,7 @@ Commometa-py reads and/or writes these metadata formats:
102
101
  | [CSV](ttps://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
103
102
  | [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
104
103
  | [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
105
- | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | later | yes |
104
+ | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
106
105
  | [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
107
106
 
108
107
  _commonmeta_: the Commonmeta format is the native format for the library and used internally.
@@ -111,7 +110,7 @@ _Later_: we plan to implement this format in a later release.
111
110
 
112
111
  ## Documentation
113
112
 
114
- Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://commonmeta-py.docs.front-matter.io) website and includes several interactive Jupyter Notebooks .
113
+ Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://python.commonmeta.org/) website and includes several interactive Jupyter Notebooks .
115
114
 
116
115
  ## Meta
117
116
 
@@ -43,7 +43,7 @@ Commometa-py reads and/or writes these metadata formats:
43
43
  | [CSV](ttps://en.wikipedia.org/wiki/Comma-separated_values) | csv | text/csv | no | later |
44
44
  | [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
45
45
  | [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
46
- | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | later | yes |
46
+ | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
47
47
  | [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
48
48
 
49
49
  _commonmeta_: the Commonmeta format is the native format for the library and used internally.
@@ -52,7 +52,7 @@ _Later_: we plan to implement this format in a later release.
52
52
 
53
53
  ## Documentation
54
54
 
55
- Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://commonmeta-py.docs.front-matter.io) website and includes several interactive Jupyter Notebooks .
55
+ Documentation (work in progress) for using the library is available at the [commonmeta-py Documentation](https://python.commonmeta.org/) website and includes several interactive Jupyter Notebooks .
56
56
 
57
57
  ## Meta
58
58
 
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.13.0"
13
+ __version__ = "0.65"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
@@ -54,7 +54,6 @@ from .utils import (
54
54
  validate_orcid,
55
55
  validate_url,
56
56
  get_language,
57
- encode_doi,
58
57
  name_to_fos,
59
58
  from_json_feed,
60
59
  )
@@ -88,9 +87,12 @@ from .doi_utils import (
88
87
  doi_from_url,
89
88
  doi_as_url,
90
89
  doi_resolver,
90
+ decode_doi,
91
+ encode_doi,
91
92
  datacite_api_url,
92
93
  get_doi_ra,
93
94
  normalize_doi,
94
95
  validate_doi,
95
96
  validate_prefix,
97
+ is_rogue_scholar_doi,
96
98
  )
@@ -63,7 +63,6 @@ def get_one_author(author, **kwargs):
63
63
  ) or parse_attributes(
64
64
  author.get("contributorName", None), content="type", first=True
65
65
  )
66
-
67
66
  # also handle Crossref, JSON Feed, or DataCite metadata
68
67
  _id = (
69
68
  author.get("id", None)
@@ -72,7 +71,7 @@ def get_one_author(author, **kwargs):
72
71
  or next(
73
72
  (
74
73
  format_name_identifier(i)
75
- for i in wrap(author.get("nameIdentifiers", None))
74
+ for i in wrap(author.get("nameIdentifiers", None or author.get("identifiers", None)))
76
75
  ),
77
76
  None,
78
77
  )
@@ -109,6 +108,9 @@ def get_one_author(author, **kwargs):
109
108
  given_name = None
110
109
  family_name = None
111
110
 
111
+ # support various keys for affiliations
112
+ affiliations = author.get("affiliation", None) or author.get("affiliations", None)
113
+
112
114
  # return author in commonmeta format, using name vs. given/family name
113
115
  # depending on type
114
116
  return compact(
@@ -119,9 +121,7 @@ def get_one_author(author, **kwargs):
119
121
  "name": name if _type == "Organization" else None,
120
122
  "givenName": given_name if _type == "Person" else None,
121
123
  "familyName": family_name if _type == "Person" else None,
122
- "affiliation": presence(
123
- get_affiliations(wrap(author.get("affiliation", None)))
124
- ),
124
+ "affiliations": presence(get_affiliations(wrap(affiliations))),
125
125
  }
126
126
  )
127
127
 
@@ -184,7 +184,7 @@ def cleanup_author(author):
184
184
  """clean up author string"""
185
185
  if author is None:
186
186
  return None
187
-
187
+
188
188
  if author.startswith(","):
189
189
  return None
190
190
 
@@ -94,6 +94,7 @@ def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
94
94
  "item",
95
95
  "citation",
96
96
  "program",
97
+ "related_item",
97
98
  }
98
99
 
99
100
  kwargs["attr_prefix"] = ""
@@ -5,8 +5,7 @@ import orjson as json
5
5
 
6
6
  from commonmeta import Metadata, MetadataList # __version__
7
7
  from commonmeta.api_utils import update_ghost_post_via_api
8
- from commonmeta.doi_utils import validate_prefix
9
- from commonmeta.utils import encode_doi, decode_doi
8
+ from commonmeta.doi_utils import validate_prefix, encode_doi, decode_doi
10
9
  from commonmeta.readers.json_feed_reader import (
11
10
  get_json_feed_item_uuid,
12
11
  )
@@ -1,4 +1,5 @@
1
1
  """Constants for commonmeta-py"""
2
+
2
3
  from typing import Optional, TypedDict, List
3
4
 
4
5
 
@@ -16,7 +17,7 @@ class Commonmeta(TypedDict):
16
17
  subjects: Optional[List[dict]]
17
18
  contributors: Optional[List[dict]]
18
19
  language: Optional[str]
19
- alternate_identifiers: Optional[List[dict]]
20
+ identifiers: Optional[List[dict]]
20
21
  relations: Optional[List[dict]]
21
22
  sizes: Optional[List[dict]]
22
23
  formats: Optional[List[dict]]
@@ -30,7 +31,6 @@ class Commonmeta(TypedDict):
30
31
  files: Optional[List[dict]]
31
32
  agency: Optional[str]
32
33
  state: str
33
- schema_version: Optional[str]
34
34
 
35
35
 
36
36
  # source: https://www.bibtex.com/e/entry-types/
@@ -239,9 +239,9 @@ INVENIORDM_TO_CM_TRANSLATIONS = {
239
239
  "book": "Book",
240
240
  "section": "BookChapter",
241
241
  "conferencepaper": "ProceedingsArticle",
242
- "article": "JournalArticle",
243
242
  "patent": "Patent",
244
243
  "publication": "JournalArticle",
244
+ "publication-preprint": "Article",
245
245
  "report": "Report",
246
246
  "softwaredocumentation": "Software",
247
247
  "thesis": "Dissertation",
@@ -267,6 +267,17 @@ INVENIORDM_TO_CM_TRANSLATIONS = {
267
267
  "other": "Other",
268
268
  }
269
269
 
270
+ CM_TO_INVENIORDM_TRANSLATIONS = {
271
+ "Article": "publication-preprint",
272
+ "Book": "book",
273
+ "Dataset": "dataset",
274
+ "Image": "image-other",
275
+ "JournalArticle": "publication-article",
276
+ "Presentation": "presentation",
277
+ "Software": "software",
278
+ "Other": "other",
279
+ }
280
+
270
281
  CM_TO_DC_TRANSLATIONS = {
271
282
  "Article": "Preprint",
272
283
  "Audiovisual": "Audiovisual",
@@ -394,12 +405,35 @@ SO_TO_CM_TRANSLATIONS = {
394
405
  "BookChapter": "BookChapter",
395
406
  "CreativeWork": "Other",
396
407
  "Dataset": "Dataset",
408
+ "DigitalDocument": "Document",
397
409
  "Dissertation": "Dissertation",
398
410
  "Instrument": "Instrument",
411
+ "MusicRecording": "Audiovisual",
412
+ "MusicAlbum": "Audiovisual",
399
413
  "NewsArticle": "Article",
400
414
  "Legislation": "LegalDocument",
415
+ "ProfilePage": "WebPage",
416
+ "Report": "Report",
401
417
  "ScholarlyArticle": "JournalArticle",
402
418
  "SoftwareSourceCode": "Software",
419
+ "Video": "Audiovisual",
420
+ "WebSite": "WebPage",
421
+ }
422
+
423
+ # OpenGraph to schema.org mapping
424
+ OG_TO_SO_TRANSLATIONS = {
425
+ "music.song": "MusicRecording",
426
+ "music.album": "MusicAlbum",
427
+ "music.playlist": "MusicPlaylist",
428
+ "music.radio_station": "RadioStation",
429
+ "video.movie": "Video",
430
+ "video.episode": "Video",
431
+ "video.tv_show": "Video",
432
+ "video.other": "Video",
433
+ "article": "Article",
434
+ "book": "Book",
435
+ "profile": "ProfilePage",
436
+ "website": "WebSite",
403
437
  }
404
438
 
405
439
  CM_TO_SO_TRANSLATIONS = {
@@ -573,3 +607,52 @@ COMMONMETA_CONTRIBUTOR_ROLES = [
573
607
  "Maintainer",
574
608
  "Other",
575
609
  ]
610
+
611
+ INVENIORDM_IDENTIFIER_TYPES = {
612
+ "Ark": "ark",
613
+ "ArXiv": "arxiv",
614
+ "Bibcode": "ads",
615
+ "CrossrefFunderID": "crossreffunderid",
616
+ "DOI": "doi",
617
+ "EAN13": "ean13",
618
+ "EISSN": "eissn",
619
+ "GRID": "grid",
620
+ "Handle": "handle",
621
+ "IGSN": "igsn",
622
+ "ISBN": "isbn",
623
+ "ISNI": "isni",
624
+ "ISSN": "issn",
625
+ "ISTC": "istc",
626
+ "LISSN": "lissn",
627
+ "LSID": "lsid",
628
+ "PMID": "pmid",
629
+ "PURL": "purl",
630
+ "UPC": "upc",
631
+ "URL": "url",
632
+ "URN": "urn",
633
+ "W3ID": "w3id",
634
+ "GUID": "guid",
635
+ "UUID": "uuid",
636
+ "Other": "other",
637
+ }
638
+
639
+
640
+ CROSSREF_FUNDER_ID_TO_ROR_TRANSLATIONS = {
641
+ "https://doi.org/10.13039/100000001": "https://ror.org/021nxhr62",
642
+ "https://doi.org/10.13039/501100000780": "https://ror.org/00k4n6c32",
643
+ "https://doi.org/10.13039/501100007601": "https://ror.org/00k4n6c32",
644
+ "https://doi.org/10.13039/501100001659": "https://ror.org/018mejw64",
645
+ "https://doi.org/10.13039/501100006390": "https://ror.org/019whta54",
646
+ "https://doi.org/10.13039/501100001711": "https://ror.org/00yjd3n13",
647
+ "https://doi.org/10.13039/501100003043": "https://ror.org/04wfr2810",
648
+ }
649
+
650
+
651
+ ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS = {
652
+ "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
653
+ "https://ror.org/00k4n6c32": "https://doi.org/10.13039/501100000780",
654
+ "https://ror.org/018mejw64": "https://doi.org/10.13039/501100001659",
655
+ "https://ror.org/019whta54": "https://doi.org/10.13039/501100006390",
656
+ "https://ror.org/00yjd3n13": "https://doi.org/10.13039/501100001711",
657
+ "https://ror.org/04wfr2810": "https://doi.org/10.13039/501100003043",
658
+ }
@@ -8,7 +8,7 @@ import uuid
8
8
  import pydash as py_
9
9
  from furl import furl
10
10
 
11
- from .constants import Commonmeta
11
+ from .constants import Commonmeta, ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS
12
12
  from .utils import wrap, compact, normalize_orcid, normalize_id, validate_url
13
13
  from .doi_utils import doi_from_url, validate_doi
14
14
 
@@ -112,9 +112,11 @@ def insert_group_title(metadata, xml):
112
112
  """Insert group title"""
113
113
  if metadata.subjects is None or len(metadata.subjects) == 0:
114
114
  return xml
115
- etree.SubElement(xml, "group_title").text = metadata.subjects[0].get(
116
- "subject", None
117
- )
115
+ group_title = metadata.subjects[0].get("subject", None)
116
+ # strip optional FOS (Field of Science) prefix
117
+ if group_title.startswith("FOS: "):
118
+ group_title = group_title[5:]
119
+ etree.SubElement(xml, "group_title").text = group_title
118
120
  return xml
119
121
 
120
122
 
@@ -131,8 +133,12 @@ def insert_crossref_contributors(metadata, xml):
131
133
  ]
132
134
  for num, contributor in enumerate(con):
133
135
  contributor_role = (
134
- "author" if contributor.get("contributorRoles") == ["Author"] else "editor"
136
+ "author" if "Author" in contributor.get("contributorRoles") else None
135
137
  )
138
+ if contributor_role is None:
139
+ contributor_role = (
140
+ "editor" if "Editor" in contributor.get("contributorRoles") else None
141
+ )
136
142
  sequence = "first" if num == 0 else "additional"
137
143
  if (
138
144
  contributor.get("type", None) == "Organization"
@@ -153,7 +159,7 @@ def insert_crossref_contributors(metadata, xml):
153
159
  {"contributor_role": contributor_role, "sequence": sequence},
154
160
  )
155
161
  person_name = insert_crossref_person(contributor, person_name)
156
- elif contributor.get("affiliation", None) is not None:
162
+ elif contributor.get("affiliations", None) is not None:
157
163
  anonymous = etree.SubElement(
158
164
  contributors,
159
165
  "anonymous",
@@ -176,18 +182,17 @@ def insert_crossref_person(contributor, xml):
176
182
  if contributor.get("familyName", None) is not None:
177
183
  etree.SubElement(xml, "surname").text = contributor.get("familyName")
178
184
 
179
- if contributor.get("affiliation", None) is not None:
185
+ if contributor.get("affiliations", None) is not None:
180
186
  affiliations = etree.SubElement(xml, "affiliations")
181
187
  institution = etree.SubElement(affiliations, "institution")
182
- if py_.get(contributor, "affiliation.0.name") is not None:
188
+ if py_.get(contributor, "affiliations.0.name") is not None:
183
189
  etree.SubElement(institution, "institution_name").text = py_.get(
184
- contributor, "affiliation.0.name"
190
+ contributor, "affiliations.0.name"
185
191
  )
186
- if py_.get(contributor, "affiliation.0.id") is not None:
192
+ if py_.get(contributor, "affiliations.0.id") is not None:
187
193
  etree.SubElement(
188
194
  institution, "institution_id", {"type": "ror"}
189
- ).text = py_.get(contributor, "affiliation.0.id")
190
-
195
+ ).text = py_.get(contributor, "affiliations.0.id")
191
196
  orcid = normalize_orcid(contributor.get("id", None))
192
197
  if orcid is not None:
193
198
  etree.SubElement(xml, "ORCID").text = orcid
@@ -196,13 +201,13 @@ def insert_crossref_person(contributor, xml):
196
201
 
197
202
  def insert_crossref_anonymous(contributor, xml):
198
203
  """Insert crossref anonymous"""
199
- if contributor.get("affiliation", None) is None:
204
+ if contributor.get("affiliations", None) is None:
200
205
  return xml
201
206
  affiliations = etree.SubElement(xml, "affiliations")
202
207
  institution = etree.SubElement(affiliations, "institution")
203
- if py_.get(contributor, "affiliation.0.name") is not None:
208
+ if py_.get(contributor, "affiliations.0.name") is not None:
204
209
  etree.SubElement(institution, "institution_name").text = py_.get(
205
- contributor, "affiliation.0.name"
210
+ contributor, "affiliations.0.name"
206
211
  )
207
212
  return xml
208
213
 
@@ -225,6 +230,8 @@ def insert_citation_list(metadata, xml):
225
230
 
226
231
  citation_list = etree.SubElement(xml, "citation_list")
227
232
  for ref in metadata.references:
233
+ if ref.get("id", None) is None:
234
+ continue
228
235
  citation = etree.SubElement(
229
236
  citation_list, "citation", {"key": ref.get("key", None)}
230
237
  )
@@ -242,10 +249,12 @@ def insert_citation_list(metadata, xml):
242
249
  etree.SubElement(citation, "cYear").text = ref.get("publicationYear")
243
250
  if ref.get("title", None) is not None:
244
251
  etree.SubElement(citation, "article_title").text = ref.get("title")
245
- if ref.get("doi", None) is not None:
246
- etree.SubElement(citation, "doi").text = doi_from_url(ref.get("doi"))
247
- if ref.get("url", None) is not None:
248
- etree.SubElement(citation, "unstructured_citation").text = ref.get("url")
252
+ if ref.get("id", None) is not None:
253
+ etree.SubElement(citation, "doi").text = doi_from_url(ref.get("id"))
254
+ if ref.get("unstructured", None) is not None:
255
+ etree.SubElement(citation, "unstructured_citation").text = ref.get(
256
+ "unstructured"
257
+ )
249
258
  return xml
250
259
 
251
260
 
@@ -353,11 +362,18 @@ def insert_funding_references(metadata, xml):
353
362
  {"name": "funder_name"},
354
363
  )
355
364
  if funding_reference.get("funderIdentifier", None) is not None:
365
+ funder_identifier = funding_reference.get("funderIdentifier", None)
366
+
367
+ # translate ROR to Crossref funder ID until Crossref supports ROR
368
+ funder_identifier = ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS.get(
369
+ funder_identifier, funder_identifier
370
+ )
371
+
356
372
  etree.SubElement(
357
373
  funder_name,
358
374
  "assertion",
359
375
  {"name": "funder_identifier"},
360
- ).text = funding_reference.get("funderIdentifier", None)
376
+ ).text = funder_identifier
361
377
  if funding_reference.get("awardNumber", None) is not None:
362
378
  etree.SubElement(
363
379
  assertion,
@@ -430,30 +446,22 @@ def insert_institution(metadata, xml):
430
446
 
431
447
  def insert_item_number(metadata, xml):
432
448
  """Insert item number"""
433
- if metadata.alternate_identifiers is None:
449
+ if metadata.identifiers is None:
434
450
  return xml
435
- for alternate_identifier in metadata.alternate_identifiers:
436
- if alternate_identifier.get("alternateIdentifier", None) is None:
437
- continue
438
- if alternate_identifier.get("alternateIdentifierType", None) is not None:
451
+ for identifier in metadata.identifiers:
452
+ if identifier.get("identifierType", None) == "UUID":
439
453
  # strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
440
- if alternate_identifier.get("alternateIdentifierType", None) == "UUID":
441
- alternate_identifier["alternateIdentifier"] = alternate_identifier.get(
442
- "alternateIdentifier", ""
443
- ).replace("-", "")
454
+ if identifier.get("identifierType", None) == "UUID":
455
+ identifier["identifier"] = identifier.get("identifier", "").replace(
456
+ "-", ""
457
+ )
444
458
  etree.SubElement(
445
459
  xml,
446
460
  "item_number",
447
- {
448
- "item_number_type": alternate_identifier.get(
449
- "alternateIdentifierType", ""
450
- ).lower()
451
- },
452
- ).text = alternate_identifier.get("alternateIdentifier", None)
461
+ {"item_number_type": identifier.get("identifierType", "").lower()},
462
+ ).text = identifier.get("identifier", None)
453
463
  else:
454
- etree.SubElement(xml, "item_number").text = alternate_identifier.get(
455
- "alternateIdentifier", None
456
- )
464
+ continue
457
465
  return xml
458
466
 
459
467
 
@@ -3,6 +3,8 @@ import datetime
3
3
  from datetime import datetime as dt
4
4
  from typing import Optional, Union
5
5
  import dateparser
6
+ from edtf import parse_edtf, DateAndTime, Date
7
+ from edtf.parser.edtf_exceptions import EDTFParseException
6
8
  import pydash as py_
7
9
 
8
10
  from .base_utils import compact
@@ -158,6 +160,16 @@ def get_datetime_from_time(time: str) -> Optional[str]:
158
160
  return None
159
161
 
160
162
 
163
+ def get_datetime_from_pdf_time(time: str) -> Optional[str]:
164
+ """iso8601 datetime in slightly different format, used in PDF metadata"""
165
+ try:
166
+ time = str(time).replace("D:", "").replace("'", "")
167
+ return dt.strptime(time, "%Y%m%d%H%M%S%z").strftime("%Y-%m-%dT%H:%M:%SZ")
168
+ except ValueError as e:
169
+ print(e)
170
+ return None
171
+
172
+
161
173
  def normalize_date_dict(data: dict) -> dict:
162
174
  """Normalize date dict
163
175
 
@@ -181,3 +193,17 @@ def normalize_date_dict(data: dict) -> dict:
181
193
  "withdrawn": data.get("Withdrawn", None),
182
194
  }
183
195
  )
196
+
197
+
198
+ def validate_edtf(iso8601_time: Optional[str]) -> Optional[str]:
199
+ """Validate EDTF string using edtf. Return None if invalid"""
200
+ if iso8601_time is None:
201
+ return None
202
+ try:
203
+ edtf = parse_edtf(iso8601_time)
204
+ except EDTFParseException as e:
205
+ print(e)
206
+ return None
207
+ if not isinstance(edtf, (DateAndTime, Date)):
208
+ return None
209
+ return edtf.isoformat()