commonmeta-py 0.17.3__py3-none-any.whl → 0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. commonmeta_py/__init__.py +2 -0
  2. {commonmeta_py-0.17.3.dist-info → commonmeta_py-0.20.dist-info}/METADATA +45 -56
  3. commonmeta_py-0.20.dist-info/RECORD +5 -0
  4. {commonmeta_py-0.17.3.dist-info → commonmeta_py-0.20.dist-info}/WHEEL +1 -1
  5. {commonmeta_py-0.17.3.dist-info → commonmeta_py-0.20.dist-info/licenses}/LICENSE +1 -1
  6. commonmeta/__init__.py +0 -96
  7. commonmeta/api_utils.py +0 -77
  8. commonmeta/author_utils.py +0 -261
  9. commonmeta/base_utils.py +0 -121
  10. commonmeta/cli.py +0 -200
  11. commonmeta/constants.py +0 -576
  12. commonmeta/crossref_utils.py +0 -575
  13. commonmeta/date_utils.py +0 -193
  14. commonmeta/doi_utils.py +0 -260
  15. commonmeta/metadata.py +0 -317
  16. commonmeta/readers/__init__.py +0 -1
  17. commonmeta/readers/bibtex_reader.py +0 -0
  18. commonmeta/readers/cff_reader.py +0 -199
  19. commonmeta/readers/codemeta_reader.py +0 -112
  20. commonmeta/readers/commonmeta_reader.py +0 -13
  21. commonmeta/readers/crossref_reader.py +0 -409
  22. commonmeta/readers/crossref_xml_reader.py +0 -508
  23. commonmeta/readers/csl_reader.py +0 -98
  24. commonmeta/readers/datacite_reader.py +0 -384
  25. commonmeta/readers/datacite_xml_reader.py +0 -357
  26. commonmeta/readers/inveniordm_reader.py +0 -199
  27. commonmeta/readers/json_feed_reader.py +0 -422
  28. commonmeta/readers/kbase_reader.py +0 -205
  29. commonmeta/readers/ris_reader.py +0 -103
  30. commonmeta/readers/schema_org_reader.py +0 -493
  31. commonmeta/resources/cff_v1.2.0.json +0 -1827
  32. commonmeta/resources/commonmeta_v0.12.json +0 -601
  33. commonmeta/resources/commonmeta_v0.13.json +0 -571
  34. commonmeta/resources/crossref/AccessIndicators.xsd +0 -47
  35. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -10130
  36. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -48
  37. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -8705
  38. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -8608
  39. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -49
  40. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -6176
  41. commonmeta/resources/crossref/clinicaltrials.xsd +0 -61
  42. commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
  43. commonmeta/resources/crossref/crossref5.3.1.xsd +0 -1949
  44. commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -1097
  45. commonmeta/resources/crossref/fundref.xsd +0 -49
  46. commonmeta/resources/crossref/module-ali.xsd +0 -39
  47. commonmeta/resources/crossref/relations.xsd +0 -444
  48. commonmeta/resources/crossref-v0.2.json +0 -60
  49. commonmeta/resources/csl-data.json +0 -538
  50. commonmeta/resources/datacite-v4.5.json +0 -829
  51. commonmeta/resources/ietf-bcp-47.json +0 -3025
  52. commonmeta/resources/iso-8601.json +0 -3182
  53. commonmeta/resources/spdx/licenses.json +0 -4851
  54. commonmeta/resources/spdx-schema..json +0 -903
  55. commonmeta/resources/styles/apa.csl +0 -1697
  56. commonmeta/resources/styles/chicago-author-date.csl +0 -684
  57. commonmeta/resources/styles/harvard-cite-them-right.csl +0 -321
  58. commonmeta/resources/styles/ieee.csl +0 -468
  59. commonmeta/resources/styles/modern-language-association.csl +0 -341
  60. commonmeta/resources/styles/vancouver.csl +0 -376
  61. commonmeta/schema_utils.py +0 -27
  62. commonmeta/translators.py +0 -47
  63. commonmeta/utils.py +0 -1075
  64. commonmeta/writers/__init__.py +0 -1
  65. commonmeta/writers/bibtex_writer.py +0 -149
  66. commonmeta/writers/citation_writer.py +0 -70
  67. commonmeta/writers/commonmeta_writer.py +0 -68
  68. commonmeta/writers/crossref_xml_writer.py +0 -17
  69. commonmeta/writers/csl_writer.py +0 -78
  70. commonmeta/writers/datacite_writer.py +0 -190
  71. commonmeta/writers/ris_writer.py +0 -58
  72. commonmeta/writers/schema_org_writer.py +0 -146
  73. commonmeta_py-0.17.3.dist-info/RECORD +0 -72
  74. commonmeta_py-0.17.3.dist-info/entry_points.txt +0 -3
@@ -0,0 +1,2 @@
1
def hello() -> str:
    """Return the greeting string of the commonmeta-py package."""
    greeting = "Hello from commonmeta-py!"
    return greeting
@@ -1,60 +1,50 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: commonmeta-py
3
- Version: 0.17.3
3
+ Version: 0.20
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
- Home-page: https://python.commonmeta.org
6
- License: MIT
7
- Keywords: science,metadata,commonmeta,bibtex,csl,crossref,datacite
8
- Author: Martin Fenner
9
- Author-email: martin@front-matter.io
10
- Requires-Python: >=3.9,<4.0.0
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Requires-Dist: PyYAML (>=6.0,<7.0)
18
- Requires-Dist: anyio (>=4.2.0,<5.0.0)
19
- Requires-Dist: asyncclick (>=8.1.7.1,<9.0.0.0)
20
- Requires-Dist: asyncio (>=3.4.3,<4.0.0)
21
- Requires-Dist: base32_lib (>=1.0,<2.0)
22
- Requires-Dist: beautifulsoup4 (>=4.11,<5.0)
23
- Requires-Dist: bibtexparser (>=1.4,<2.0)
24
- Requires-Dist: citeproc-py (>=0.6,<0.7)
25
- Requires-Dist: citeproc-py-styles (>0.1)
26
- Requires-Dist: click (>=8.1.7,<9.0.0)
27
- Requires-Dist: datacite (>=1.1,<2.0)
28
- Requires-Dist: dateparser (>=1.1.7,<2.0.0)
29
- Requires-Dist: docutils (>=0.19,<0.20)
30
- Requires-Dist: furl (>=2.1.3,<3.0.0)
31
- Requires-Dist: httpx (>=0.24,<0.25)
32
- Requires-Dist: jsonschema (>=4.21,<5.0)
33
- Requires-Dist: jupyterlab (>=4.0.9,<5.0.0)
34
- Requires-Dist: jupyterlab-quarto (>=0.2.8,<0.3.0)
35
- Requires-Dist: lxml (>=4.8)
36
- Requires-Dist: nameparser (>=1.1.2,<2.0.0)
37
- Requires-Dist: nbstripout (>=0.6,<0.7)
38
- Requires-Dist: nh3 (>=0.2.14,<0.3.0)
39
- Requires-Dist: orjson (>=3.9.14,<4.0.0)
40
- Requires-Dist: orjsonl (>=1.0.0,<2.0.0)
41
- Requires-Dist: pikepdf (>=8.14.0,<9.0.0)
42
- Requires-Dist: pycountry (>=23.12.11,<24.0.0)
43
- Requires-Dist: pydash (>=7.0,<8.0)
44
- Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
45
- Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
46
- Requires-Dist: quartodoc (>=0.7.1,<0.8.0)
47
- Requires-Dist: setuptools (>=69.0,<70.0)
48
- Requires-Dist: simplejson (>=3.18,<4.0)
49
- Requires-Dist: sphinx-autodoc-typehints (>=1.19,<2.0)
50
- Requires-Dist: sphinxcontrib-issuetracker (>=0.11,<0.12)
51
- Requires-Dist: types-PyYAML (>=6.0,<7.0)
52
- Requires-Dist: types-beautifulsoup4 (>=4.11,<5.0)
53
- Requires-Dist: types-dateparser (>=1.1,<2.0)
54
- Requires-Dist: types-xmltodict (>=0.13,<0.14)
55
- Requires-Dist: xmltodict (>=0.12,<0.13)
56
- Project-URL: Documentation, https://python.commonmeta.org
57
- Project-URL: Repository, https://github.com/front-matter/commonmeta-py
5
+ Author-email: Martin Fenner <martin@front-matter.io>
6
+ License-File: LICENSE
7
+ Keywords: bibtex,commonmeta,crossref,csl,datacite,metadata,science
8
+ Requires-Python: >=3.9
9
+ Requires-Dist: anyio>=4.2.0
10
+ Requires-Dist: asyncclick>=8.1.7.1
11
+ Requires-Dist: asyncio>=3.4.3
12
+ Requires-Dist: base32-lib>=1.0
13
+ Requires-Dist: beautifulsoup4>=4.11
14
+ Requires-Dist: bibtexparser>=1.4
15
+ Requires-Dist: citeproc-py-styles>=0.1
16
+ Requires-Dist: citeproc-py>=0.6
17
+ Requires-Dist: click>=8.1.7
18
+ Requires-Dist: datacite>=1.1
19
+ Requires-Dist: dateparser>=1.1.7
20
+ Requires-Dist: docutils>=0.19
21
+ Requires-Dist: furl>=2.1.3
22
+ Requires-Dist: httpx>=0.24
23
+ Requires-Dist: jsonschema>=4.21
24
+ Requires-Dist: jupyterlab-quarto>=0.2.8
25
+ Requires-Dist: jupyterlab>=4.0.9
26
+ Requires-Dist: lxml>=4.8
27
+ Requires-Dist: nameparser>=1.1.2
28
+ Requires-Dist: nbstripout>=0.6
29
+ Requires-Dist: nh3>=0.2.14
30
+ Requires-Dist: orjson>=3.9.14
31
+ Requires-Dist: orjsonl>=1.0.0
32
+ Requires-Dist: pikepdf>=8.14.0
33
+ Requires-Dist: pycountry>=23.12.11
34
+ Requires-Dist: pydash>=7.0
35
+ Requires-Dist: pyjwt>=2.8.0
36
+ Requires-Dist: python-dateutil>=2.8.2
37
+ Requires-Dist: pyyaml>=6.0
38
+ Requires-Dist: quartodoc>=0.7.1
39
+ Requires-Dist: setuptools>=69.0
40
+ Requires-Dist: simplejson>=3.18
41
+ Requires-Dist: sphinx-autodoc-typehints>=1.19
42
+ Requires-Dist: sphinxcontrib-issuetracker>=0.11
43
+ Requires-Dist: types-beautifulsoup4>=4.11
44
+ Requires-Dist: types-dateparser>=1.1
45
+ Requires-Dist: types-pyyaml>=6.0
46
+ Requires-Dist: types-xmltodict>=0.13
47
+ Requires-Dist: xmltodict>=0.12
58
48
  Description-Content-Type: text/markdown
59
49
 
60
50
  [![DOI](https://zenodo.org/badge/570526578.svg)](https://zenodo.org/doi/10.5281/zenodo.8340374)
@@ -118,4 +108,3 @@ Documentation (work in progress) for using the library is available at the [comm
118
108
  Please note that this project is released with a [Contributor Code of Conduct](https://github.com/front-matter/commonmeta-py/blob/main/CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.
119
109
 
120
110
  License: [MIT](https://github.com/front-matter/commonmeta-py/blob/main/LICENSE)
121
-
@@ -0,0 +1,5 @@
1
+ commonmeta_py/__init__.py,sha256=abXb57nBr4xBwU7yR0x2b8hdsOe1Oej3sjdevarGQMY,59
2
+ commonmeta_py-0.20.dist-info/METADATA,sha256=G0yLfr4LUcNLx-8qNCrQPpyEzVj3LYkAsS-dRDLE1BA,7510
3
+ commonmeta_py-0.20.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
4
+ commonmeta_py-0.20.dist-info/licenses/LICENSE,sha256=746hEF2wZCKkcckk5-_DcBLtHewfaEMS4iXTlA1PVwk,1074
5
+ commonmeta_py-0.20.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2022-2023 Front Matter
3
+ Copyright (c) 2022-2024 Front Matter
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
commonmeta/__init__.py DELETED
@@ -1,96 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # commonmeta-py
4
-
5
- """
6
- commonmeta-py library
7
- ~~~~~~~~~~~~~~~~~~~~~
8
-
9
- commonmeta-py is a Python library to convert scholarly metadata
10
- """
11
-
12
- __title__ = "commonmeta-py"
13
- __version__ = "0.13.0"
14
- __author__ = "Martin Fenner"
15
- __license__ = "MIT"
16
-
17
- # ruff: noqa: F401
18
- from .metadata import Metadata, MetadataList
19
- from .readers import (
20
- cff_reader,
21
- codemeta_reader,
22
- crossref_reader,
23
- crossref_xml_reader,
24
- datacite_reader,
25
- datacite_xml_reader,
26
- inveniordm_reader,
27
- json_feed_reader,
28
- kbase_reader,
29
- ris_reader,
30
- schema_org_reader,
31
- )
32
- from .writers import (
33
- bibtex_writer,
34
- citation_writer,
35
- commonmeta_writer,
36
- csl_writer,
37
- datacite_writer,
38
- ris_writer,
39
- schema_org_writer,
40
- )
41
- from .utils import (
42
- dict_to_spdx,
43
- from_csl,
44
- from_schema_org,
45
- get_language,
46
- normalize_cc_url,
47
- normalize_id,
48
- normalize_ids,
49
- normalize_orcid,
50
- normalize_url,
51
- normalize_ror,
52
- pages_as_string,
53
- to_csl,
54
- validate_orcid,
55
- validate_url,
56
- get_language,
57
- encode_doi,
58
- name_to_fos,
59
- from_json_feed,
60
- )
61
- from .author_utils import (
62
- authors_as_string,
63
- cleanup_author,
64
- get_affiliations,
65
- get_authors,
66
- get_one_author,
67
- is_personal_name,
68
- )
69
- from .base_utils import (
70
- wrap,
71
- unwrap,
72
- compact,
73
- presence,
74
- parse_attributes,
75
- sanitize,
76
- )
77
- from .date_utils import (
78
- get_date_from_crossref_parts,
79
- get_date_from_date_parts,
80
- get_date_from_unix_timestamp,
81
- get_date_parts,
82
- get_iso8601_date,
83
- strip_milliseconds,
84
- )
85
- from .doi_utils import (
86
- crossref_api_url,
87
- crossref_xml_api_url,
88
- doi_from_url,
89
- doi_as_url,
90
- doi_resolver,
91
- datacite_api_url,
92
- get_doi_ra,
93
- normalize_doi,
94
- validate_doi,
95
- validate_prefix,
96
- )
commonmeta/api_utils.py DELETED
@@ -1,77 +0,0 @@
1
- """API Utils module for commonmeta-py"""
2
-
3
- from typing import Optional
4
- from datetime import datetime as date
5
- import httpx
6
- from furl import furl
7
- import jwt
8
-
9
- from .doi_utils import validate_doi, doi_as_url
10
- from .readers.json_feed_reader import get_json_feed_item_uuid
11
-
12
-
13
def generate_ghost_token(key: str) -> str:
    """Build a short-lived JWT for the Ghost Admin API.

    From https://ghost.org/docs/admin-api/#token-authentication

    Args:
        key: Admin API key of the form ``"<id>:<hex-encoded secret>"``.

    Returns:
        The token produced by ``jwt.encode``, signed with HS256 and expiring
        five minutes after it was issued.
    """
    # The key bundles the key id and the hex-encoded signing secret
    key_id, hex_secret = key.split(":")

    issued_at = int(date.now().timestamp())
    expires_at = issued_at + 5 * 60

    jwt_header = {"alg": "HS256", "typ": "JWT", "kid": key_id}
    claims = {"iat": issued_at, "exp": expires_at, "aud": "/admin/"}

    # The secret is hex-decoded into raw bytes before signing
    signing_key = bytes.fromhex(hex_secret)
    return jwt.encode(claims, signing_key, algorithm="HS256", headers=jwt_header)
28
-
29
-
30
def update_ghost_post_via_api(
    _id: str, api_key: Optional[str] = None, api_url: Optional[str] = None
) -> dict[str, str]:
    """Update the canonical URL of a Ghost post to its DOI via the Ghost Admin API.

    Args:
        _id: Identifier passed to ``get_json_feed_item_uuid`` to look up the post.
        api_key: Ghost Admin API key (``"<id>:<hex secret>"``).
        api_url: Base URL of the Ghost site; the ``/ghost/api/admin/...`` path
            is appended to it.

    Returns:
        ``{"message", "guid", "updated_at"}`` on success, otherwise a dict
        with an ``"error"`` key. Failures are reported via the returned dict.
    """
    # get post doi and url from Rogue Scholar API
    # post url is needed to find post via Ghost API
    post = get_json_feed_item_uuid(_id)
    if post.get("error", None):
        return post
    doi = validate_doi(post.get("doi", None))
    doi = doi_as_url(doi)
    url = post.get("url", None)
    if not doi or not url:
        return {"error": "DOI or URL not found"}

    # both parameters default to None; without them no request can be made
    if not api_key or not api_url:
        return {"error": "API key or API URL not provided"}

    # get post_id and updated_at from ghost api
    token = generate_ghost_token(api_key)
    headers = {
        "Authorization": f"Ghost {token}",
        "Content-Type": "application/json",
        "Accept-Version": "v5",
    }
    f = furl(url)
    slug = f.path.segments[-1]
    ghost_url = f"{api_url}/ghost/api/admin/posts/slug/{slug}/"
    response = httpx.get(ghost_url, headers=headers, timeout=10)
    if response.status_code != 200:
        return {"error": "Error fetching post"}
    # guard against a 200 response without any posts instead of crashing
    posts = response.json().get("posts") or []
    if not posts:
        return {"error": "Error fetching post"}
    ghost_post = posts[0]
    guid = ghost_post.get("id")
    updated_at = ghost_post.get("updated_at")
    if not guid or not updated_at:
        return {"error": "guid or updated_at not found"}

    # update post canonical_url using doi. This requires sending
    # the updated_at timestamp to avoid conflicts, and must use guid
    # rather than url for put requests
    ghost_url = f"{api_url}/ghost/api/admin/posts/{guid}/"

    json = {"posts": [{"canonical_url": doi, "updated_at": updated_at}]}
    response = httpx.put(
        ghost_url,
        headers=headers,
        json=json,
        timeout=10,  # same explicit timeout as the GET above
    )
    if response.status_code != 200:
        return {"error": "Error updating post"}
    return {"message": f"DOI {doi} added", "guid": guid, "updated_at": updated_at}
@@ -1,261 +0,0 @@
1
- """Author utils module for commonmeta-py"""
2
- import re
3
- from typing import List
4
- from nameparser import HumanName
5
- from pydash import py_
6
- from furl import furl
7
-
8
- from .utils import (
9
- normalize_orcid,
10
- normalize_id,
11
- normalize_ror,
12
- normalize_isni,
13
- format_name_identifier,
14
- validate_ror,
15
- validate_orcid,
16
- )
17
- from .base_utils import parse_attributes, wrap, presence, compact
18
-
19
- from .constants import (
20
- COMMONMETA_CONTRIBUTOR_ROLES,
21
- )
22
-
23
-
24
def get_one_author(author, **kwargs):
    """Parse one author (string or dict) into commonmeta format.

    Args:
        author: Author name as a string, or a dict using any of the keys read
            below (``creatorName``/``contributorName``/``name``,
            ``givenName``/``given``, ``familyName``/``family``, ``id``/``ORCID``/
            ``url``/``nameIdentifiers``, ``affiliation``/``affiliations``, ...).
        **kwargs: ``via="crossref"`` makes a plain name with no other type hint
            be classified as an organization.

    Returns:
        The author dict passed through ``compact`` (keys: ``id``, ``type``,
        ``contributorRoles``, ``name`` or ``givenName``/``familyName``,
        ``affiliations``), or None when no name is available or
        ``creatorName`` is a list.
    """
    # if author is a string
    if isinstance(author, str):
        author = {"creatorName": author}

    # malformed XML
    if isinstance(author.get("creatorName", None), list):
        return None

    name = (
        parse_attributes(author.get("creatorName", None))
        or parse_attributes(author.get("contributorName", None))
        or parse_attributes(author.get("name", None))
    )
    given_name = parse_attributes(author.get("givenName", None)) or parse_attributes(
        author.get("given", None)
    )
    family_name = parse_attributes(author.get("familyName", None)) or parse_attributes(
        author.get("family", None)
    )

    name = cleanup_author(name)

    # make sure we have a name
    if not name and not given_name and not family_name:
        return None

    # parse contributor roles, checking for roles supported by commonmeta
    contributor_roles = wrap(
        parse_attributes(author.get("contributorType", None))
    ) or wrap(parse_attributes(author.get("contributor_roles", None)))
    contributor_roles = [
        i for i in contributor_roles if i in COMMONMETA_CONTRIBUTOR_ROLES
    ] or ["Author"]

    # parse author type, i.e. "Person", "Organization" or not specified
    _type = parse_attributes(
        author.get("creatorName", None), content="type", first=True
    ) or parse_attributes(
        author.get("contributorName", None), content="type", first=True
    )

    # also handle Crossref, JSON Feed, or DataCite metadata
    _id = (
        author.get("id", None)
        or author.get("ORCID", None)
        or author.get("url", None)
        or next(
            (
                format_name_identifier(i)
                for i in wrap(author.get("nameIdentifiers", None))
            ),
            None,
        )
    )
    _id = normalize_orcid(_id) or normalize_ror(_id) or normalize_isni(_id) or _id

    # DataCite metadata uses "Personal"/"Organizational": strip the two-character
    # "al" suffix to obtain "Person"/"Organization"
    if isinstance(_type, str) and _type.endswith("al"):
        _type = _type[:-2]

    if not _type and isinstance(_id, str) and validate_ror(_id) is not None:
        _type = "Organization"
    elif not _type and isinstance(_id, str) and validate_orcid(_id) is not None:
        _type = "Person"
    elif not _type and (given_name or family_name):
        _type = "Person"
    elif not _type and name and kwargs.get("via", None) == "crossref":
        _type = "Organization"
    elif not _type and is_personal_name(name):
        _type = "Person"
    elif not _type and name:
        _type = "Organization"

    # split name for type Person into given/family name if not already provided
    if _type == "Person" and name and not given_name and not family_name:
        names = HumanName(name)

        if names:
            given_name = (
                " ".join([names.first, names.middle]).strip() if names.first else None
            )
            family_name = names.last if names.last else None
        else:
            given_name = None
            family_name = None

    # support various keys for affiliations
    affiliations = author.get("affiliation", None) or author.get("affiliations", None)

    # return author in commonmeta format, using name vs. given/family name
    # depending on type
    return compact(
        {
            "id": _id,
            "type": _type,
            "contributorRoles": contributor_roles,
            "name": name if _type == "Organization" else None,
            "givenName": given_name if _type == "Person" else None,
            "familyName": family_name if _type == "Person" else None,
            "affiliations": presence(get_affiliations(wrap(affiliations))),
        }
    )
129
-
130
-
131
def is_personal_name(name):
    """Guess whether a name string belongs to a person rather than an organization.

    Args:
        name: The name to classify; None or an empty string is never personal.

    Returns:
        True if the name looks like a personal name, False otherwise.
    """
    # nothing to classify (also avoids a TypeError on None below)
    if not name:
        return False

    # personal names are not allowed to contain semicolons
    if ";" in name:
        return False

    # check if a name has only one word, e.g. "FamousOrganization", not including commas
    if len(name.split(" ")) == 1 and "," not in name:
        return False

    # check if name contains words known to be used in organization names
    organization_words = (
        "University",
        "College",
        "Institute",
        "School",
        "Center",
        "Department",
        "Laboratory",
        "Library",
        "Museum",
        "Foundation",
        "Society",
        "Association",
        "Company",
        "Corporation",
        "Collaboration",
        "Consortium",
        "Incorporated",
        "Inc.",
        "Institut",
        "Research",
        "Science",
        "Team",
        "Ministry",
        "Government",
    )
    if any(word in name for word in organization_words):
        return False

    # check for suffixes, e.g. "John Smith, MD"
    if name.split(", ")[-1] in ["MD", "PhD", "BS"]:
        return True

    # check if name can be parsed into given/family name
    names = HumanName(name)
    if names and (names.first or names.last):
        return True

    return False
183
-
184
-
185
def cleanup_author(author):
    """Clean up an author name string.

    Args:
        author: Raw author name, or None.

    Returns:
        The cleaned-up name, or None if ``author`` is None or starts with a
        comma.
    """
    if author is None:
        return None

    if author.startswith(","):
        return None

    # detect pattern "Smith J.", but not "Smith, John K.": insert a comma
    # before trailing initials. The pattern is anchored at the end of the
    # string so that middle initials ("John K. Smith") are left alone.
    if "," not in author:
        author = re.sub(r"\s([A-Z]\.)?(-?[A-Z]\.)$", r", \1\2", author)

    # remove spaces around hyphens
    author = author.replace(" - ", "-")

    # replace non-standard space characters (tabs, newlines, no-break
    # spaces, ...) with a regular space
    author = re.sub(r"\s", " ", author)
    return author
203
-
204
-
205
def get_authors(authors, **kwargs):
    """Convert a list of authors into commonmeta format.

    Each entry is parsed with ``get_one_author`` (``kwargs`` are forwarded);
    the results are then passed through ``py_.compact`` and ``py_.uniq``.
    """
    parsed = [get_one_author(entry, **kwargs) for entry in authors]
    non_empty = py_.compact(parsed)
    return py_.uniq(non_empty)
208
-
209
-
210
def authors_as_string(authors: List[dict]) -> str:
    """Convert authors list to string, e.g. for bibtex.

    Args:
        authors: Author dicts in commonmeta format; the value is passed
            through ``wrap`` before iterating, and None entries are skipped.

    Returns:
        The formatted names joined with " and ". Authors for which no name
        can be formatted are omitted.
    """

    def format_author(author):
        """Return "family, given", the family name, or the plain name (may be None)."""
        if author.get("familyName", None) and author.get("givenName", None):
            return f"{author['familyName']}, {author['givenName']}"
        elif author.get("familyName", None):
            return author["familyName"]
        return author.get("name", None)

    formatted = (format_author(i) for i in wrap(authors) if i is not None)
    # skip authors without a usable name; str.join raises TypeError on None
    return " and ".join(name for name in formatted if name)
221
-
222
-
223
def get_affiliations(affiliations: List[dict]) -> List[dict]:
    """Parse array of affiliations (strings or dicts) into commonmeta format.

    Args:
        affiliations: Affiliation names as strings, or dicts with any of the
            keys ``affiliationIdentifier``, ``schemeURI``, ``id``, ``name``
            and ``#text``.

    Returns:
        Affiliation dicts built from ``id`` and ``name`` via ``compact``,
        passed through ``py_.compact`` and ``py_.uniq``.
    """

    def format_element(i):
        """format single affiliation element"""
        affiliation_identifier = None
        # initialised up front so that an identifier without a schemeURI
        # does not hit an unbound local below
        scheme_uri = None
        if isinstance(i, str):
            name = i
        else:
            if i.get("affiliationIdentifier", None) is not None:
                affiliation_identifier = i["affiliationIdentifier"]
                if i.get("schemeURI", None) is not None:
                    # make sure the scheme URI ends with a slash before it
                    # is prepended to the identifier
                    scheme_uri = (
                        i["schemeURI"]
                        if i["schemeURI"].endswith("/")
                        else f"{i['schemeURI']}/"
                    )
                affiliation_identifier = (
                    normalize_id(scheme_uri + affiliation_identifier)
                    if (
                        not affiliation_identifier.startswith("https://")
                        and scheme_uri is not None
                    )
                    else normalize_id(affiliation_identifier)
                )
            elif i.get("id", None) is not None:
                # only accept ids that are http(s) URLs
                f = furl(i.get("id"))
                if f.scheme in ["http", "https"]:
                    affiliation_identifier = i.get("id")
            name = i.get("name", None) or i.get("#text", None)
        return compact(
            {
                "id": affiliation_identifier,
                "name": name,
            }
        )

    return py_.uniq(py_.compact([format_element(i) for i in affiliations]))