commonmeta-py 0.62__py3-none-any.whl → 0.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +2 -1
- commonmeta/constants.py +21 -0
- commonmeta/readers/schema_org_reader.py +10 -5
- commonmeta/utils.py +32 -6
- commonmeta/writers/inveniordm_writer.py +2 -1
- {commonmeta_py-0.62.dist-info → commonmeta_py-0.65.dist-info}/METADATA +1 -1
- {commonmeta_py-0.62.dist-info → commonmeta_py-0.65.dist-info}/RECORD +10 -10
- {commonmeta_py-0.62.dist-info → commonmeta_py-0.65.dist-info}/LICENSE +0 -0
- {commonmeta_py-0.62.dist-info → commonmeta_py-0.65.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.62.dist-info → commonmeta_py-0.65.dist-info}/entry_points.txt +0 -0
commonmeta/__init__.py
CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.
|
13
|
+
__version__ = "0.65"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
@@ -94,4 +94,5 @@ from .doi_utils import (
|
|
94
94
|
normalize_doi,
|
95
95
|
validate_doi,
|
96
96
|
validate_prefix,
|
97
|
+
is_rogue_scholar_doi,
|
97
98
|
)
|
commonmeta/constants.py
CHANGED
@@ -408,11 +408,32 @@ SO_TO_CM_TRANSLATIONS = {
|
|
408
408
|
"DigitalDocument": "Document",
|
409
409
|
"Dissertation": "Dissertation",
|
410
410
|
"Instrument": "Instrument",
|
411
|
+
"MusicRecording": "Audiovisual",
|
412
|
+
"MusicAlbum": "Audiovisual",
|
411
413
|
"NewsArticle": "Article",
|
412
414
|
"Legislation": "LegalDocument",
|
415
|
+
"ProfilePage": "WebPage",
|
413
416
|
"Report": "Report",
|
414
417
|
"ScholarlyArticle": "JournalArticle",
|
415
418
|
"SoftwareSourceCode": "Software",
|
419
|
+
"Video": "Audiovisual",
|
420
|
+
"WebSite": "WebPage",
|
421
|
+
}
|
422
|
+
|
423
|
+
# OpenGraph to schema.org mapping
|
424
|
+
OG_TO_SO_TRANSLATIONS = {
|
425
|
+
"music.song": "MusicRecording",
|
426
|
+
"music.album": "MusicAlbum",
|
427
|
+
"music.playlist": "MusicPlaylist",
|
428
|
+
"music.radio_station": "RadioStation",
|
429
|
+
"video.movie": "Video",
|
430
|
+
"video.episode": "Video",
|
431
|
+
"video.tv_show": "Video",
|
432
|
+
"video.other": "Video",
|
433
|
+
"article": "Article",
|
434
|
+
"book": "Book",
|
435
|
+
"profile": "ProfilePage",
|
436
|
+
"website": "WebSite",
|
416
437
|
}
|
417
438
|
|
418
439
|
CM_TO_SO_TRANSLATIONS = {
|
@@ -36,6 +36,7 @@ from ..constants import (
|
|
36
36
|
SO_TO_CM_TRANSLATIONS,
|
37
37
|
SO_TO_DC_RELATION_TYPES,
|
38
38
|
SO_TO_DC_REVERSE_RELATION_TYPES,
|
39
|
+
OG_TO_SO_TRANSLATIONS,
|
39
40
|
Commonmeta,
|
40
41
|
)
|
41
42
|
|
@@ -59,7 +60,6 @@ def get_schema_org(pid: str, **kwargs) -> dict:
|
|
59
60
|
"via": "schema_org",
|
60
61
|
"errors": [str(error)],
|
61
62
|
}
|
62
|
-
|
63
63
|
if response.status_code >= 400:
|
64
64
|
if response.status_code in [404, 410]:
|
65
65
|
state = "not_found"
|
@@ -105,7 +105,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
|
|
105
105
|
|
106
106
|
# load html meta tags
|
107
107
|
data = get_html_meta(soup)
|
108
|
-
|
108
|
+
print(data)
|
109
109
|
# load site-specific metadata
|
110
110
|
data |= web_translator(soup, url)
|
111
111
|
|
@@ -119,6 +119,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
|
|
119
119
|
None,
|
120
120
|
)
|
121
121
|
if json_ld is not None:
|
122
|
+
print(json_ld)
|
122
123
|
data |= json_ld
|
123
124
|
|
124
125
|
# if @id is a DOI, get metadata from Crossref or DataCite
|
@@ -136,7 +137,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
|
|
136
137
|
# author and creator are synonyms
|
137
138
|
if data.get("author", None) is None and data.get("creator", None) is not None:
|
138
139
|
data["author"] = data["creator"]
|
139
|
-
|
140
|
+
print(data)
|
140
141
|
return data | {"via": "schema_org", "state": "findable"}
|
141
142
|
|
142
143
|
|
@@ -412,11 +413,13 @@ def get_html_meta(soup):
|
|
412
413
|
data["@id"] = normalize_id(pid)
|
413
414
|
|
414
415
|
_type = (
|
415
|
-
soup.select_one("meta[
|
416
|
-
or soup.select_one("meta[name='dc.type']")
|
416
|
+
soup.select_one("meta[name='dc.type']")
|
417
417
|
or soup.select_one("meta[name='DC.type']")
|
418
418
|
)
|
419
419
|
data["@type"] = _type["content"].capitalize() if _type else None
|
420
|
+
if _type is None:
|
421
|
+
_type = soup.select_one("meta[property='og:type']")
|
422
|
+
data["@type"] = OG_TO_SO_TRANSLATIONS.get(_type["content"]) if _type else None
|
420
423
|
|
421
424
|
url = soup.select_one("meta[property='og:url']") or soup.select_one(
|
422
425
|
"meta[name='twitter:url']"
|
@@ -431,6 +434,7 @@ def get_html_meta(soup):
|
|
431
434
|
or soup.select_one("meta[name='DC.title']")
|
432
435
|
or soup.select_one("meta[property='og:title']")
|
433
436
|
or soup.select_one("meta[name='twitter:title']")
|
437
|
+
or soup.select_one("meta[name='title']")
|
434
438
|
)
|
435
439
|
data["name"] = title["content"] if title else None
|
436
440
|
|
@@ -441,6 +445,7 @@ def get_html_meta(soup):
|
|
441
445
|
"meta[name='dc.description']"
|
442
446
|
or soup.select_one("meta[property='og:description']")
|
443
447
|
or soup.select_one("meta[name='twitter:description']")
|
448
|
+
or soup.select_one("meta[name='description']")
|
444
449
|
)
|
445
450
|
data["description"] = description["content"] if description else None
|
446
451
|
|
commonmeta/utils.py
CHANGED
@@ -1096,22 +1096,48 @@ def from_curie(id: Optional[str]) -> Optional[str]:
|
|
1096
1096
|
if id is None:
|
1097
1097
|
return None
|
1098
1098
|
_type = id.split(":")[0]
|
1099
|
-
if _type == "DOI":
|
1099
|
+
if _type.upper() == "DOI":
|
1100
1100
|
return doi_as_url(id.split(":")[1])
|
1101
|
-
elif _type == "ROR":
|
1101
|
+
elif _type.upper() == "ROR":
|
1102
1102
|
return "https://ror.org/" + id.split(":")[1]
|
1103
|
-
elif _type == "ISNI":
|
1103
|
+
elif _type.upper() == "ISNI":
|
1104
1104
|
return "https://isni.org/isni/" + id.split(":")[1]
|
1105
|
-
elif _type == "ORCID":
|
1105
|
+
elif _type.upper() == "ORCID":
|
1106
1106
|
return normalize_orcid(id.split(":")[1])
|
1107
|
-
elif _type == "URL":
|
1107
|
+
elif _type.upper() == "URL":
|
1108
1108
|
return normalize_url(id.split(":")[1])
|
1109
|
-
elif _type == "JDP":
|
1109
|
+
elif _type.upper() == "JDP":
|
1110
1110
|
return id.split(":")[1]
|
1111
1111
|
# TODO: resolvable url for other identifier types
|
1112
1112
|
return None
|
1113
1113
|
|
1114
1114
|
|
1115
|
+
def extract_curie(string: Optional[str]) -> Optional[str]:
|
1116
|
+
"""Extract CURIE"""
|
1117
|
+
if string is None:
|
1118
|
+
return None
|
1119
|
+
match = re.search(r"((?:doi|DOI):\s?([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
|
1120
|
+
if match is None:
|
1121
|
+
return None
|
1122
|
+
return doi_as_url(match.group(2))
|
1123
|
+
|
1124
|
+
|
1125
|
+
def extract_url(string: str) -> list:
|
1126
|
+
"""Extract urls from string, including markdown and html."""
|
1127
|
+
|
1128
|
+
match = re.search(r"((?:http|https):\/\/(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
|
1129
|
+
if match is None:
|
1130
|
+
return None
|
1131
|
+
return normalize_url(match.group(1))
|
1132
|
+
|
1133
|
+
|
1134
|
+
def extract_urls(string: str) -> list:
|
1135
|
+
"""Extract urls from string, including markdown and html."""
|
1136
|
+
|
1137
|
+
urls = re.findall(r"((?:http|https):\/\/(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
|
1138
|
+
return py_.uniq(urls)
|
1139
|
+
|
1140
|
+
|
1115
1141
|
def issn_as_url(issn: str) -> Optional[str]:
|
1116
1142
|
"""ISSN as URL"""
|
1117
1143
|
if normalize_issn(issn) is None:
|
@@ -257,7 +257,8 @@ def to_inveniordm_reference(reference: dict) -> dict:
|
|
257
257
|
identifier = reference.get("id", None)
|
258
258
|
scheme = "url"
|
259
259
|
else:
|
260
|
-
|
260
|
+
identifier = None
|
261
|
+
scheme = None
|
261
262
|
|
262
263
|
if reference.get("unstructured", None) is None:
|
263
264
|
# use title as unstructured reference
|
@@ -1,9 +1,9 @@
|
|
1
|
-
commonmeta/__init__.py,sha256=
|
1
|
+
commonmeta/__init__.py,sha256=LqrBGNgQuAGQiRF9HrhX5iHHu14t44M7nTTOYmg4Rmc,1821
|
2
2
|
commonmeta/api_utils.py,sha256=-ZHGVZZhJqnjnsLtp4-PoeHYbDqL0cQme7W70BEjo4U,2677
|
3
3
|
commonmeta/author_utils.py,sha256=zBIPTgP5n7Zx57xomJ2h7x0dvC0AV8gJ2gPoYeDy5Lo,8348
|
4
4
|
commonmeta/base_utils.py,sha256=AsUElA5kT2fw_Osy7Uaj2F6MKeq9yB7d5f2V-h2lh7c,3750
|
5
5
|
commonmeta/cli.py,sha256=sOI9BJTePnljVcXcZ95N7TKXDT283XpjUaak7bMnbr0,6076
|
6
|
-
commonmeta/constants.py,sha256=
|
6
|
+
commonmeta/constants.py,sha256=AFm8gSo4WGnTdJOm1SOGLK602BctcQbaWU_tKCkgn_4,18087
|
7
7
|
commonmeta/crossref_utils.py,sha256=qJlTZtfKR2shAXQDm8VBYUujKFkTtZTUz19GuMUANaI,22198
|
8
8
|
commonmeta/date_utils.py,sha256=rJRV4YmWKQWU__iAV8www3cqwaefC0iRKyHwvxrr_XY,6316
|
9
9
|
commonmeta/doi_utils.py,sha256=xlYQq-qkqhz07CLKpL_WfxZBT8maXgB9-TvQHlL2ZoY,9266
|
@@ -22,7 +22,7 @@ commonmeta/readers/inveniordm_reader.py,sha256=jzv0rXzT8OCdPD_MShBXTnlwD-F9tpTX7
|
|
22
22
|
commonmeta/readers/json_feed_reader.py,sha256=ctlASyxByjXDVgREzdeYOCZezn9aFFv3yKogDFd8WNs,14174
|
23
23
|
commonmeta/readers/kbase_reader.py,sha256=ehKXQsJyPCtaq2FmBxNb2Jb5Nktpx8pNscpmEM6N0A4,6763
|
24
24
|
commonmeta/readers/ris_reader.py,sha256=v6qOd-i2OcMTEFy5RGd3MlYthJcYSU6yzmZ5yHDzmII,3677
|
25
|
-
commonmeta/readers/schema_org_reader.py,sha256=
|
25
|
+
commonmeta/readers/schema_org_reader.py,sha256=udvRBeEnsyRmy5UOIk523f7x08RRLvxqTCMMS736oFs,17132
|
26
26
|
commonmeta/resources/cff_v1.2.0.json,sha256=MpfjDYgX7fN9PLiG54ISZ2uu9WItNqfh-yaRuTf6Ptg,46691
|
27
27
|
commonmeta/resources/commonmeta_v0.12.json,sha256=HUSNReXh2JN3Q6YWSt7CE69js8dh50OlpMYGTyU98oU,16762
|
28
28
|
commonmeta/resources/commonmeta_v0.13.json,sha256=2-WSZGijR13zVu97S_YHXr-cyeLW7hzHXYMlr6nIjdw,15787
|
@@ -58,7 +58,7 @@ commonmeta/resources/styles/modern-language-association.csl,sha256=HI2iU4krze1aH
|
|
58
58
|
commonmeta/resources/styles/vancouver.csl,sha256=lun3_i2oTilgsANk4LjFao2UDPQlGj_hgFgKAWC_DF8,12878
|
59
59
|
commonmeta/schema_utils.py,sha256=gg3l1jd_lFtRkQlO1DYGMVbC10nEmVTN4AWacxC4AAE,915
|
60
60
|
commonmeta/translators.py,sha256=RpGJtKNLjmz41VREZDY7KyyE2eXOi8j7m-da4jHmknI,1362
|
61
|
-
commonmeta/utils.py,sha256=
|
61
|
+
commonmeta/utils.py,sha256=lIH7VejIn_gReLsuXsAZxE-RiMCRGECA_6aPrhGsBFc,44596
|
62
62
|
commonmeta/writers/__init__.py,sha256=47-snms6xBHkoEXKYV1DBtH1npAtlVtvY29Z4Zr45qI,45
|
63
63
|
commonmeta/writers/bibtex_writer.py,sha256=s3hIJIgWvSG7TAriZMRQEAyuitw6ebwWSI1YcYFQ-do,4971
|
64
64
|
commonmeta/writers/citation_writer.py,sha256=RjaNh9EALxq6gfODLRWVJxGxPArGd6ZiHUlkYnCT6MA,2355
|
@@ -66,11 +66,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=2qlttCfYpGhfVjrYkjzbIra7AywssRLT3
|
|
66
66
|
commonmeta/writers/crossref_xml_writer.py,sha256=0Ds494RnXfdfjWw5CLX1kwV2zP7gqffdVqO-X74Uc6c,492
|
67
67
|
commonmeta/writers/csl_writer.py,sha256=6N-93R1emcOsZrUTIhPBVd_Fv1C8Z5EAFYI0mYjoYaY,2797
|
68
68
|
commonmeta/writers/datacite_writer.py,sha256=G7Lr0aZ4sAEdbfXe3dG4Y6AyGUKA9UWr_iiaQRDnV24,6233
|
69
|
-
commonmeta/writers/inveniordm_writer.py,sha256=
|
69
|
+
commonmeta/writers/inveniordm_writer.py,sha256=YXLfiMkWDMMd7ZlOzhp0zNieQFfHKZ4m5FQLIl_XuWI,11427
|
70
70
|
commonmeta/writers/ris_writer.py,sha256=AcnCszS3WY9lF594NbFBtLylsA8ownnYp_XLQJ84Ios,2093
|
71
71
|
commonmeta/writers/schema_org_writer.py,sha256=5j002uCNLdlScZMNQmPjodcVWqaBh2z38zL1H4lo2hY,5741
|
72
|
-
commonmeta_py-0.
|
73
|
-
commonmeta_py-0.
|
74
|
-
commonmeta_py-0.
|
75
|
-
commonmeta_py-0.
|
76
|
-
commonmeta_py-0.
|
72
|
+
commonmeta_py-0.65.dist-info/LICENSE,sha256=746hEF2wZCKkcckk5-_DcBLtHewfaEMS4iXTlA1PVwk,1074
|
73
|
+
commonmeta_py-0.65.dist-info/METADATA,sha256=th0VmBY3Kk5evcQrUOk55lTyqjeZ6CEuXnvumG_Duvc,8279
|
74
|
+
commonmeta_py-0.65.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
75
|
+
commonmeta_py-0.65.dist-info/entry_points.txt,sha256=vbcDw3_2lMTKdcAL2VUF4DRYRpKuzXVYLMCdgKVf88U,49
|
76
|
+
commonmeta_py-0.65.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|