commonmeta-py 0.62__py3-none-any.whl → 0.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
commonmeta/__init__.py CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.62"
13
+ __version__ = "0.65"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
@@ -94,4 +94,5 @@ from .doi_utils import (
94
94
  normalize_doi,
95
95
  validate_doi,
96
96
  validate_prefix,
97
+ is_rogue_scholar_doi,
97
98
  )
commonmeta/constants.py CHANGED
@@ -408,11 +408,32 @@ SO_TO_CM_TRANSLATIONS = {
408
408
  "DigitalDocument": "Document",
409
409
  "Dissertation": "Dissertation",
410
410
  "Instrument": "Instrument",
411
+ "MusicRecording": "Audiovisual",
412
+ "MusicAlbum": "Audiovisual",
411
413
  "NewsArticle": "Article",
412
414
  "Legislation": "LegalDocument",
415
+ "ProfilePage": "WebPage",
413
416
  "Report": "Report",
414
417
  "ScholarlyArticle": "JournalArticle",
415
418
  "SoftwareSourceCode": "Software",
419
+ "Video": "Audiovisual",
420
+ "WebSite": "WebPage",
421
+ }
422
+
423
+ # OpenGraph to schema.org mapping
424
+ OG_TO_SO_TRANSLATIONS = {
425
+ "music.song": "MusicRecording",
426
+ "music.album": "MusicAlbum",
427
+ "music.playlist": "MusicPlaylist",
428
+ "music.radio_station": "RadioStation",
429
+ "video.movie": "Video",
430
+ "video.episode": "Video",
431
+ "video.tv_show": "Video",
432
+ "video.other": "Video",
433
+ "article": "Article",
434
+ "book": "Book",
435
+ "profile": "ProfilePage",
436
+ "website": "WebSite",
416
437
  }
417
438
 
418
439
  CM_TO_SO_TRANSLATIONS = {
@@ -36,6 +36,7 @@ from ..constants import (
36
36
  SO_TO_CM_TRANSLATIONS,
37
37
  SO_TO_DC_RELATION_TYPES,
38
38
  SO_TO_DC_REVERSE_RELATION_TYPES,
39
+ OG_TO_SO_TRANSLATIONS,
39
40
  Commonmeta,
40
41
  )
41
42
 
@@ -59,7 +60,6 @@ def get_schema_org(pid: str, **kwargs) -> dict:
59
60
  "via": "schema_org",
60
61
  "errors": [str(error)],
61
62
  }
62
-
63
63
  if response.status_code >= 400:
64
64
  if response.status_code in [404, 410]:
65
65
  state = "not_found"
@@ -105,7 +105,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
105
105
 
106
106
  # load html meta tags
107
107
  data = get_html_meta(soup)
108
-
108
+ print(data)
109
109
  # load site-specific metadata
110
110
  data |= web_translator(soup, url)
111
111
 
@@ -119,6 +119,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
119
119
  None,
120
120
  )
121
121
  if json_ld is not None:
122
+ print(json_ld)
122
123
  data |= json_ld
123
124
 
124
125
  # if @id is a DOI, get metadata from Crossref or DataCite
@@ -136,7 +137,7 @@ def get_schema_org(pid: str, **kwargs) -> dict:
136
137
  # author and creator are synonyms
137
138
  if data.get("author", None) is None and data.get("creator", None) is not None:
138
139
  data["author"] = data["creator"]
139
-
140
+ print(data)
140
141
  return data | {"via": "schema_org", "state": "findable"}
141
142
 
142
143
 
@@ -412,11 +413,13 @@ def get_html_meta(soup):
412
413
  data["@id"] = normalize_id(pid)
413
414
 
414
415
  _type = (
415
- soup.select_one("meta[property='og:type']")
416
- or soup.select_one("meta[name='dc.type']")
416
+ soup.select_one("meta[name='dc.type']")
417
417
  or soup.select_one("meta[name='DC.type']")
418
418
  )
419
419
  data["@type"] = _type["content"].capitalize() if _type else None
420
+ if _type is None:
421
+ _type = soup.select_one("meta[property='og:type']")
422
+ data["@type"] = OG_TO_SO_TRANSLATIONS.get(_type["content"]) if _type else None
420
423
 
421
424
  url = soup.select_one("meta[property='og:url']") or soup.select_one(
422
425
  "meta[name='twitter:url']"
@@ -431,6 +434,7 @@ def get_html_meta(soup):
431
434
  or soup.select_one("meta[name='DC.title']")
432
435
  or soup.select_one("meta[property='og:title']")
433
436
  or soup.select_one("meta[name='twitter:title']")
437
+ or soup.select_one("meta[name='title']")
434
438
  )
435
439
  data["name"] = title["content"] if title else None
436
440
 
@@ -441,6 +445,7 @@ def get_html_meta(soup):
441
445
  "meta[name='dc.description']"
442
446
  or soup.select_one("meta[property='og:description']")
443
447
  or soup.select_one("meta[name='twitter:description']")
448
+ or soup.select_one("meta[name='description']")
444
449
  )
445
450
  data["description"] = description["content"] if description else None
446
451
 
commonmeta/utils.py CHANGED
@@ -1096,22 +1096,48 @@ def from_curie(id: Optional[str]) -> Optional[str]:
1096
1096
  if id is None:
1097
1097
  return None
1098
1098
  _type = id.split(":")[0]
1099
- if _type == "DOI":
1099
+ if _type.upper() == "DOI":
1100
1100
  return doi_as_url(id.split(":")[1])
1101
- elif _type == "ROR":
1101
+ elif _type.upper() == "ROR":
1102
1102
  return "https://ror.org/" + id.split(":")[1]
1103
- elif _type == "ISNI":
1103
+ elif _type.upper() == "ISNI":
1104
1104
  return "https://isni.org/isni/" + id.split(":")[1]
1105
- elif _type == "ORCID":
1105
+ elif _type.upper() == "ORCID":
1106
1106
  return normalize_orcid(id.split(":")[1])
1107
- elif _type == "URL":
1107
+ elif _type.upper() == "URL":
1108
1108
  return normalize_url(id.split(":")[1])
1109
- elif _type == "JDP":
1109
+ elif _type.upper() == "JDP":
1110
1110
  return id.split(":")[1]
1111
1111
  # TODO: resolvable url for other identifier types
1112
1112
  return None
1113
1113
 
1114
1114
 
1115
+ def extract_curie(string: Optional[str]) -> Optional[str]:
1116
+ """Extract CURIE"""
1117
+ if string is None:
1118
+ return None
1119
+ match = re.search(r"((?:doi|DOI):\s?([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
1120
+ if match is None:
1121
+ return None
1122
+ return doi_as_url(match.group(2))
1123
+
1124
+
1125
+ def extract_url(string: str) -> list:
1126
+ """Extract urls from string, including markdown and html."""
1127
+
1128
+ match = re.search(r"((?:http|https):\/\/(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
1129
+ if match is None:
1130
+ return None
1131
+ return normalize_url(match.group(1))
1132
+
1133
+
1134
+ def extract_urls(string: str) -> list:
1135
+ """Extract urls from string, including markdown and html."""
1136
+
1137
+ urls = re.findall(r"((?:http|https):\/\/(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))", string)
1138
+ return py_.uniq(urls)
1139
+
1140
+
1115
1141
  def issn_as_url(issn: str) -> Optional[str]:
1116
1142
  """ISSN as URL"""
1117
1143
  if normalize_issn(issn) is None:
@@ -257,7 +257,8 @@ def to_inveniordm_reference(reference: dict) -> dict:
257
257
  identifier = reference.get("id", None)
258
258
  scheme = "url"
259
259
  else:
260
- return None
260
+ identifier = None
261
+ scheme = None
261
262
 
262
263
  if reference.get("unstructured", None) is None:
263
264
  # use title as unstructured reference
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: commonmeta-py
3
- Version: 0.62
3
+ Version: 0.65
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Home-page: https://python.commonmeta.org
6
6
  License: MIT
@@ -1,9 +1,9 @@
1
- commonmeta/__init__.py,sha256=t2QRGjR73eSP55rlkljDk1Rckce3LCX8TSKrmZzNbh4,1795
1
+ commonmeta/__init__.py,sha256=LqrBGNgQuAGQiRF9HrhX5iHHu14t44M7nTTOYmg4Rmc,1821
2
2
  commonmeta/api_utils.py,sha256=-ZHGVZZhJqnjnsLtp4-PoeHYbDqL0cQme7W70BEjo4U,2677
3
3
  commonmeta/author_utils.py,sha256=zBIPTgP5n7Zx57xomJ2h7x0dvC0AV8gJ2gPoYeDy5Lo,8348
4
4
  commonmeta/base_utils.py,sha256=AsUElA5kT2fw_Osy7Uaj2F6MKeq9yB7d5f2V-h2lh7c,3750
5
5
  commonmeta/cli.py,sha256=sOI9BJTePnljVcXcZ95N7TKXDT283XpjUaak7bMnbr0,6076
6
- commonmeta/constants.py,sha256=VfjXLkwoV4A5uztH3vgDJ_qrt7PaWGO6QtHbAt4r03c,17501
6
+ commonmeta/constants.py,sha256=AFm8gSo4WGnTdJOm1SOGLK602BctcQbaWU_tKCkgn_4,18087
7
7
  commonmeta/crossref_utils.py,sha256=qJlTZtfKR2shAXQDm8VBYUujKFkTtZTUz19GuMUANaI,22198
8
8
  commonmeta/date_utils.py,sha256=rJRV4YmWKQWU__iAV8www3cqwaefC0iRKyHwvxrr_XY,6316
9
9
  commonmeta/doi_utils.py,sha256=xlYQq-qkqhz07CLKpL_WfxZBT8maXgB9-TvQHlL2ZoY,9266
@@ -22,7 +22,7 @@ commonmeta/readers/inveniordm_reader.py,sha256=jzv0rXzT8OCdPD_MShBXTnlwD-F9tpTX7
22
22
  commonmeta/readers/json_feed_reader.py,sha256=ctlASyxByjXDVgREzdeYOCZezn9aFFv3yKogDFd8WNs,14174
23
23
  commonmeta/readers/kbase_reader.py,sha256=ehKXQsJyPCtaq2FmBxNb2Jb5Nktpx8pNscpmEM6N0A4,6763
24
24
  commonmeta/readers/ris_reader.py,sha256=v6qOd-i2OcMTEFy5RGd3MlYthJcYSU6yzmZ5yHDzmII,3677
25
- commonmeta/readers/schema_org_reader.py,sha256=xyWzO2XAWlI2pYVl2EbVRsUmfiWXEwP64CHRBQNRN-M,16835
25
+ commonmeta/readers/schema_org_reader.py,sha256=udvRBeEnsyRmy5UOIk523f7x08RRLvxqTCMMS736oFs,17132
26
26
  commonmeta/resources/cff_v1.2.0.json,sha256=MpfjDYgX7fN9PLiG54ISZ2uu9WItNqfh-yaRuTf6Ptg,46691
27
27
  commonmeta/resources/commonmeta_v0.12.json,sha256=HUSNReXh2JN3Q6YWSt7CE69js8dh50OlpMYGTyU98oU,16762
28
28
  commonmeta/resources/commonmeta_v0.13.json,sha256=2-WSZGijR13zVu97S_YHXr-cyeLW7hzHXYMlr6nIjdw,15787
@@ -58,7 +58,7 @@ commonmeta/resources/styles/modern-language-association.csl,sha256=HI2iU4krze1aH
58
58
  commonmeta/resources/styles/vancouver.csl,sha256=lun3_i2oTilgsANk4LjFao2UDPQlGj_hgFgKAWC_DF8,12878
59
59
  commonmeta/schema_utils.py,sha256=gg3l1jd_lFtRkQlO1DYGMVbC10nEmVTN4AWacxC4AAE,915
60
60
  commonmeta/translators.py,sha256=RpGJtKNLjmz41VREZDY7KyyE2eXOi8j7m-da4jHmknI,1362
61
- commonmeta/utils.py,sha256=0ky8xyDQWVND5nJWApPgyVhbjXdPPzfpx4fJpX9ivyw,43674
61
+ commonmeta/utils.py,sha256=lIH7VejIn_gReLsuXsAZxE-RiMCRGECA_6aPrhGsBFc,44596
62
62
  commonmeta/writers/__init__.py,sha256=47-snms6xBHkoEXKYV1DBtH1npAtlVtvY29Z4Zr45qI,45
63
63
  commonmeta/writers/bibtex_writer.py,sha256=s3hIJIgWvSG7TAriZMRQEAyuitw6ebwWSI1YcYFQ-do,4971
64
64
  commonmeta/writers/citation_writer.py,sha256=RjaNh9EALxq6gfODLRWVJxGxPArGd6ZiHUlkYnCT6MA,2355
@@ -66,11 +66,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=2qlttCfYpGhfVjrYkjzbIra7AywssRLT3
66
66
  commonmeta/writers/crossref_xml_writer.py,sha256=0Ds494RnXfdfjWw5CLX1kwV2zP7gqffdVqO-X74Uc6c,492
67
67
  commonmeta/writers/csl_writer.py,sha256=6N-93R1emcOsZrUTIhPBVd_Fv1C8Z5EAFYI0mYjoYaY,2797
68
68
  commonmeta/writers/datacite_writer.py,sha256=G7Lr0aZ4sAEdbfXe3dG4Y6AyGUKA9UWr_iiaQRDnV24,6233
69
- commonmeta/writers/inveniordm_writer.py,sha256=oVcXdHYtuugbfDgKO8JwTRXmP7AK2U96uK4YF0eKXAY,11399
69
+ commonmeta/writers/inveniordm_writer.py,sha256=YXLfiMkWDMMd7ZlOzhp0zNieQFfHKZ4m5FQLIl_XuWI,11427
70
70
  commonmeta/writers/ris_writer.py,sha256=AcnCszS3WY9lF594NbFBtLylsA8ownnYp_XLQJ84Ios,2093
71
71
  commonmeta/writers/schema_org_writer.py,sha256=5j002uCNLdlScZMNQmPjodcVWqaBh2z38zL1H4lo2hY,5741
72
- commonmeta_py-0.62.dist-info/LICENSE,sha256=746hEF2wZCKkcckk5-_DcBLtHewfaEMS4iXTlA1PVwk,1074
73
- commonmeta_py-0.62.dist-info/METADATA,sha256=3amCMEJUFi6F88HR4wPmrBww-8VL49HgBs0LamBOsqo,8279
74
- commonmeta_py-0.62.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
75
- commonmeta_py-0.62.dist-info/entry_points.txt,sha256=vbcDw3_2lMTKdcAL2VUF4DRYRpKuzXVYLMCdgKVf88U,49
76
- commonmeta_py-0.62.dist-info/RECORD,,
72
+ commonmeta_py-0.65.dist-info/LICENSE,sha256=746hEF2wZCKkcckk5-_DcBLtHewfaEMS4iXTlA1PVwk,1074
73
+ commonmeta_py-0.65.dist-info/METADATA,sha256=th0VmBY3Kk5evcQrUOk55lTyqjeZ6CEuXnvumG_Duvc,8279
74
+ commonmeta_py-0.65.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
75
+ commonmeta_py-0.65.dist-info/entry_points.txt,sha256=vbcDw3_2lMTKdcAL2VUF4DRYRpKuzXVYLMCdgKVf88U,49
76
+ commonmeta_py-0.65.dist-info/RECORD,,