commonmeta-py 0.101__py3-none-any.whl → 0.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. commonmeta/__init__.py +51 -50
  2. commonmeta/base_utils.py +1 -0
  3. commonmeta/cli.py +6 -5
  4. commonmeta/constants.py +35 -1
  5. commonmeta/crossref_utils.py +11 -8
  6. commonmeta/date_utils.py +1 -0
  7. commonmeta/doi_utils.py +42 -14
  8. commonmeta/metadata.py +209 -100
  9. commonmeta/readers/cff_reader.py +1 -0
  10. commonmeta/readers/codemeta_reader.py +1 -0
  11. commonmeta/readers/commonmeta_reader.py +1 -0
  12. commonmeta/readers/crossref_reader.py +19 -18
  13. commonmeta/readers/csl_reader.py +4 -1
  14. commonmeta/readers/inveniordm_reader.py +14 -9
  15. commonmeta/readers/json_feed_reader.py +9 -3
  16. commonmeta/readers/kbase_reader.py +1 -0
  17. commonmeta/readers/openalex_reader.py +380 -0
  18. commonmeta/readers/ris_reader.py +1 -0
  19. commonmeta/resources/commonmeta_v0.16.json +21 -5
  20. commonmeta/schema_utils.py +1 -0
  21. commonmeta/utils.py +121 -16
  22. commonmeta/writers/bibtex_writer.py +1 -0
  23. commonmeta/writers/citation_writer.py +1 -0
  24. commonmeta/writers/crossref_xml_writer.py +1 -0
  25. commonmeta/writers/csl_writer.py +1 -0
  26. commonmeta/writers/datacite_writer.py +1 -0
  27. commonmeta/writers/ris_writer.py +1 -0
  28. commonmeta/writers/schema_org_writer.py +1 -0
  29. {commonmeta_py-0.101.dist-info → commonmeta_py-0.104.dist-info}/METADATA +5 -8
  30. {commonmeta_py-0.101.dist-info → commonmeta_py-0.104.dist-info}/RECORD +33 -32
  31. {commonmeta_py-0.101.dist-info → commonmeta_py-0.104.dist-info}/licenses/LICENSE +1 -1
  32. {commonmeta_py-0.101.dist-info → commonmeta_py-0.104.dist-info}/WHEEL +0 -0
  33. {commonmeta_py-0.101.dist-info → commonmeta_py-0.104.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,380 @@
1
+ """OpenAlex reader for commonmeta-py"""
2
+
3
+ from typing import Optional
4
+
5
+ import httpx
6
+ from pydash import py_
7
+
8
+ from ..author_utils import get_authors
9
+ from ..base_utils import compact, presence, sanitize, wrap
10
+ from ..constants import (
11
+ CR_TO_CM_TRANSLATIONS,
12
+ OA_TO_CM_CONTAINER_TRANLATIONS,
13
+ OA_TO_CM_TRANSLATIONS,
14
+ Commonmeta,
15
+ )
16
+ from ..doi_utils import (
17
+ normalize_doi,
18
+ openalex_api_sample_url,
19
+ openalex_api_url,
20
+ )
21
+ from ..utils import (
22
+ dict_to_spdx,
23
+ normalize_url,
24
+ validate_openalex,
25
+ )
26
+
27
+ # Map OpenAlex license strings to SPDX licenseId. May not be the correct license version.
28
+ OA_LICENSES = {"cc-by": "CC-BY-4.0", "cc0": "CC0-1.0"}
29
+ OA_IDENTIFIER_TYPES = {
30
+ "openalex": "OpenAlex",
31
+ "doi": "DOI",
32
+ "mag": "MAG",
33
+ "pmid": "PMID",
34
+ "pmcid": "PMCID",
35
+ }
36
+
37
+
38
def get_openalex(pid: str, **kwargs) -> dict:
    """Fetch a work record from the OpenAlex API for a DOI-like pid.

    Returns the parsed JSON with a "via" marker, or {"state": "not_found"}
    when the pid is not a DOI or the API call fails.
    """
    doi = normalize_doi(pid)
    if doi is None:
        return {"state": "not_found"}
    response = httpx.get(openalex_api_url(doi), timeout=10, **kwargs)
    if response.status_code != 200:
        return {"state": "not_found"}
    return {**response.json(), "via": "openalex"}
def read_openalex(data: Optional[dict], **kwargs) -> Commonmeta:
    """Convert an OpenAlex work record into commonmeta format.

    Args:
        data: raw OpenAlex work JSON (as returned by get_openalex), or None.
        kwargs: extra key/values merged into the returned metadata.

    Returns:
        A commonmeta dict, or {"state": "not_found"} when data is None.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data
    read_options = kwargs or {}

    doi = meta.get("doi", None)
    _id = normalize_doi(doi)
    # OpenAlex carries the Crossref type; fall back to "Other" when unmapped.
    _type = CR_TO_CM_TRANSLATIONS.get(meta.get("type_crossref", None)) or "Other"
    additional_type = OA_TO_CM_TRANSLATIONS.get(meta.get("type", None))
    if additional_type == _type:
        additional_type = None

    archive_locations = []
    contributors = get_contributors(wrap(meta.get("authorships")))
    contributors = get_authors(contributors)

    url = normalize_url(
        py_.get(meta, "primary_location.landing_page_url") or py_.get(meta, "id")
    )
    title = meta.get("title", None)
    titles = [{"title": sanitize(title)}] if title is not None else None
    publisher = compact(
        {"name": py_.get(meta, "primary_location.source.host_organization_name")}
    )
    date = compact(
        {
            "published": py_.get(meta, "publication_date")
            or py_.get(meta, "created_date")
        }
    )
    # Skip identifier types not in OA_IDENTIFIER_TYPES instead of raising
    # KeyError, and guard against "ids" being explicitly None.
    identifiers = [
        {
            "identifier": uid,
            "identifierType": OA_IDENTIFIER_TYPES[uid_type],
        }
        for uid_type, uid in (meta.get("ids") or {}).items()
        if uid_type in OA_IDENTIFIER_TYPES
    ]

    license_ = py_.get(meta, "best_oa_location.license")
    if license_ is not None:
        # Map OpenAlex license slugs to SPDX before expanding to a dict.
        license_ = OA_LICENSES.get(license_, license_)
        license_ = dict_to_spdx({"id": license_})
    container = get_container(meta)
    relations = []
    references = [
        get_related(i) for i in get_references(meta.get("referenced_works", []))
    ]
    funding_references = from_openalex_funding(wrap(meta.get("grants", None)))

    description = get_abstract(meta)
    if description is not None:
        descriptions = [{"description": sanitize(description), "type": "Abstract"}]
    else:
        descriptions = None

    subjects = py_.uniq(
        [
            {"subject": py_.get(i, "subfield.display_name")}
            for i in wrap(meta.get("topics", None))
        ]
    )
    files = get_files(meta)

    return {
        # required properties
        "id": _id,
        "type": _type,
        # recommended and optional properties
        "additionalType": additional_type,
        "archiveLocations": presence(archive_locations),
        "container": presence(container),
        "contributors": presence(contributors),
        "date": presence(date),
        "descriptions": presence(descriptions),
        "files": presence(files),
        "fundingReferences": presence(funding_references),
        "geoLocations": None,
        "identifiers": identifiers,
        "language": meta.get("language", None),
        "license": license_,
        "provider": "OpenAlex",
        "publisher": presence(publisher),
        "references": presence(references),
        "relations": presence(relations),
        "subjects": presence(subjects),
        "titles": presence(titles),
        "url": url,
        "version": meta.get("version", None),
    } | read_options
def get_abstract(meta):
    """Reconstruct the abstract text from OpenAlex's abstract_inverted_index.

    The index maps each word to the list of positions where it occurs;
    we invert it back into word order and join with spaces.

    Returns None when no abstract (or an empty index) is present.
    """
    inverted_index = meta.get("abstract_inverted_index") if isinstance(meta, dict) else None
    if not inverted_index:
        return None

    # Longest position determines the abstract length; an index whose
    # position lists are all empty yields no abstract (previously max()
    # raised ValueError on an empty sequence).
    max_pos = max(
        (p for positions in inverted_index.values() for p in positions),
        default=None,
    )
    if max_pos is None:
        return None

    words = [""] * (max_pos + 1)
    for word, positions in inverted_index.items():
        for p in positions:
            words[p] = word
    return " ".join(words)
def get_contributors(contributors: list) -> list:
    """Map OpenAlex authorship entries to contributor dicts (id, name, affiliations)."""

    def parse_contributor(contributor):
        institutions = [
            compact(
                {
                    "id": institution.get("ror", None),
                    "name": institution.get("display_name", None),
                }
            )
            for institution in contributor.get("institutions", [])
        ]
        return compact(
            {
                "id": py_.get(contributor, "author.orcid"),
                "name": py_.get(contributor, "author.display_name"),
                "affiliations": institutions,
            }
        )

    return [parse_contributor(entry) for entry in contributors]
def get_references(pids: list, **kwargs) -> list:
    """Fetch related works from OpenAlex by their pids.

    Used for retrieving metadata for citations and references which are
    not included in the OpenAlex record itself.
    """
    return get_openalex_works(pids)
def get_citations(citation_url: str, **kwargs) -> list:
    """Fetch citing works from the OpenAlex API.

    Returns the "results" list from the API response, or
    {"state": "not_found"} on a non-200 response (matching the other
    fetch helpers in this module).
    """
    response = httpx.get(citation_url, timeout=10, **kwargs)
    if response.status_code != 200:
        return {"state": "not_found"}
    # Bug fix: the body was parsed twice — `response = response.json()`
    # followed by `response.json()` called .json() on a dict and raised
    # AttributeError before any results could be returned.
    return response.json().get("results", [])
def get_related(related: Optional[dict]) -> Optional[dict]:
    """Build a commonmeta reference dict from an OpenAlex work record.

    Returns None when the input is not a dict.
    """
    if related is None or not isinstance(related, dict):
        return None
    doi = related.get("doi", None)
    metadata = {
        "id": normalize_doi(doi) if doi else None,
        "contributor": related.get("author", None),
        "title": related.get("display_name", None),
        # Bug fix: dotted paths passed to plain dict.get() never match a
        # nested structure, so publisher and containerTitle were always
        # None. Use py_.get to traverse the nested keys.
        "publisher": py_.get(
            related, "primary_location.source.host_organization_name"
        ),
        "publicationYear": related.get("publication_year", None),
        "volume": py_.get(related, "biblio.volume"),
        "issue": py_.get(related, "biblio.issue"),
        "firstPage": py_.get(related, "biblio.first_page"),
        "lastPage": py_.get(related, "biblio.last_page"),
        "containerTitle": py_.get(related, "primary_location.source.display_name"),
    }
    return compact(metadata)
def get_openalex_works(pids: list, **kwargs) -> list:
    """Fetch OpenAlex works in batches of 49 to honor the API filter limit."""
    works = []
    for start in range(0, len(pids), 49):
        joined = "|".join(pids[start : start + 49])
        url = f"https://api.openalex.org/works?filter=ids.openalex:{joined}"
        response = httpx.get(url, timeout=10, **kwargs)
        if response.status_code != 200:
            return {"state": "not_found"}
        payload = response.json()
        if py_.get(payload, "count") == 0:
            return {"state": "not_found"}

        works.extend(payload.get("results"))

    return works
def get_openalex_funders(pids: list, **kwargs) -> list:
    """Get ROR id and name from OpenAlex funders.

    Uses batches of 49 to honor the API filter limit.
    """

    # Hoisted out of the loop: the helper does not depend on batch state.
    def format_funder(funder):
        return compact(
            {
                "id": py_.get(funder, "id"),
                "ror": py_.get(funder, "ids.ror"),
                "name": py_.get(funder, "display_name"),
            }
        )

    funders = []
    for start in range(0, len(pids), 49):
        joined = "|".join(pids[start : start + 49])
        url = f"https://api.openalex.org/funders?filter=ids.openalex:{joined}"
        response = httpx.get(url, timeout=10, **kwargs)
        if response.status_code != 200:
            return {"state": "not_found"}
        payload = response.json()
        if py_.get(payload, "count") == 0:
            return {"state": "not_found"}

        funders.extend(format_funder(entry) for entry in payload.get("results"))

    return funders
def get_openalex_source(str: Optional[str], **kwargs) -> Optional[dict]:
    """Get issn, name, homepage_url and type from an OpenAlex source.

    NOTE(review): the parameter is named `str`, shadowing the builtin;
    kept unchanged for backward compatibility with keyword callers, but
    it should be renamed in a future breaking release.

    Returns None for an invalid id, {"state": "not_found"} on HTTP error.
    """
    source_id = validate_openalex(str)  # renamed local: no longer shadows builtin id()
    if not source_id:
        return None

    url = f"https://api.openalex.org/sources/{source_id}"
    response = httpx.get(url, timeout=10, **kwargs)
    if response.status_code != 200:
        return {"state": "not_found"}
    source = response.json()
    # Removed the dead `count == 0` check: a single-entity response from
    # /sources/{id} never carries a "count" field.
    return compact(
        {
            "id": py_.get(source, "id"),
            "url": py_.get(source, "homepage_url"),
            "issn": py_.get(source, "issn_l"),
            "title": py_.get(source, "display_name"),
            "type": py_.get(source, "type"),
        }
    )
def get_files(meta) -> Optional[list]:
    """Return file links for the work's best open-access PDF, if any.

    Only a single PDF link is exposed by OpenAlex (best_oa_location.pdf_url).
    """
    # Plain dict access suffices for a two-level lookup; best_oa_location
    # may be absent or explicitly None.
    pdf_url = (meta.get("best_oa_location") or {}).get("pdf_url")
    if pdf_url is None:
        return None
    return [
        {"mimeType": "application/pdf", "url": pdf_url},
    ]
def get_container(meta: dict) -> dict:
    """Build container (journal/repository) metadata from the work's primary source.

    Fetches the source record from the OpenAlex API and combines it with
    the work's biblio fields (volume, issue, pages).
    """
    source = get_openalex_source(py_.get(meta, "primary_location.source.id"))
    # Removed stray debug print(source) left over from development.
    container_type = py_.get(source, "type")
    if container_type:
        container_type = OA_TO_CM_CONTAINER_TRANLATIONS.get(
            container_type, container_type
        )
    issn = py_.get(source, "issn")
    container_title = py_.get(source, "title")
    url_ = py_.get(source, "url")

    return compact(
        {
            "type": container_type,
            "identifier": issn or url_,
            "identifierType": "ISSN" if issn else "URL" if url_ else None,
            "title": container_title,
            "volume": py_.get(meta, "biblio.volume"),
            "issue": py_.get(meta, "biblio.issue"),
            "firstPage": py_.get(meta, "biblio.first_page"),
            "lastPage": py_.get(meta, "biblio.last_page"),
        }
    )
def from_openalex_funding(funding_references: list) -> list:
    """Build commonmeta funding references from OpenAlex grant entries.

    Resolves funder names/ROR ids via the OpenAlex funders API, then
    merges them with the award numbers from the grants.
    """
    funder_ids = [
        validate_openalex(funding.get("funder"))
        for funding in funding_references
        if "funder" in funding
    ]
    # Drop entries that were not valid OpenAlex IDs before querying the API.
    funder_ids = [fid for fid in funder_ids if fid is not None]
    funders = get_openalex_funders(funder_ids)
    formatted_funding_references = []
    for funding in funding_references:
        # Bug fix: next() without a default raised StopIteration when a
        # funder lookup failed; skip that grant instead of crashing.
        funder = next(
            (item for item in funders if item["id"] == funding.get("funder", None)),
            None,
        )
        if funder is None:
            continue
        formatted_funding_references.append(
            compact(
                {
                    "funderName": funder.get("name", None),
                    "funderIdentifier": funder.get("ror", None),
                    "funderIdentifierType": "ROR" if funder.get("ror", None) else None,
                    "awardNumber": funding.get("award_id", None),
                }
            )
        )
    return py_.uniq(formatted_funding_references)
def get_random_id_from_openalex(number: int = 1, **kwargs) -> list:
    """Get up to 20 random work IDs from the OpenAlex sample endpoint.

    Returns an empty list on HTTP errors or timeouts.
    """
    number = min(number, 20)  # API caps sample size
    url = openalex_api_sample_url(number, **kwargs)
    try:
        response = httpx.get(url, timeout=10)
        if response.status_code != 200:
            return []

        # Removed stray debug print(items); guard against a missing
        # "results" key, which previously raised TypeError on iteration.
        items = py_.get(response.json(), "results") or []
        return [i.get("id") for i in items]
    except (httpx.ReadTimeout, httpx.ConnectError):
        return []
@@ -1,4 +1,5 @@
1
1
  """RIS reader for commonmeta-py"""
2
+
2
3
  from typing import Optional
3
4
 
4
5
  from ..utils import compact, normalize_url, wrap
@@ -5,8 +5,7 @@
5
5
  "description": "JSON representation of the Commonmeta schema.",
6
6
  "commonmeta": {
7
7
  "anyOf": [
8
- { "$ref": "#/definitions/commonmeta"
9
- },
8
+ { "$ref": "#/definitions/commonmeta" },
10
9
  {
11
10
  "type": "array",
12
11
  "description": "An array of commonmeta objects.",
@@ -196,7 +195,13 @@
196
195
  "type": {
197
196
  "description": "The type of the description.",
198
197
  "type": "string",
199
- "enum": ["Abstract", "Summary", "Methods", "TechnicalInfo", "Other"]
198
+ "enum": [
199
+ "Abstract",
200
+ "Summary",
201
+ "Methods",
202
+ "TechnicalInfo",
203
+ "Other"
204
+ ]
200
205
  },
201
206
  "language": {
202
207
  "description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
@@ -267,7 +272,9 @@
267
272
  "items": { "$ref": "#/definitions/geoLocationPoint" },
268
273
  "minItems": 4
269
274
  },
270
- "inPolygonPoint": { "$ref": "#/definitions/geoLocationPoint" }
275
+ "inPolygonPoint": {
276
+ "$ref": "#/definitions/geoLocationPoint"
277
+ }
271
278
  },
272
279
  "required": ["polygonPoints"]
273
280
  },
@@ -294,6 +301,7 @@
294
301
  "Handle",
295
302
  "ISBN",
296
303
  "ISSN",
304
+ "OpenAlex",
297
305
  "PMID",
298
306
  "PMCID",
299
307
  "PURL",
@@ -323,7 +331,15 @@
323
331
  "provider": {
324
332
  "description": "The provider of the resource. This can be a DOI registration agency or a repository.",
325
333
  "type": "string",
326
- "enum": ["Crossref", "DataCite", "GitHub", "JaLC", "KISTI", "mEDRA", "OP"]
334
+ "enum": [
335
+ "Crossref",
336
+ "DataCite",
337
+ "GitHub",
338
+ "JaLC",
339
+ "KISTI",
340
+ "mEDRA",
341
+ "OP"
342
+ ]
327
343
  },
328
344
  "publisher": {
329
345
  "description": "The publisher of the resource.",
@@ -1,4 +1,5 @@
1
1
  """Schema utils for commonmeta-py"""
2
+
2
3
  from os import path
3
4
  import orjson as json
4
5
  from jsonschema import Draft202012Validator, ValidationError
commonmeta/utils.py CHANGED
@@ -1,22 +1,22 @@
1
1
  """Utils module for commonmeta-py"""
2
2
 
3
3
  import os
4
- import orjson as json
5
4
  import re
6
5
  import time
7
6
  from typing import Optional
8
7
  from urllib.parse import urlparse
9
- import yaml
10
- from furl import furl
8
+
11
9
  import bibtexparser
10
+ import orjson as json
11
+ import pycountry
12
+ import yaml
12
13
  from bs4 import BeautifulSoup
14
+ from furl import furl
13
15
  from pydash import py_
14
- import pycountry
15
16
 
16
- from .base_utils import wrap, compact, parse_attributes
17
- from .doi_utils import normalize_doi, doi_from_url, get_doi_ra, validate_doi, doi_as_url
17
+ from .base_utils import compact, parse_attributes, wrap
18
18
  from .constants import DATACITE_CONTRIBUTOR_TYPES
19
-
19
+ from .doi_utils import doi_as_url, doi_from_url, get_doi_ra, normalize_doi, validate_doi
20
20
 
21
21
  NORMALIZED_LICENSES = {
22
22
  "https://creativecommons.org/licenses/by/1.0": "https://creativecommons.org/licenses/by/1.0/legalcode",
@@ -144,17 +144,13 @@ def normalize_id(pid: Optional[str], **kwargs) -> Optional[str]:
144
144
  return doi
145
145
 
146
146
  # check for valid HTTP uri and ensure https
147
- uri = urlparse(pid)
148
- if not uri.netloc or uri.scheme not in ["http", "https"]:
147
+ f = furl(pid)
148
+ if not f.host or f.scheme not in ["http", "https"]:
149
149
  return None
150
- if uri.scheme == "http":
151
- pid = pid.replace(HTTP_SCHEME, HTTPS_SCHEME)
150
+ if f.scheme == "http":
151
+ f.scheme = "https"
152
152
 
153
- # remove trailing slash
154
- if pid.endswith("/"):
155
- pid = pid.strip("/")
156
-
157
- return pid
153
+ return f.url
158
154
 
159
155
 
160
156
  def normalize_ids(ids: list, relation_type=None) -> list:
@@ -289,6 +285,115 @@ def validate_isni(isni: Optional[str]) -> Optional[str]:
289
285
  return isni
290
286
 
291
287
 
288
+ def validate_mag(mag: Optional[str]) -> Optional[str]:
289
+ """Validate Microsoft Academic Graph ID (mag)"""
290
+ if mag is None or not isinstance(mag, str):
291
+ return None
292
+ match = re.search(
293
+ r"\A(\d{4,10})\Z",
294
+ mag,
295
+ )
296
+ if match is None:
297
+ return None
298
+ return match.group(1)
299
+
300
+
301
+ def validate_openalex(openalex: Optional[str]) -> Optional[str]:
302
+ """Validate OpenAlex ID"""
303
+ if openalex is None or not isinstance(openalex, str):
304
+ return None
305
+ match = re.search(
306
+ r"\A(?:(?:http|https)://openalex\.org/)?([AFIPSW]\d{8,10})\Z",
307
+ openalex,
308
+ )
309
+ if match is None:
310
+ return None
311
+ return match.group(1)
312
+
313
+
314
+ def validate_pmid(pmid: Optional[str]) -> Optional[str]:
315
+ """Validate PubMed ID (pmid)"""
316
+ if pmid is None or not isinstance(pmid, str):
317
+ return None
318
+ match = re.search(
319
+ r"\A(?:(?:http|https)://pubmed\.ncbi\.nlm\.nih\.gov/)?(\d{4,8})\Z",
320
+ pmid,
321
+ )
322
+ if match is None:
323
+ return None
324
+ return match.group(1)
325
+
326
+
327
+ def validate_pmcid(pmcid: Optional[str]) -> Optional[str]:
328
+ """Validate PubMed Central ID (pmcid)"""
329
+ if pmcid is None or not isinstance(pmcid, str):
330
+ return None
331
+ match = re.search(
332
+ r"\A(?:(?:http|https)://www\.ncbi\.nlm\.nih\.gov/pmc/articles/)?(\d{4,8})\Z",
333
+ pmcid,
334
+ )
335
+ if match is None:
336
+ return None
337
+ return match.group(1)
338
+
339
+
340
+ def validate_id(id: Optional[str]) -> tuple[Optional[str], Optional[str]]:
341
+ """
342
+ Validate an identifier and return the validated identifier and its type.
343
+
344
+ Args:
345
+ id: The identifier string to validate
346
+
347
+ Returns:
348
+ A tuple containing (validated_id, id_type) or (None, None) if invalid
349
+ """
350
+ if id is None:
351
+ return None, None
352
+
353
+ # Check if it's a DOI
354
+ doi = validate_doi(id)
355
+ if doi:
356
+ return normalize_doi(id), "DOI"
357
+
358
+ # Check if it's an ORCID
359
+ orcid = validate_orcid(id)
360
+ if orcid:
361
+ return normalize_orcid(id), "ORCID"
362
+
363
+ # Check if it's a ROR
364
+ ror = validate_ror(id)
365
+ if ror:
366
+ return normalize_ror(id), "ROR"
367
+
368
+ # Check if it's an ISNI
369
+ isni = validate_isni(id)
370
+ if isni:
371
+ return normalize_isni(id), "ISNI"
372
+
373
+ # Check if it's an OpenAlex ID
374
+ openalex = validate_openalex(id)
375
+ if openalex:
376
+ return f"https://openalex.org/{openalex}", "OpenAlex"
377
+
378
+ # Check if it's a PubMed ID
379
+ pmid = validate_pmid(id)
380
+ if pmid:
381
+ return f"https://pubmed.ncbi.nlm.nih.gov/{pmid}", "PMID"
382
+
383
+ # Check if it's a PubMed Central ID
384
+ pmcid = validate_pmcid(id)
385
+ if pmcid:
386
+ return f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmcid}", "PMCID"
387
+
388
+ # Check if it's a URL
389
+ url_type = validate_url(id)
390
+ if url_type:
391
+ return normalize_url(id), url_type
392
+
393
+ # No known valid identifier type was found
394
+ return None, None
395
+
396
+
292
397
  def normalize_isni(isni: Optional[str]) -> Optional[str]:
293
398
  """Normalize ISNI"""
294
399
  if isni is None or not isinstance(isni, str):
@@ -1,4 +1,5 @@
1
1
  """Bibtex writer for commonmeta-py"""
2
+
2
3
  from bibtexparser.bwriter import BibTexWriter
3
4
  from bibtexparser.bibdatabase import BibDatabase
4
5
  from bibtexparser.customization import page_double_hyphen
@@ -1,4 +1,5 @@
1
1
  """Citation writer for commonmeta-py"""
2
+
2
3
  import orjson as json
3
4
  import re
4
5
  from pydash import py_
@@ -1,4 +1,5 @@
1
1
  """Crossref XML writer for commonmeta-py"""
2
+
2
3
  from typing import Optional
3
4
  from ..constants import Commonmeta
4
5
  from ..crossref_utils import generate_crossref_xml, generate_crossref_xml_list
@@ -1,4 +1,5 @@
1
1
  """CSL-JSON writer for commonmeta-py"""
2
+
2
3
  import orjson as json
3
4
  from typing import Optional
4
5
 
@@ -1,4 +1,5 @@
1
1
  """DataCite writer for commonmeta-py"""
2
+
2
3
  import orjson as json
3
4
  from typing import Optional, Union
4
5
 
@@ -1,4 +1,5 @@
1
1
  """RIS writer for commonmeta-py"""
2
+
2
3
  from ..utils import to_ris
3
4
  from ..base_utils import compact, wrap, presence, parse_attributes
4
5
  from ..doi_utils import doi_from_url
@@ -1,4 +1,5 @@
1
1
  """Schema.org writer for commonmeta-py"""
2
+
2
3
  import orjson as json
3
4
  from ..utils import to_schema_org_creators, github_as_repo_url, get_language
4
5
  from ..base_utils import compact, wrap, presence, parse_attributes