commonmeta-py 0.112__py3-none-any.whl → 0.114__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
commonmeta/__init__.py CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.112"
13
+ __version__ = "0.114"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
commonmeta/doi_utils.py CHANGED
@@ -302,19 +302,101 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
302
302
  return f"https://api.datacite.org/dois?random=true&page[size]={number}"
303
303
 
304
304
 
305
- def is_rogue_scholar_doi(doi: str) -> bool:
306
- """Return True if DOI is from Rogue Scholar"""
307
- prefix = validate_prefix(doi)
308
- return prefix in [
309
- "10.34732", # not managed by Front Matter
305
+ def is_rogue_scholar_doi(doi: str, ra: str = "crossref") -> bool:
306
+ """Check if a DOI is from Rogue Scholar with specific registration agency"""
307
+ rogue_scholar_crossref_prefixes = [
308
+ "10.13003",
310
309
  "10.53731",
311
310
  "10.54900",
312
- "10.57689", # not managed by Front Matter
313
- "10.58079", # not managed by Front Matter
311
+ "10.57689",
312
+ "10.59347",
314
313
  "10.59348",
315
314
  "10.59349",
316
315
  "10.59350",
317
316
  "10.63485",
318
317
  "10.64000",
318
+ ]
319
+ rogue_scholar_datacite_prefixes = [
320
+ "10.5438",
321
+ "10.34732", # not managed by Front Matter
322
+ "10.57689", # not managed by Front Matter
323
+ "10.58079", # not managed by Front Matter
324
+ "10.60804",
319
325
  "10.71938", # not managed by Front Matter
326
+ # "10.83132",
320
327
  ]
328
+
329
+ prefix = validate_prefix(doi)
330
+ if not prefix:
331
+ return False
332
+
333
+ is_crossref = prefix in rogue_scholar_crossref_prefixes
334
+ is_datacite = prefix in rogue_scholar_datacite_prefixes
335
+
336
+ if ra == "crossref":
337
+ return is_crossref
338
+ elif ra == "datacite":
339
+ return is_datacite
340
+ return is_crossref or is_datacite
341
+
342
+
343
+ def generate_wordpress_doi(prefix: str, slug: str, guid: str) -> str:
344
+ """Generate a DOI from a WordPress GUID and slug"""
345
+ import re
346
+
347
+ if not prefix or not guid:
348
+ return ""
349
+
350
+ pattern = re.compile(r"p=(\d+)$")
351
+ matched = pattern.search(guid)
352
+
353
+ if not matched:
354
+ return ""
355
+
356
+ doi = f"https://doi.org/{prefix}/{slug}.{matched.group(1)}"
357
+ return doi
358
+
359
+
360
+ def generate_doi_from_guid(prefix: str, guid: str) -> str:
361
+ """Validates a GUID that is a DOI"""
362
+ import base32_lib as base32
363
+
364
+ if not prefix:
365
+ return ""
366
+
367
+ doi = normalize_doi(guid)
368
+ if not doi:
369
+ return ""
370
+
371
+ p = validate_prefix(doi)
372
+ if not p or p != prefix:
373
+ return ""
374
+
375
+ suffix = doi.split("/")[-1]
376
+
377
+ try:
378
+ number = base32.decode(suffix, checksum=True)
379
+ if number != 0:
380
+ return doi
381
+ except (ValueError, IndexError):
382
+ pass
383
+
384
+ return ""
385
+
386
+
387
+ def generate_substack_doi(prefix: str, guid: str) -> str:
388
+ """Generate a DOI from a Substack GUID"""
389
+ import base32_lib as base32
390
+
391
+ if not prefix or not guid:
392
+ return ""
393
+
394
+ try:
395
+ i = int(guid)
396
+ except ValueError:
397
+ return ""
398
+
399
+ # encode the number using base32 with length=4, split_every=8, and checksum=True
400
+ suffix = base32.encode(i, length=4, split_every=8, checksum=True)
401
+ doi = f"https://doi.org/{prefix}/{suffix}"
402
+ return doi
commonmeta/readers/jsonfeed_reader.py CHANGED
@@ -13,6 +13,9 @@ from ..date_utils import get_date_from_unix_timestamp
13
13
  from ..doi_utils import (
14
14
  doi_from_url,
15
15
  encode_doi,
16
+ generate_doi_from_guid,
17
+ generate_substack_doi,
18
+ generate_wordpress_doi,
16
19
  is_rogue_scholar_doi,
17
20
  normalize_doi,
18
21
  validate_doi,
@@ -55,13 +58,39 @@ def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
55
58
  "archive_url", None
56
59
  ):
57
60
  url = normalize_url(meta.get("archive_url", None))
58
- _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None)) or url
59
- _type = "BlogPost"
60
61
 
61
- # optionally generate a DOI if missing but a DOI prefix is provided
62
- prefix = read_options.get("prefix", None) or py_.get(meta, "blog.prefix", None)
63
- if doi_from_url(_id) is None and prefix is not None:
64
- _id = encode_doi(prefix)
62
+ # generate DOI string for registration if not provided
63
+ _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None))
64
+ if _id is None:
65
+ if meta.get("guid") and py_.get(meta, "blog.doi_reg", False):
66
+ # Generate DOI based on blogging platform
67
+ generator = py_.get(meta, "blog.generator")
68
+ prefix = py_.get(meta, "blog.prefix")
69
+ slug = py_.get(meta, "blog.slug")
70
+ guid = meta.get("guid")
71
+
72
+ # Import these functions only when needed to avoid circular imports
73
+ if generator in ["WordPress", "WordPress.com"] and prefix and slug and guid:
74
+ _id = generate_wordpress_doi(prefix, slug, guid)
75
+ elif generator == "Substack" and prefix and guid:
76
+ _id = generate_substack_doi(prefix, guid)
77
+ elif prefix and guid:
78
+ _id = generate_doi_from_guid(prefix, guid)
79
+
80
+ # If still no DOI but prefix provided and not registered for DOI generation
81
+ elif py_.get(meta, "blog.prefix") and not py_.get(meta, "blog.doi_reg", False):
82
+ prefix = py_.get(meta, "blog.prefix")
83
+ _id = encode_doi(prefix)
84
+
85
+ # If override prefix is provided in read_options, use that
86
+ elif read_options.get("prefix"):
87
+ _id = encode_doi(read_options.get("prefix"))
88
+
89
+ # fall back to url if no DOI can be generated
90
+ if _id is None:
91
+ _id = url
92
+
93
+ _type = "BlogPost"
65
94
 
66
95
  if meta.get("authors", None):
67
96
  contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
commonmeta/writers/inveniordm_writer.py CHANGED
@@ -23,6 +23,7 @@ from ..utils import (
23
23
  get_language,
24
24
  id_from_url,
25
25
  normalize_url,
26
+ pages_as_string,
26
27
  validate_orcid,
27
28
  validate_ror,
28
29
  )
@@ -68,7 +69,7 @@ def write_inveniordm(metadata):
68
69
  ]
69
70
  )
70
71
  container = metadata.container if metadata.container else {}
71
- journal = (
72
+ journal_title = (
72
73
  container.get("title", None)
73
74
  if _type not in ["inbook", "inproceedings"]
74
75
  and container.get("type") in ["Journal", "Periodical", "Blog"]
@@ -79,6 +80,10 @@ def write_inveniordm(metadata):
79
80
  if container.get("identifierType", None) == "ISSN"
80
81
  else None
81
82
  )
83
+ volume = container.get("volume", None)
84
+ issue = container.get("issue", None)
85
+ pages = pages_as_string(container)
86
+
82
87
  dates = []
83
88
  for date in metadata.date.keys():
84
89
  if metadata.date.get(date, None) is None:
@@ -141,7 +146,15 @@ def write_inveniordm(metadata):
141
146
  ),
142
147
  "custom_fields": compact(
143
148
  {
144
- "journal:journal": compact({"title": journal, "issn": issn}),
149
+ "journal:journal": compact(
150
+ {
151
+ "title": journal_title,
152
+ "issn": issn,
153
+ "volume": volume,
154
+ "issue": issue,
155
+ "pages": pages,
156
+ }
157
+ ),
145
158
  "rs:content_html": presence(metadata.content),
146
159
  "rs:image": presence(metadata.image),
147
160
  "rs:generator": container.get("platform", None),
@@ -463,7 +476,6 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key: str):
463
476
  # optionally update rogue-scholar legacy record
464
477
  if host == "rogue-scholar.org" and legacy_key is not None:
465
478
  record = update_legacy_record(record, legacy_key)
466
- print("g", record)
467
479
  except Exception as e:
468
480
  raise InvenioRDMError(f"Unexpected error: {str(e)}")
469
481
 
@@ -511,12 +523,11 @@ def create_draft_record(record, host, token, input):
511
523
  )
512
524
  response.raise_for_status()
513
525
  data = response.json()
514
- return {
515
- "id": data.get("id", None),
516
- "created": data.get("created", None),
517
- "updated": data.get("updated", None),
518
- "status": "updated",
519
- }
526
+ record["id"]: data.get("id", None)
527
+ record["created"] = data.get("created", None)
528
+ record["updated"] = data.get("updated", None)
529
+ record["status"] = "draft"
530
+ return record
520
531
  except requests.exceptions.RequestException as e:
521
532
  raise InvenioRDMError(f"Error creating draft record: {str(e)}")
522
533
 
@@ -610,20 +621,31 @@ def update_legacy_record(record, legacy_key: str):
610
621
  legacy_host = "bosczcmeodcrajtcaddf.supabase.co"
611
622
 
612
623
  if not legacy_key:
613
- return record, ValueError("no legacy key provided")
614
-
624
+ raise ValueError("no legacy key provided")
615
625
  if not record.get("uuid", None):
616
- return record, ValueError("no UUID provided")
617
-
618
- now = f"{int(time())}"
619
-
626
+ raise ValueError("no UUID provided")
620
627
  if not record.get("doi", None):
621
- return ValueError("no valid doi to update")
628
+ raise ValueError("no valid doi to update")
622
629
 
623
- output = {"indexed_at": now, "indexed": "true", "archived": "true"}
630
+ now = f"{int(time())}"
631
+ if record.get("id", None) is not None:
632
+ output = {
633
+ "rid": record.get("id"),
634
+ "indexed_at": now,
635
+ "indexed": "true",
636
+ "archived": "true",
637
+ }
638
+ elif record.get("doi", None) is not None:
639
+ output = {
640
+ "doi": record.get("doi"),
641
+ "indexed_at": now,
642
+ "indexed": "true",
643
+ "archived": "true",
644
+ }
645
+ else:
646
+ return record # nothing to update
624
647
 
625
648
  request_url = f"https://{legacy_host}/rest/v1/posts?id=eq.{record['uuid']}"
626
-
627
649
  headers = {
628
650
  "Content-Type": "application/json",
629
651
  "apikey": legacy_key,
@@ -635,13 +657,13 @@ def update_legacy_record(record, legacy_key: str):
635
657
  response = requests.patch(request_url, json=output, headers=headers, timeout=30)
636
658
  response.raise_for_status()
637
659
  if response.status_code != 204:
638
- return record, Exception(f"Unexpected status code: {response.status_code}")
660
+ return Exception(f"Unexpected status code: {response.status_code}")
639
661
 
640
662
  record["status"] = "updated_legacy"
641
663
  return record
642
664
 
643
665
  except requests.exceptions.RequestException as e:
644
- return record, e
666
+ raise InvenioRDMError(f"Error updating legacy record: {str(e)}")
645
667
 
646
668
 
647
669
  def search_by_slug(slug, type_value, host, token) -> Optional[str]:
commonmeta_py-0.114.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commonmeta-py
3
- Version: 0.112
3
+ Version: 0.114
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Project-URL: Homepage, https://python.commonmeta.org
6
6
  Project-URL: Repository, https://github.com/front-matter/commonmeta-py
commonmeta_py-0.114.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
- commonmeta/__init__.py,sha256=eKvnKvDHY9vRV-P7jBGueCHolec-BCQP9YOJWd_L0iM,2098
1
+ commonmeta/__init__.py,sha256=G4y5yCVJHisJQAgUH6EKoM6g79z1I5zu8mfvG9tuYpE,2098
2
2
  commonmeta/api_utils.py,sha256=y5KLfIOWOjde7LXZ36u-eneQJ-Q53yXUZg3hWpCBS2E,2685
3
3
  commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
4
4
  commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
5
5
  commonmeta/cli.py,sha256=pdBpBosLNq3RS9buO-Voqawc9Ay1eSt-xP5O97iOft4,8480
6
6
  commonmeta/constants.py,sha256=wSTEUiHeRdXLwjXEQD9AU2hxFyEKi5OTX2iHOKO6nF0,19844
7
7
  commonmeta/date_utils.py,sha256=H2cCobX0JREIUOT_cCigGd3MG7prGiQpXk1m4ZNrFwU,6318
8
- commonmeta/doi_utils.py,sha256=ZztajfOLtnASk1BbQ1Y2Q4B_xxlnbujn7Opx5a1U5vY,9582
8
+ commonmeta/doi_utils.py,sha256=kS9wBoZQHvV-fqFoW9j-_aN_7Kj1I6sQdqnqK3Nno0M,11512
9
9
  commonmeta/file_utils.py,sha256=tGvXxScjh-PPo5YvLDyk4sqwY5Q50N0zAmBHVaUOLeU,3268
10
10
  commonmeta/metadata.py,sha256=U9uYOkKAdgqLiQwQKXMXjrAFvO6avMWp9heJG4cNAAY,18893
11
11
  commonmeta/schema_utils.py,sha256=WGpmMj9cfNMg_55hhgwY9qpO0A1HSvTLQC2equjBftI,1770
@@ -22,7 +22,7 @@ commonmeta/readers/csl_reader.py,sha256=OxzC2AZKfv43BCah4XGYvlK_LUK-5mxXFcjdzB5v
22
22
  commonmeta/readers/datacite_reader.py,sha256=M6gznf1kisR1WzDZaoR0pLJC6Q4Rtnerodfs8lU2khI,12094
23
23
  commonmeta/readers/datacite_xml_reader.py,sha256=zJSuN9pnWplYFH7V1eneh0OjKTFCNkOLmEMf6fU6_xg,13048
24
24
  commonmeta/readers/inveniordm_reader.py,sha256=6LkT6R20jSFqDdZqAzcREHbdAcIPHiYJvxKsK_mpDdw,8374
25
- commonmeta/readers/jsonfeed_reader.py,sha256=jn7ux79jGD_ChsBH7zU8_CeCEObIbDh3NDjMhHIzE64,14470
25
+ commonmeta/readers/jsonfeed_reader.py,sha256=zcPxxuyAGW8W7w0-VwP9AhpX97qVWHQJUIJ5p4bBbfE,15655
26
26
  commonmeta/readers/kbase_reader.py,sha256=KH3loJvuq2bm8zAYIUG7hTsr5-2Anj3NQvoJUDiqmss,6764
27
27
  commonmeta/readers/openalex_reader.py,sha256=4HUkBsut_iUjhUcC5c1GHgxnKsYQc-fgY43QILgVZEg,12826
28
28
  commonmeta/readers/ris_reader.py,sha256=oQ3G7qQmNwhr4cNp-Gv5UW28J2K1oKpBlPh-tjRtnpQ,3678
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
80
80
  commonmeta/writers/crossref_xml_writer.py,sha256=d-Rb2Vd_g3UW8GM4APIT7fivSQ5GMssZ6Ubi3OykHaw,33479
81
81
  commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
82
82
  commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
83
- commonmeta/writers/inveniordm_writer.py,sha256=EDIw56kIr-58oB1oWnGOCfUBRwIsivjnR8WT5rrcji0,23334
83
+ commonmeta/writers/inveniordm_writer.py,sha256=1WgFPT6no2v-rurNo-02RcgN3Ic88MdbnKBnVQlv9zo,24142
84
84
  commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
85
85
  commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
86
- commonmeta_py-0.112.dist-info/METADATA,sha256=8zV_v5vYgwtipHroPPPc1pms1YB4QNKPpWBCH4uRGrk,7652
87
- commonmeta_py-0.112.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
- commonmeta_py-0.112.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
- commonmeta_py-0.112.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
- commonmeta_py-0.112.dist-info/RECORD,,
86
+ commonmeta_py-0.114.dist-info/METADATA,sha256=a0JivRZx0DkvMk3O1na9HIMHGgUZuvBBDk0Smc8H-Qc,7652
87
+ commonmeta_py-0.114.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
+ commonmeta_py-0.114.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
+ commonmeta_py-0.114.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
+ commonmeta_py-0.114.dist-info/RECORD,,