commonmeta-py 0.111__py3-none-any.whl → 0.113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
commonmeta/__init__.py CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.111"
13
+ __version__ = "0.113"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
commonmeta/doi_utils.py CHANGED
@@ -302,19 +302,101 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
302
302
  return f"https://api.datacite.org/dois?random=true&page[size]={number}"
303
303
 
304
304
 
305
- def is_rogue_scholar_doi(doi: str) -> bool:
306
- """Return True if DOI is from Rogue Scholar"""
307
- prefix = validate_prefix(doi)
308
- return prefix in [
309
- "10.34732", # not managed by Front Matter
305
+ def is_rogue_scholar_doi(doi: str, ra: str = "crossref") -> bool:
306
+ """Check if a DOI is from Rogue Scholar with specific registration agency"""
307
+ rogue_scholar_crossref_prefixes = [
308
+ "10.13003",
310
309
  "10.53731",
311
310
  "10.54900",
312
- "10.57689", # not managed by Front Matter
313
- "10.58079", # not managed by Front Matter
311
+ "10.57689",
312
+ "10.59347",
314
313
  "10.59348",
315
314
  "10.59349",
316
315
  "10.59350",
317
316
  "10.63485",
318
317
  "10.64000",
318
+ ]
319
+ rogue_scholar_datacite_prefixes = [
320
+ "10.5438",
321
+ "10.34732", # not managed by Front Matter
322
+ "10.57689", # not managed by Front Matter
323
+ "10.58079", # not managed by Front Matter
324
+ "10.60804",
319
325
  "10.71938", # not managed by Front Matter
326
+ # "10.83132",
320
327
  ]
328
+
329
+ prefix = validate_prefix(doi)
330
+ if not prefix:
331
+ return False
332
+
333
+ is_crossref = prefix in rogue_scholar_crossref_prefixes
334
+ is_datacite = prefix in rogue_scholar_datacite_prefixes
335
+
336
+ if ra == "crossref":
337
+ return is_crossref
338
+ elif ra == "datacite":
339
+ return is_datacite
340
+ return is_crossref or is_datacite
341
+
342
+
343
+ def generate_wordpress_doi(prefix: str, slug: str, guid: str) -> str:
344
+ """Generate a DOI from a WordPress GUID and slug"""
345
+ import re
346
+
347
+ if not prefix or not guid:
348
+ return ""
349
+
350
+ pattern = re.compile(r"p=(\d+)$")
351
+ matched = pattern.search(guid)
352
+
353
+ if not matched:
354
+ return ""
355
+
356
+ doi = f"https://doi.org/{prefix}/{slug}.{matched.group(1)}"
357
+ return doi
358
+
359
+
360
+ def generate_doi_from_guid(prefix: str, guid: str) -> str:
361
+ """Validates a GUID that is a DOI"""
362
+ import base32_lib as base32
363
+
364
+ if not prefix:
365
+ return ""
366
+
367
+ doi = normalize_doi(guid)
368
+ if not doi:
369
+ return ""
370
+
371
+ p = validate_prefix(doi)
372
+ if not p or p != prefix:
373
+ return ""
374
+
375
+ suffix = doi.split("/")[-1]
376
+
377
+ try:
378
+ number = base32.decode(suffix, checksum=True)
379
+ if number != 0:
380
+ return doi
381
+ except (ValueError, IndexError):
382
+ pass
383
+
384
+ return ""
385
+
386
+
387
+ def generate_substack_doi(prefix: str, guid: str) -> str:
388
+ """Generate a DOI from a Substack GUID"""
389
+ import base32_lib as base32
390
+
391
+ if not prefix or not guid:
392
+ return ""
393
+
394
+ try:
395
+ i = int(guid)
396
+ except ValueError:
397
+ return ""
398
+
399
+ # encode the number using base32 with length=4, split_every=8, and checksum=True
400
+ suffix = base32.encode(i, length=4, split_every=8, checksum=True)
401
+ doi = f"https://doi.org/{prefix}/{suffix}"
402
+ return doi
commonmeta/metadata.py CHANGED
@@ -490,7 +490,9 @@ class MetadataList:
490
490
  elif to == "datacite":
491
491
  raise ValueError("Datacite not yet supported for metadata lists")
492
492
  elif to == "inveniordm":
493
- response = push_inveniordm_list(self, host=self.host, token=self.token, legacy_key=self.legacy_key)
493
+ response = push_inveniordm_list(
494
+ self, host=self.host, token=self.token, legacy_key=self.legacy_key
495
+ )
494
496
  return response
495
497
  else:
496
498
  raise ValueError("No valid output format found")
@@ -13,6 +13,9 @@ from ..date_utils import get_date_from_unix_timestamp
13
13
  from ..doi_utils import (
14
14
  doi_from_url,
15
15
  encode_doi,
16
+ generate_doi_from_guid,
17
+ generate_substack_doi,
18
+ generate_wordpress_doi,
16
19
  is_rogue_scholar_doi,
17
20
  normalize_doi,
18
21
  validate_doi,
@@ -55,13 +58,39 @@ def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
55
58
  "archive_url", None
56
59
  ):
57
60
  url = normalize_url(meta.get("archive_url", None))
58
- _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None)) or url
59
- _type = "BlogPost"
60
61
 
61
- # optionally generate a DOI if missing but a DOI prefix is provided
62
- prefix = read_options.get("prefix", None) or py_.get(meta, "blog.prefix", None)
63
- if doi_from_url(_id) is None and prefix is not None:
64
- _id = encode_doi(prefix)
62
+ # generate DOI string for registration if not provided
63
+ _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None))
64
+ if _id is None:
65
+ if meta.get("guid") and py_.get(meta, "blog.doi_reg", False):
66
+ # Generate DOI based on blogging platform
67
+ generator = py_.get(meta, "blog.generator")
68
+ prefix = py_.get(meta, "blog.prefix")
69
+ slug = py_.get(meta, "blog.slug")
70
+ guid = meta.get("guid")
71
+
72
+ # Import these functions only when needed to avoid circular imports
73
+ if generator in ["WordPress", "WordPress.com"] and prefix and slug and guid:
74
+ _id = generate_wordpress_doi(prefix, slug, guid)
75
+ elif generator == "Substack" and prefix and guid:
76
+ _id = generate_substack_doi(prefix, guid)
77
+ elif prefix and guid:
78
+ _id = generate_doi_from_guid(prefix, guid)
79
+
80
+ # If still no DOI but prefix provided and not registered for DOI generation
81
+ elif py_.get(meta, "blog.prefix") and not py_.get(meta, "blog.doi_reg", False):
82
+ prefix = py_.get(meta, "blog.prefix")
83
+ _id = encode_doi(prefix)
84
+
85
+ # If override prefix is provided in read_options, use that
86
+ elif read_options.get("prefix"):
87
+ _id = encode_doi(read_options.get("prefix"))
88
+
89
+ # fall back to url if no DOI can be generated
90
+ if _id is None:
91
+ _id = url
92
+
93
+ _type = "BlogPost"
65
94
 
66
95
  if meta.get("authors", None):
67
96
  contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
@@ -23,6 +23,7 @@ from ..utils import (
23
23
  get_language,
24
24
  id_from_url,
25
25
  normalize_url,
26
+ pages_as_string,
26
27
  validate_orcid,
27
28
  validate_ror,
28
29
  )
@@ -68,7 +69,7 @@ def write_inveniordm(metadata):
68
69
  ]
69
70
  )
70
71
  container = metadata.container if metadata.container else {}
71
- journal = (
72
+ journal_title = (
72
73
  container.get("title", None)
73
74
  if _type not in ["inbook", "inproceedings"]
74
75
  and container.get("type") in ["Journal", "Periodical", "Blog"]
@@ -79,6 +80,10 @@ def write_inveniordm(metadata):
79
80
  if container.get("identifierType", None) == "ISSN"
80
81
  else None
81
82
  )
83
+ volume = container.get("volume", None)
84
+ issue = container.get("issue", None)
85
+ pages = pages_as_string(container)
86
+
82
87
  dates = []
83
88
  for date in metadata.date.keys():
84
89
  if metadata.date.get(date, None) is None:
@@ -141,7 +146,15 @@ def write_inveniordm(metadata):
141
146
  ),
142
147
  "custom_fields": compact(
143
148
  {
144
- "journal:journal": compact({"title": journal, "issn": issn}),
149
+ "journal:journal": compact(
150
+ {
151
+ "title": journal_title,
152
+ "issn": issn,
153
+ "volume": volume,
154
+ "issue": issue,
155
+ "pages": pages,
156
+ }
157
+ ),
145
158
  "rs:content_html": presence(metadata.content),
146
159
  "rs:image": presence(metadata.image),
147
160
  "rs:generator": container.get("platform", None),
@@ -371,7 +384,7 @@ def write_inveniordm_list(metalist):
371
384
  return [write_inveniordm(item) for item in metalist.items]
372
385
 
373
386
 
374
- def push_inveniordm(metadata, host: str, token: str, legacy_key:str):
387
+ def push_inveniordm(metadata, host: str, token: str, legacy_key: str):
375
388
  """Push record to InvenioRDM"""
376
389
 
377
390
  record = {}
@@ -382,8 +395,9 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key:str):
382
395
  community_index = None
383
396
  if hasattr(metadata, "relations") and metadata.relations:
384
397
  for i, relation in enumerate(metadata.relations):
385
- if (relation.get("type") == "IsPartOf" and
386
- relation.get("id", "").startswith("https://rogue-scholar.org/api/communities/")):
398
+ if relation.get("type") == "IsPartOf" and relation.get(
399
+ "id", ""
400
+ ).startswith("https://rogue-scholar.org/api/communities/"):
387
401
  slug = relation.get("id").split("/")[5]
388
402
  community_id, _ = search_by_slug(slug, "blog", host, token)
389
403
  if community_id:
@@ -413,7 +427,6 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key:str):
413
427
  record["id"] = search_by_doi(doi_from_url(metadata.id), host, token)
414
428
 
415
429
  if record["id"] is not None:
416
-
417
430
  # Create draft record from published record
418
431
  record = edit_published_record(record, host, token)
419
432
 
@@ -443,9 +456,7 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key:str):
443
456
 
444
457
  community_id = search_by_slug(slug, "topic", host, token)
445
458
  if community_id:
446
- record = add_record_to_community(
447
- record, host, token, community_id
448
- )
459
+ record = add_record_to_community(record, host, token, community_id)
449
460
 
450
461
  # Add record to communities defined as IsPartOf relation in inveniordm metadata's RelatedIdentifiers
451
462
  related_identifiers = py_.get(input, "metadata.related_identifiers")
@@ -460,21 +471,19 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key:str):
460
471
  and len(path_parts) == 3
461
472
  and path_parts[1] == "communities"
462
473
  ):
463
- record = add_record_to_community(
464
- record, host, token, path_parts[2]
465
- )
474
+ record = add_record_to_community(record, host, token, path_parts[2])
466
475
 
467
476
  # optionally update rogue-scholar legacy record
468
477
  if host == "rogue-scholar.org" and legacy_key is not None:
469
478
  record = update_legacy_record(record, legacy_key)
470
-
479
+ print("g", record)
471
480
  except Exception as e:
472
481
  raise InvenioRDMError(f"Unexpected error: {str(e)}")
473
482
 
474
483
  return record
475
484
 
476
485
 
477
- def push_inveniordm_list(metalist, host: str, token: str, legacy_key:str) -> list:
486
+ def push_inveniordm_list(metalist, host: str, token: str, legacy_key: str) -> list:
478
487
  """Push inveniordm list to InvenioRDM, returns list of push results."""
479
488
 
480
489
  if metalist is None:
@@ -491,7 +500,9 @@ def search_by_doi(doi, host, token) -> Optional[str]:
491
500
  }
492
501
  params = {"q": f"doi:{doi}", "size": 1}
493
502
  try:
494
- response = requests.get(f"https://{host}/api/records", headers=headers, params=params)
503
+ response = requests.get(
504
+ f"https://{host}/api/records", headers=headers, params=params
505
+ )
495
506
  response.raise_for_status()
496
507
  data = response.json()
497
508
  if py_.get(data, "hits.total") or 0 > 0:
@@ -508,14 +519,16 @@ def create_draft_record(record, host, token, input):
508
519
  "Content-Type": "application/json",
509
520
  }
510
521
  try:
511
- response = requests.post(f"https://{host}/api/records", headers=headers, json=input)
522
+ response = requests.post(
523
+ f"https://{host}/api/records", headers=headers, json=input
524
+ )
512
525
  response.raise_for_status()
513
526
  data = response.json()
514
527
  return {
515
528
  "id": data.get("id", None),
516
529
  "created": data.get("created", None),
517
530
  "updated": data.get("updated", None),
518
- "status": "updated"
531
+ "status": "updated",
519
532
  }
520
533
  except requests.exceptions.RequestException as e:
521
534
  raise InvenioRDMError(f"Error creating draft record: {str(e)}")
@@ -611,38 +624,37 @@ def update_legacy_record(record, legacy_key: str):
611
624
 
612
625
  if not legacy_key:
613
626
  return record, ValueError("no legacy key provided")
614
-
615
627
  if not record.get("uuid", None):
616
628
  return record, ValueError("no UUID provided")
617
-
618
- now = f"{int(time())}"
619
-
620
629
  if not record.get("doi", None):
621
630
  return ValueError("no valid doi to update")
622
631
 
623
- output = {
624
- "doi": record["doi"],
632
+ now = f"{int(time())}"
633
+ if record.get("id", None) is not None:
634
+ output = {
635
+ "rid": record.get("id"),
636
+ "indexed_at": now,
637
+ "indexed": "true",
638
+ "archived": "true",
639
+ }
640
+ else:
641
+ output = {
642
+ "doi": record.get("doi"),
625
643
  "indexed_at": now,
626
644
  "indexed": "true",
627
- "archived": "true"
645
+ "archived": "true",
628
646
  }
629
647
 
630
648
  request_url = f"https://{legacy_host}/rest/v1/posts?id=eq.{record['uuid']}"
631
-
632
649
  headers = {
633
650
  "Content-Type": "application/json",
634
651
  "apikey": legacy_key,
635
652
  "Authorization": f"Bearer {legacy_key}",
636
- "Prefer": "return=minimal"
653
+ "Prefer": "return=minimal",
637
654
  }
638
655
 
639
656
  try:
640
- response = requests.patch(
641
- request_url,
642
- json=output,
643
- headers=headers,
644
- timeout=30
645
- )
657
+ response = requests.patch(request_url, json=output, headers=headers, timeout=30)
646
658
  response.raise_for_status()
647
659
  if response.status_code != 204:
648
660
  return record, Exception(f"Unexpected status code: {response.status_code}")
@@ -651,7 +663,7 @@ def update_legacy_record(record, legacy_key: str):
651
663
  return record
652
664
 
653
665
  except requests.exceptions.RequestException as e:
654
- return record, e
666
+ raise InvenioRDMError(f"Error updating legacy record: {str(e)}")
655
667
 
656
668
 
657
669
  def search_by_slug(slug, type_value, host, token) -> Optional[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commonmeta-py
3
- Version: 0.111
3
+ Version: 0.113
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Project-URL: Homepage, https://python.commonmeta.org
6
6
  Project-URL: Repository, https://github.com/front-matter/commonmeta-py
@@ -1,13 +1,13 @@
1
- commonmeta/__init__.py,sha256=7VbnWhd_GpuWNLnOiFnEkdZ7RLRMCJviElrUM9gDfuA,2098
1
+ commonmeta/__init__.py,sha256=nq7nj3A5l3OtAM12vAsKOAYFN7fJmVFSq7GRuupnm40,2098
2
2
  commonmeta/api_utils.py,sha256=y5KLfIOWOjde7LXZ36u-eneQJ-Q53yXUZg3hWpCBS2E,2685
3
3
  commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
4
4
  commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
5
5
  commonmeta/cli.py,sha256=pdBpBosLNq3RS9buO-Voqawc9Ay1eSt-xP5O97iOft4,8480
6
6
  commonmeta/constants.py,sha256=wSTEUiHeRdXLwjXEQD9AU2hxFyEKi5OTX2iHOKO6nF0,19844
7
7
  commonmeta/date_utils.py,sha256=H2cCobX0JREIUOT_cCigGd3MG7prGiQpXk1m4ZNrFwU,6318
8
- commonmeta/doi_utils.py,sha256=ZztajfOLtnASk1BbQ1Y2Q4B_xxlnbujn7Opx5a1U5vY,9582
8
+ commonmeta/doi_utils.py,sha256=kS9wBoZQHvV-fqFoW9j-_aN_7Kj1I6sQdqnqK3Nno0M,11512
9
9
  commonmeta/file_utils.py,sha256=tGvXxScjh-PPo5YvLDyk4sqwY5Q50N0zAmBHVaUOLeU,3268
10
- commonmeta/metadata.py,sha256=k_u2ZE2_GbCQrzAPms5ywa2ylQ-GrEYJIRTQpPAMHGw,18863
10
+ commonmeta/metadata.py,sha256=U9uYOkKAdgqLiQwQKXMXjrAFvO6avMWp9heJG4cNAAY,18893
11
11
  commonmeta/schema_utils.py,sha256=WGpmMj9cfNMg_55hhgwY9qpO0A1HSvTLQC2equjBftI,1770
12
12
  commonmeta/translators.py,sha256=CBMK4jrXRmGZiAhCh6wsJjhbDJWbcsda8UvXFXxccAw,1363
13
13
  commonmeta/utils.py,sha256=pJnh3EzOU1E2nutnAZsopY_NsUX6zYmxoj5bIYqqWvE,50574
@@ -22,7 +22,7 @@ commonmeta/readers/csl_reader.py,sha256=OxzC2AZKfv43BCah4XGYvlK_LUK-5mxXFcjdzB5v
22
22
  commonmeta/readers/datacite_reader.py,sha256=M6gznf1kisR1WzDZaoR0pLJC6Q4Rtnerodfs8lU2khI,12094
23
23
  commonmeta/readers/datacite_xml_reader.py,sha256=zJSuN9pnWplYFH7V1eneh0OjKTFCNkOLmEMf6fU6_xg,13048
24
24
  commonmeta/readers/inveniordm_reader.py,sha256=6LkT6R20jSFqDdZqAzcREHbdAcIPHiYJvxKsK_mpDdw,8374
25
- commonmeta/readers/jsonfeed_reader.py,sha256=jn7ux79jGD_ChsBH7zU8_CeCEObIbDh3NDjMhHIzE64,14470
25
+ commonmeta/readers/jsonfeed_reader.py,sha256=zcPxxuyAGW8W7w0-VwP9AhpX97qVWHQJUIJ5p4bBbfE,15655
26
26
  commonmeta/readers/kbase_reader.py,sha256=KH3loJvuq2bm8zAYIUG7hTsr5-2Anj3NQvoJUDiqmss,6764
27
27
  commonmeta/readers/openalex_reader.py,sha256=4HUkBsut_iUjhUcC5c1GHgxnKsYQc-fgY43QILgVZEg,12826
28
28
  commonmeta/readers/ris_reader.py,sha256=oQ3G7qQmNwhr4cNp-Gv5UW28J2K1oKpBlPh-tjRtnpQ,3678
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
80
80
  commonmeta/writers/crossref_xml_writer.py,sha256=d-Rb2Vd_g3UW8GM4APIT7fivSQ5GMssZ6Ubi3OykHaw,33479
81
81
  commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
82
82
  commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
83
- commonmeta/writers/inveniordm_writer.py,sha256=LbBM0gLGsvoujp1dV_9JYBda2Ehl6naRww99e4mxF24,23475
83
+ commonmeta/writers/inveniordm_writer.py,sha256=MlfmBnKX5R-dmcgKeVQjOjdJSXXWbwevHFMMM2nH7DM,24100
84
84
  commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
85
85
  commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
86
- commonmeta_py-0.111.dist-info/METADATA,sha256=jhSXxau6pUwREEMi9NvmCk7GgkYly-MdRWhQ2Zh-5wc,7652
87
- commonmeta_py-0.111.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
- commonmeta_py-0.111.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
- commonmeta_py-0.111.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
- commonmeta_py-0.111.dist-info/RECORD,,
86
+ commonmeta_py-0.113.dist-info/METADATA,sha256=UG1-6Kdao3WU9BW3PyrLBdIqD_JCCUDAt_O-2qPc4qI,7652
87
+ commonmeta_py-0.113.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
+ commonmeta_py-0.113.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
+ commonmeta_py-0.113.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
+ commonmeta_py-0.113.dist-info/RECORD,,