commonmeta-py 0.112__py3-none-any.whl → 0.114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +1 -1
- commonmeta/doi_utils.py +89 -7
- commonmeta/readers/jsonfeed_reader.py +35 -6
- commonmeta/writers/inveniordm_writer.py +42 -20
- {commonmeta_py-0.112.dist-info → commonmeta_py-0.114.dist-info}/METADATA +1 -1
- {commonmeta_py-0.112.dist-info → commonmeta_py-0.114.dist-info}/RECORD +9 -9
- {commonmeta_py-0.112.dist-info → commonmeta_py-0.114.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.112.dist-info → commonmeta_py-0.114.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.112.dist-info → commonmeta_py-0.114.dist-info}/licenses/LICENSE +0 -0
commonmeta/__init__.py
CHANGED
commonmeta/doi_utils.py
CHANGED
@@ -302,19 +302,101 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
|
|
302
302
|
return f"https://api.datacite.org/dois?random=true&page[size]={number}"
|
303
303
|
|
304
304
|
|
305
|
-
def is_rogue_scholar_doi(doi: str) -> bool:
|
306
|
-
"""
|
307
|
-
|
308
|
-
|
309
|
-
"10.34732", # not managed by Front Matter
|
305
|
+
def is_rogue_scholar_doi(doi: str, ra: str = "crossref") -> bool:
|
306
|
+
"""Check if a DOI is from Rogue Scholar with specific registration agency"""
|
307
|
+
rogue_scholar_crossref_prefixes = [
|
308
|
+
"10.13003",
|
310
309
|
"10.53731",
|
311
310
|
"10.54900",
|
312
|
-
"10.57689",
|
313
|
-
"10.
|
311
|
+
"10.57689",
|
312
|
+
"10.59347",
|
314
313
|
"10.59348",
|
315
314
|
"10.59349",
|
316
315
|
"10.59350",
|
317
316
|
"10.63485",
|
318
317
|
"10.64000",
|
318
|
+
]
|
319
|
+
rogue_scholar_datacite_prefixes = [
|
320
|
+
"10.5438",
|
321
|
+
"10.34732", # not managed by Front Matter
|
322
|
+
"10.57689", # not managed by Front Matter
|
323
|
+
"10.58079", # not managed by Front Matter
|
324
|
+
"10.60804",
|
319
325
|
"10.71938", # not managed by Front Matter
|
326
|
+
# "10.83132",
|
320
327
|
]
|
328
|
+
|
329
|
+
prefix = validate_prefix(doi)
|
330
|
+
if not prefix:
|
331
|
+
return False
|
332
|
+
|
333
|
+
is_crossref = prefix in rogue_scholar_crossref_prefixes
|
334
|
+
is_datacite = prefix in rogue_scholar_datacite_prefixes
|
335
|
+
|
336
|
+
if ra == "crossref":
|
337
|
+
return is_crossref
|
338
|
+
elif ra == "datacite":
|
339
|
+
return is_datacite
|
340
|
+
return is_crossref or is_datacite
|
341
|
+
|
342
|
+
|
343
|
+
def generate_wordpress_doi(prefix: str, slug: str, guid: str) -> str:
|
344
|
+
"""Generate a DOI from a WordPress GUID and slug"""
|
345
|
+
import re
|
346
|
+
|
347
|
+
if not prefix or not guid:
|
348
|
+
return ""
|
349
|
+
|
350
|
+
pattern = re.compile(r"p=(\d+)$")
|
351
|
+
matched = pattern.search(guid)
|
352
|
+
|
353
|
+
if not matched:
|
354
|
+
return ""
|
355
|
+
|
356
|
+
doi = f"https://doi.org/{prefix}/{slug}.{matched.group(1)}"
|
357
|
+
return doi
|
358
|
+
|
359
|
+
|
360
|
+
def generate_doi_from_guid(prefix: str, guid: str) -> str:
|
361
|
+
"""Validates a GUID that is a DOI"""
|
362
|
+
import base32_lib as base32
|
363
|
+
|
364
|
+
if not prefix:
|
365
|
+
return ""
|
366
|
+
|
367
|
+
doi = normalize_doi(guid)
|
368
|
+
if not doi:
|
369
|
+
return ""
|
370
|
+
|
371
|
+
p = validate_prefix(doi)
|
372
|
+
if not p or p != prefix:
|
373
|
+
return ""
|
374
|
+
|
375
|
+
suffix = doi.split("/")[-1]
|
376
|
+
|
377
|
+
try:
|
378
|
+
number = base32.decode(suffix, checksum=True)
|
379
|
+
if number != 0:
|
380
|
+
return doi
|
381
|
+
except (ValueError, IndexError):
|
382
|
+
pass
|
383
|
+
|
384
|
+
return ""
|
385
|
+
|
386
|
+
|
387
|
+
def generate_substack_doi(prefix: str, guid: str) -> str:
|
388
|
+
"""Generate a DOI from a Substack GUID"""
|
389
|
+
import base32_lib as base32
|
390
|
+
|
391
|
+
if not prefix or not guid:
|
392
|
+
return ""
|
393
|
+
|
394
|
+
try:
|
395
|
+
i = int(guid)
|
396
|
+
except ValueError:
|
397
|
+
return ""
|
398
|
+
|
399
|
+
# encode the number using base32 with length=4, split_every=8, and checksum=True
|
400
|
+
suffix = base32.encode(i, length=4, split_every=8, checksum=True)
|
401
|
+
doi = f"https://doi.org/{prefix}/{suffix}"
|
402
|
+
return doi
|
@@ -13,6 +13,9 @@ from ..date_utils import get_date_from_unix_timestamp
|
|
13
13
|
from ..doi_utils import (
|
14
14
|
doi_from_url,
|
15
15
|
encode_doi,
|
16
|
+
generate_doi_from_guid,
|
17
|
+
generate_substack_doi,
|
18
|
+
generate_wordpress_doi,
|
16
19
|
is_rogue_scholar_doi,
|
17
20
|
normalize_doi,
|
18
21
|
validate_doi,
|
@@ -55,13 +58,39 @@ def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
55
58
|
"archive_url", None
|
56
59
|
):
|
57
60
|
url = normalize_url(meta.get("archive_url", None))
|
58
|
-
_id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None)) or url
|
59
|
-
_type = "BlogPost"
|
60
61
|
|
61
|
-
#
|
62
|
-
|
63
|
-
if
|
64
|
-
|
62
|
+
# generate DOI string for registration if not provided
|
63
|
+
_id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None))
|
64
|
+
if _id is None:
|
65
|
+
if meta.get("guid") and py_.get(meta, "blog.doi_reg", False):
|
66
|
+
# Generate DOI based on blogging platform
|
67
|
+
generator = py_.get(meta, "blog.generator")
|
68
|
+
prefix = py_.get(meta, "blog.prefix")
|
69
|
+
slug = py_.get(meta, "blog.slug")
|
70
|
+
guid = meta.get("guid")
|
71
|
+
|
72
|
+
# Import these functions only when needed to avoid circular imports
|
73
|
+
if generator in ["WordPress", "WordPress.com"] and prefix and slug and guid:
|
74
|
+
_id = generate_wordpress_doi(prefix, slug, guid)
|
75
|
+
elif generator == "Substack" and prefix and guid:
|
76
|
+
_id = generate_substack_doi(prefix, guid)
|
77
|
+
elif prefix and guid:
|
78
|
+
_id = generate_doi_from_guid(prefix, guid)
|
79
|
+
|
80
|
+
# If still no DOI but prefix provided and not registered for DOI generation
|
81
|
+
elif py_.get(meta, "blog.prefix") and not py_.get(meta, "blog.doi_reg", False):
|
82
|
+
prefix = py_.get(meta, "blog.prefix")
|
83
|
+
_id = encode_doi(prefix)
|
84
|
+
|
85
|
+
# If override prefix is provided in read_options, use that
|
86
|
+
elif read_options.get("prefix"):
|
87
|
+
_id = encode_doi(read_options.get("prefix"))
|
88
|
+
|
89
|
+
# fall back to url if no DOI can be generated
|
90
|
+
if _id is None:
|
91
|
+
_id = url
|
92
|
+
|
93
|
+
_type = "BlogPost"
|
65
94
|
|
66
95
|
if meta.get("authors", None):
|
67
96
|
contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
|
@@ -23,6 +23,7 @@ from ..utils import (
|
|
23
23
|
get_language,
|
24
24
|
id_from_url,
|
25
25
|
normalize_url,
|
26
|
+
pages_as_string,
|
26
27
|
validate_orcid,
|
27
28
|
validate_ror,
|
28
29
|
)
|
@@ -68,7 +69,7 @@ def write_inveniordm(metadata):
|
|
68
69
|
]
|
69
70
|
)
|
70
71
|
container = metadata.container if metadata.container else {}
|
71
|
-
|
72
|
+
journal_title = (
|
72
73
|
container.get("title", None)
|
73
74
|
if _type not in ["inbook", "inproceedings"]
|
74
75
|
and container.get("type") in ["Journal", "Periodical", "Blog"]
|
@@ -79,6 +80,10 @@ def write_inveniordm(metadata):
|
|
79
80
|
if container.get("identifierType", None) == "ISSN"
|
80
81
|
else None
|
81
82
|
)
|
83
|
+
volume = container.get("volume", None)
|
84
|
+
issue = container.get("issue", None)
|
85
|
+
pages = pages_as_string(container)
|
86
|
+
|
82
87
|
dates = []
|
83
88
|
for date in metadata.date.keys():
|
84
89
|
if metadata.date.get(date, None) is None:
|
@@ -141,7 +146,15 @@ def write_inveniordm(metadata):
|
|
141
146
|
),
|
142
147
|
"custom_fields": compact(
|
143
148
|
{
|
144
|
-
"journal:journal": compact(
|
149
|
+
"journal:journal": compact(
|
150
|
+
{
|
151
|
+
"title": journal_title,
|
152
|
+
"issn": issn,
|
153
|
+
"volume": volume,
|
154
|
+
"issue": issue,
|
155
|
+
"pages": pages,
|
156
|
+
}
|
157
|
+
),
|
145
158
|
"rs:content_html": presence(metadata.content),
|
146
159
|
"rs:image": presence(metadata.image),
|
147
160
|
"rs:generator": container.get("platform", None),
|
@@ -463,7 +476,6 @@ def push_inveniordm(metadata, host: str, token: str, legacy_key: str):
|
|
463
476
|
# optionally update rogue-scholar legacy record
|
464
477
|
if host == "rogue-scholar.org" and legacy_key is not None:
|
465
478
|
record = update_legacy_record(record, legacy_key)
|
466
|
-
print("g", record)
|
467
479
|
except Exception as e:
|
468
480
|
raise InvenioRDMError(f"Unexpected error: {str(e)}")
|
469
481
|
|
@@ -511,12 +523,11 @@ def create_draft_record(record, host, token, input):
|
|
511
523
|
)
|
512
524
|
response.raise_for_status()
|
513
525
|
data = response.json()
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
}
|
526
|
+
record["id"]: data.get("id", None)
|
527
|
+
record["created"] = data.get("created", None)
|
528
|
+
record["updated"] = data.get("updated", None)
|
529
|
+
record["status"] = "draft"
|
530
|
+
return record
|
520
531
|
except requests.exceptions.RequestException as e:
|
521
532
|
raise InvenioRDMError(f"Error creating draft record: {str(e)}")
|
522
533
|
|
@@ -610,20 +621,31 @@ def update_legacy_record(record, legacy_key: str):
|
|
610
621
|
legacy_host = "bosczcmeodcrajtcaddf.supabase.co"
|
611
622
|
|
612
623
|
if not legacy_key:
|
613
|
-
|
614
|
-
|
624
|
+
raise ValueError("no legacy key provided")
|
615
625
|
if not record.get("uuid", None):
|
616
|
-
|
617
|
-
|
618
|
-
now = f"{int(time())}"
|
619
|
-
|
626
|
+
raise ValueError("no UUID provided")
|
620
627
|
if not record.get("doi", None):
|
621
|
-
|
628
|
+
raise ValueError("no valid doi to update")
|
622
629
|
|
623
|
-
|
630
|
+
now = f"{int(time())}"
|
631
|
+
if record.get("id", None) is not None:
|
632
|
+
output = {
|
633
|
+
"rid": record.get("id"),
|
634
|
+
"indexed_at": now,
|
635
|
+
"indexed": "true",
|
636
|
+
"archived": "true",
|
637
|
+
}
|
638
|
+
elif record.get("doi", None) is not None:
|
639
|
+
output = {
|
640
|
+
"doi": record.get("doi"),
|
641
|
+
"indexed_at": now,
|
642
|
+
"indexed": "true",
|
643
|
+
"archived": "true",
|
644
|
+
}
|
645
|
+
else:
|
646
|
+
return record # nothing to update
|
624
647
|
|
625
648
|
request_url = f"https://{legacy_host}/rest/v1/posts?id=eq.{record['uuid']}"
|
626
|
-
|
627
649
|
headers = {
|
628
650
|
"Content-Type": "application/json",
|
629
651
|
"apikey": legacy_key,
|
@@ -635,13 +657,13 @@ def update_legacy_record(record, legacy_key: str):
|
|
635
657
|
response = requests.patch(request_url, json=output, headers=headers, timeout=30)
|
636
658
|
response.raise_for_status()
|
637
659
|
if response.status_code != 204:
|
638
|
-
return
|
660
|
+
return Exception(f"Unexpected status code: {response.status_code}")
|
639
661
|
|
640
662
|
record["status"] = "updated_legacy"
|
641
663
|
return record
|
642
664
|
|
643
665
|
except requests.exceptions.RequestException as e:
|
644
|
-
|
666
|
+
raise InvenioRDMError(f"Error updating legacy record: {str(e)}")
|
645
667
|
|
646
668
|
|
647
669
|
def search_by_slug(slug, type_value, host, token) -> Optional[str]:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: commonmeta-py
|
3
|
-
Version: 0.112
|
3
|
+
Version: 0.114
|
4
4
|
Summary: Library for conversions to/from the Commonmeta scholarly metadata format
|
5
5
|
Project-URL: Homepage, https://python.commonmeta.org
|
6
6
|
Project-URL: Repository, https://github.com/front-matter/commonmeta-py
|
@@ -1,11 +1,11 @@
|
|
1
|
-
commonmeta/__init__.py,sha256=
|
1
|
+
commonmeta/__init__.py,sha256=G4y5yCVJHisJQAgUH6EKoM6g79z1I5zu8mfvG9tuYpE,2098
|
2
2
|
commonmeta/api_utils.py,sha256=y5KLfIOWOjde7LXZ36u-eneQJ-Q53yXUZg3hWpCBS2E,2685
|
3
3
|
commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
|
4
4
|
commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
|
5
5
|
commonmeta/cli.py,sha256=pdBpBosLNq3RS9buO-Voqawc9Ay1eSt-xP5O97iOft4,8480
|
6
6
|
commonmeta/constants.py,sha256=wSTEUiHeRdXLwjXEQD9AU2hxFyEKi5OTX2iHOKO6nF0,19844
|
7
7
|
commonmeta/date_utils.py,sha256=H2cCobX0JREIUOT_cCigGd3MG7prGiQpXk1m4ZNrFwU,6318
|
8
|
-
commonmeta/doi_utils.py,sha256=
|
8
|
+
commonmeta/doi_utils.py,sha256=kS9wBoZQHvV-fqFoW9j-_aN_7Kj1I6sQdqnqK3Nno0M,11512
|
9
9
|
commonmeta/file_utils.py,sha256=tGvXxScjh-PPo5YvLDyk4sqwY5Q50N0zAmBHVaUOLeU,3268
|
10
10
|
commonmeta/metadata.py,sha256=U9uYOkKAdgqLiQwQKXMXjrAFvO6avMWp9heJG4cNAAY,18893
|
11
11
|
commonmeta/schema_utils.py,sha256=WGpmMj9cfNMg_55hhgwY9qpO0A1HSvTLQC2equjBftI,1770
|
@@ -22,7 +22,7 @@ commonmeta/readers/csl_reader.py,sha256=OxzC2AZKfv43BCah4XGYvlK_LUK-5mxXFcjdzB5v
|
|
22
22
|
commonmeta/readers/datacite_reader.py,sha256=M6gznf1kisR1WzDZaoR0pLJC6Q4Rtnerodfs8lU2khI,12094
|
23
23
|
commonmeta/readers/datacite_xml_reader.py,sha256=zJSuN9pnWplYFH7V1eneh0OjKTFCNkOLmEMf6fU6_xg,13048
|
24
24
|
commonmeta/readers/inveniordm_reader.py,sha256=6LkT6R20jSFqDdZqAzcREHbdAcIPHiYJvxKsK_mpDdw,8374
|
25
|
-
commonmeta/readers/jsonfeed_reader.py,sha256=
|
25
|
+
commonmeta/readers/jsonfeed_reader.py,sha256=zcPxxuyAGW8W7w0-VwP9AhpX97qVWHQJUIJ5p4bBbfE,15655
|
26
26
|
commonmeta/readers/kbase_reader.py,sha256=KH3loJvuq2bm8zAYIUG7hTsr5-2Anj3NQvoJUDiqmss,6764
|
27
27
|
commonmeta/readers/openalex_reader.py,sha256=4HUkBsut_iUjhUcC5c1GHgxnKsYQc-fgY43QILgVZEg,12826
|
28
28
|
commonmeta/readers/ris_reader.py,sha256=oQ3G7qQmNwhr4cNp-Gv5UW28J2K1oKpBlPh-tjRtnpQ,3678
|
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
|
|
80
80
|
commonmeta/writers/crossref_xml_writer.py,sha256=d-Rb2Vd_g3UW8GM4APIT7fivSQ5GMssZ6Ubi3OykHaw,33479
|
81
81
|
commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
|
82
82
|
commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
|
83
|
-
commonmeta/writers/inveniordm_writer.py,sha256=
|
83
|
+
commonmeta/writers/inveniordm_writer.py,sha256=1WgFPT6no2v-rurNo-02RcgN3Ic88MdbnKBnVQlv9zo,24142
|
84
84
|
commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
|
85
85
|
commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
|
86
|
-
commonmeta_py-0.
|
87
|
-
commonmeta_py-0.
|
88
|
-
commonmeta_py-0.
|
89
|
-
commonmeta_py-0.
|
90
|
-
commonmeta_py-0.
|
86
|
+
commonmeta_py-0.114.dist-info/METADATA,sha256=a0JivRZx0DkvMk3O1na9HIMHGgUZuvBBDk0Smc8H-Qc,7652
|
87
|
+
commonmeta_py-0.114.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
88
|
+
commonmeta_py-0.114.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
|
89
|
+
commonmeta_py-0.114.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
|
90
|
+
commonmeta_py-0.114.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|