commonmeta-py 0.126__py3-none-any.whl → 0.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +2 -1
- commonmeta/utils.py +71 -0
- commonmeta/writers/inveniordm_writer.py +48 -41
- {commonmeta_py-0.126.dist-info → commonmeta_py-0.127.dist-info}/METADATA +1 -1
- {commonmeta_py-0.126.dist-info → commonmeta_py-0.127.dist-info}/RECORD +8 -8
- {commonmeta_py-0.126.dist-info → commonmeta_py-0.127.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.126.dist-info → commonmeta_py-0.127.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.126.dist-info → commonmeta_py-0.127.dist-info}/licenses/LICENSE +0 -0
commonmeta/__init__.py
CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.126"
|
13
|
+
__version__ = "0.127"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
@@ -96,6 +96,7 @@ from .utils import (
|
|
96
96
|
normalize_url,
|
97
97
|
pages_as_string,
|
98
98
|
replace_curie,
|
99
|
+
string_to_slug,
|
99
100
|
to_csl,
|
100
101
|
validate_orcid,
|
101
102
|
validate_ror,
|
commonmeta/utils.py
CHANGED
@@ -128,6 +128,57 @@ FOS_MAPPINGS = {
|
|
128
128
|
"Other humanities": "http://www.oecd.org/science/inno/38235147.pdf?6.5",
|
129
129
|
}
|
130
130
|
|
131
|
+
FOS_TO_STRING_MAPPINGS = {
|
132
|
+
"Natural sciences": "naturalSciences",
|
133
|
+
"Mathematics": "mathematics",
|
134
|
+
"Computer and information sciences": "computerAndInformationSciences",
|
135
|
+
"Physical sciences": "physicalSciences",
|
136
|
+
"Chemical sciences": "chemicalSciences",
|
137
|
+
"Earth and related environmental sciences": "earthAndRelatedEnvironmentalSciences",
|
138
|
+
"Biological sciences": "biologicalSciences",
|
139
|
+
"Other natural sciences": "otherNaturalSciences",
|
140
|
+
"Engineering and technology": "engineeringAndTechnology",
|
141
|
+
"Civil engineering": "civilEngineering",
|
142
|
+
"Electrical engineering, electronic engineering, information engineering": "electricalEngineering",
|
143
|
+
"Mechanical engineering": "mechanicalEngineering",
|
144
|
+
"Chemical engineering": "chemicalEngineering",
|
145
|
+
"Materials engineering": "materialsEngineering",
|
146
|
+
"Medical engineering": "medicalEngineering",
|
147
|
+
"Environmental engineering": "environmentalEngineering",
|
148
|
+
"Environmental biotechnology": "environmentalBiotechnology",
|
149
|
+
"Industrial biotechnology": "industrialBiotechnology",
|
150
|
+
"Nano technology": "nanoTechnology",
|
151
|
+
"Other engineering and technologies": "otherEngineeringAndTechnologies",
|
152
|
+
"Medical and health sciences": "medicalAndHealthSciences",
|
153
|
+
"Basic medicine": "basicMedicine",
|
154
|
+
"Clinical medicine": "clinicalMedicine",
|
155
|
+
"Health sciences": "healthSciences",
|
156
|
+
"Health biotechnology": "healthBiotechnology",
|
157
|
+
"Other medical sciences": "otherMedicalSciences",
|
158
|
+
"Agricultural sciences": "agriculturalSciences",
|
159
|
+
"Agriculture, forestry, and fisheries": "agricultureForestryAndFisheries",
|
160
|
+
"Animal and dairy science": "animalAndDairyScience",
|
161
|
+
"Veterinary science": "veterinaryScience",
|
162
|
+
"Agricultural biotechnology": "agriculturalBiotechnology",
|
163
|
+
"Other agricultural sciences": "otherAgriculturalSciences",
|
164
|
+
"Social science": "socialScience",
|
165
|
+
"Psychology": "psychology",
|
166
|
+
"Economics and business": "economicsAndBusiness",
|
167
|
+
"Educational sciences": "educationalSciences",
|
168
|
+
"Sociology": "sociology",
|
169
|
+
"Law": "law",
|
170
|
+
"Political science": "politicalScience",
|
171
|
+
"Social and economic geography": "socialAndEconomicGeography",
|
172
|
+
"Media and communications": "mediaAndCommunications",
|
173
|
+
"Other social sciences": "otherSocialSciences",
|
174
|
+
"Humanities": "humanities",
|
175
|
+
"History and archaeology": "historyAndArchaeology",
|
176
|
+
"Languages and literature": "languagesAndLiterature",
|
177
|
+
"Philosophy, ethics and religion": "philosophyEthicsAndReligion",
|
178
|
+
"Arts (arts, history of arts, performing arts, music)": "artsArtsHistoryOfArtsPerformingArtsMusic",
|
179
|
+
"Other humanities": "otherHumanities",
|
180
|
+
}
|
181
|
+
|
131
182
|
|
132
183
|
def normalize_id(pid: Optional[str], **kwargs) -> Optional[str]:
|
133
184
|
"""Check for valid DOI or HTTP(S) URL"""
|
@@ -1274,6 +1325,26 @@ def subjects_as_string(subjects):
|
|
1274
1325
|
return ", ".join(keywords)
|
1275
1326
|
|
1276
1327
|
|
1328
|
+
def string_to_slug(text):
|
1329
|
+
"""makes a string lowercase and removes non-alphanumeric characters"""
|
1330
|
+
# Remove FOS (Fields of Science) prefix
|
1331
|
+
text = text.removeprefix("FOS: ")
|
1332
|
+
# Lookup FOS name
|
1333
|
+
slug = FOS_TO_STRING_MAPPINGS.get(text, None)
|
1334
|
+
if slug is not None:
|
1335
|
+
return slug.lower()
|
1336
|
+
|
1337
|
+
# Replace spaces with hyphens
|
1338
|
+
slug = re.sub(r"\s+", "-", text.lower())
|
1339
|
+
# Remove special characters
|
1340
|
+
slug = re.sub(r"[^a-z0-9-]", "", slug)
|
1341
|
+
# Remove multiple consecutive hyphens
|
1342
|
+
slug = re.sub(r"-+", "-", slug)
|
1343
|
+
# Remove leading and trailing hyphens
|
1344
|
+
slug = slug.strip("-")
|
1345
|
+
return slug
|
1346
|
+
|
1347
|
+
|
1277
1348
|
# def reverse():
|
1278
1349
|
# return { 'citation': wrap(related_identifiers).select do |ri|
|
1279
1350
|
# ri['relationType'] == 'IsReferencedBy'
|
commonmeta/writers/inveniordm_writer.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
"""InvenioRDM writer for commonmeta-py"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
import re
|
5
4
|
from time import time
|
6
5
|
from typing import Dict, Optional
|
7
6
|
|
8
7
|
import orjson as json
|
9
8
|
import pydash as py_
|
10
9
|
import requests
|
10
|
+
from urllib3._collections import HTTPHeaderDict
|
11
11
|
|
12
12
|
from ..base_utils import compact, parse_attributes, presence, wrap
|
13
13
|
from ..constants import (
|
@@ -18,13 +18,14 @@ from ..constants import (
|
|
18
18
|
Commonmeta,
|
19
19
|
)
|
20
20
|
from ..date_utils import get_iso8601_date
|
21
|
-
from ..doi_utils import
|
21
|
+
from ..doi_utils import doi_from_url, normalize_doi
|
22
22
|
from ..utils import (
|
23
23
|
FOS_MAPPINGS,
|
24
24
|
get_language,
|
25
25
|
id_from_url,
|
26
26
|
normalize_url,
|
27
27
|
pages_as_string,
|
28
|
+
string_to_slug,
|
28
29
|
validate_orcid,
|
29
30
|
validate_ror,
|
30
31
|
)
|
@@ -399,28 +400,17 @@ def push_inveniordm(metadata: Commonmeta, host: str, token: str, **kwargs) -> Di
|
|
399
400
|
"doi": doi,
|
400
401
|
}
|
401
402
|
|
402
|
-
# extract optional information needed
|
403
|
-
#
|
404
|
-
#
|
405
|
-
#
|
403
|
+
# extract optional information needed
|
404
|
+
# uuid is the Rogue Scholar uuid
|
405
|
+
# community_id is the id of the primary community of the record,
|
406
|
+
# in the case of Rogue Scholar the blog community
|
407
|
+
|
406
408
|
if hasattr(metadata, "identifiers") and metadata.identifiers:
|
407
|
-
rid_index = None
|
408
|
-
uuid_index = None
|
409
409
|
for i, identifier in enumerate(metadata.identifiers):
|
410
|
-
if identifier.get("identifierType") == "
|
411
|
-
"identifier"
|
412
|
-
):
|
413
|
-
record["id"] = identifier.get("identifier")
|
414
|
-
rid_index = i
|
415
|
-
elif identifier.get("identifierType") == "UUID" and identifier.get(
|
410
|
+
if identifier.get("identifierType") == "UUID" and identifier.get(
|
416
411
|
"identifier"
|
417
412
|
):
|
418
413
|
record["uuid"] = identifier.get("identifier")
|
419
|
-
uuid_index = i
|
420
|
-
if rid_index is not None:
|
421
|
-
metadata.identifiers.pop(rid_index)
|
422
|
-
if uuid_index is not None:
|
423
|
-
metadata.identifiers.pop(uuid_index)
|
424
414
|
|
425
415
|
if hasattr(metadata, "relations") and metadata.relations:
|
426
416
|
community_index = None
|
@@ -434,6 +424,7 @@ def push_inveniordm(metadata: Commonmeta, host: str, token: str, **kwargs) -> Di
|
|
434
424
|
record["community"] = slug
|
435
425
|
record["community_id"] = community_id
|
436
426
|
community_index = i
|
427
|
+
break
|
437
428
|
|
438
429
|
if community_index is not None:
|
439
430
|
metadata.relations.pop(community_index)
|
@@ -496,8 +487,14 @@ def add_record_to_communities(
|
|
496
487
|
) -> dict:
|
497
488
|
"""Add record to one or more InvenioRDM communities"""
|
498
489
|
|
490
|
+
communities = get_record_communities(record, host, token)
|
491
|
+
community_ids = [c.get("id") for c in communities] if communities else []
|
492
|
+
|
499
493
|
# Add record to primary community if primary community is specified
|
500
|
-
if
|
494
|
+
if (
|
495
|
+
record.get("community_id", None) is not None
|
496
|
+
and record.get("community_id") not in community_ids
|
497
|
+
):
|
501
498
|
record = add_record_to_community(record, host, token, record["community_id"])
|
502
499
|
|
503
500
|
# Add record to subject area community if subject area community is specified
|
@@ -509,9 +506,8 @@ def add_record_to_communities(
|
|
509
506
|
slug = string_to_slug(subject_name)
|
510
507
|
if slug in COMMUNITY_TRANSLATIONS:
|
511
508
|
slug = COMMUNITY_TRANSLATIONS[slug]
|
512
|
-
|
513
509
|
community_id = search_by_slug(slug, "topic", host, token)
|
514
|
-
if community_id:
|
510
|
+
if community_id and community_id not in community_ids:
|
515
511
|
record = add_record_to_community(record, host, token, community_id)
|
516
512
|
|
517
513
|
# Add record to communities defined as IsPartOf relation in InvenioRDM RelatedIdentifiers
|
@@ -522,7 +518,7 @@ def add_record_to_communities(
|
|
522
518
|
).startswith(f"https://{host}/api/communities/"):
|
523
519
|
slug = identifier.get("identifier").split("/")[5]
|
524
520
|
community_id = search_by_slug(slug, "topic", host, token)
|
525
|
-
if community_id:
|
521
|
+
if community_id and community_id not in community_ids:
|
526
522
|
record = add_record_to_community(record, host, token, community_id)
|
527
523
|
|
528
524
|
return record
|
@@ -664,8 +660,6 @@ def publish_draft_record(record, host, token):
|
|
664
660
|
record["status"] = "error_publish_draft_record"
|
665
661
|
return record
|
666
662
|
data = response.json()
|
667
|
-
record["uuid"] = py_.get(data, "metadata.identifiers.0.identifier")
|
668
|
-
record["doi"] = (doi_as_url(py_.get(data, "pids.doi.identifier")),)
|
669
663
|
record["created"] = data.get("created", None)
|
670
664
|
record["updated"] = data.get("updated", None)
|
671
665
|
record["status"] = "published"
|
@@ -676,17 +670,39 @@ def publish_draft_record(record, host, token):
|
|
676
670
|
return record
|
677
671
|
|
678
672
|
|
673
|
+
def get_record_communities(record, host, token):
|
674
|
+
"""Get record communities by id"""
|
675
|
+
headers = {
|
676
|
+
"Authorization": f"Bearer {token}",
|
677
|
+
"Content-Type": "application/json",
|
678
|
+
}
|
679
|
+
try:
|
680
|
+
response = requests.get(
|
681
|
+
f"https://{host}/api/records/{record['id']}/communities",
|
682
|
+
headers=headers,
|
683
|
+
)
|
684
|
+
response.raise_for_status()
|
685
|
+
data = response.json()
|
686
|
+
if py_.get(data, "hits.total", 0) > 0:
|
687
|
+
return py_.get(data, "hits.hits")
|
688
|
+
return None
|
689
|
+
except requests.exceptions.RequestException as e:
|
690
|
+
logger.error(f"Error getting communities: {str(e)}", exc_info=True)
|
691
|
+
return None
|
692
|
+
|
693
|
+
|
679
694
|
def add_record_to_community(record, host, token, community_id):
|
680
|
-
"""Add a record to a community
|
695
|
+
"""Add a record to a community"""
|
681
696
|
headers = {
|
682
697
|
"Authorization": f"Bearer {token}",
|
683
698
|
"Content-Type": "application/json",
|
684
699
|
}
|
700
|
+
json = {"communities": [{"id": community_id}]}
|
685
701
|
try:
|
686
702
|
response = requests.post(
|
687
703
|
f"https://{host}/api/records/{record['id']}/communities",
|
688
704
|
headers=headers,
|
689
|
-
json=
|
705
|
+
json=json,
|
690
706
|
)
|
691
707
|
response.raise_for_status()
|
692
708
|
return record
|
@@ -750,7 +766,11 @@ def search_by_slug(slug: str, type_value: str, host: str, token: str) -> Optiona
|
|
750
766
|
"Authorization": f"Bearer {token}",
|
751
767
|
"Content-Type": "application/json",
|
752
768
|
}
|
753
|
-
params =
|
769
|
+
params = HTTPHeaderDict()
|
770
|
+
params.add("q", f"slug:{slug}")
|
771
|
+
params.add("type", type_value)
|
772
|
+
params.add("type", "subject")
|
773
|
+
params.add("size", 1)
|
754
774
|
try:
|
755
775
|
response = requests.get(
|
756
776
|
f"https://{host}/api/communities", headers=headers, params=params
|
@@ -765,18 +785,5 @@ def search_by_slug(slug: str, type_value: str, host: str, token: str) -> Optiona
|
|
765
785
|
return None
|
766
786
|
|
767
787
|
|
768
|
-
def string_to_slug(text):
|
769
|
-
"""makes a string lowercase and removes non-alphanumeric characters"""
|
770
|
-
# Replace spaces with hyphens
|
771
|
-
slug = re.sub(r"\s+", "-", text.lower())
|
772
|
-
# Remove special characters
|
773
|
-
slug = re.sub(r"[^a-z0-9-]", "", slug)
|
774
|
-
# Remove multiple consecutive hyphens
|
775
|
-
slug = re.sub(r"-+", "-", slug)
|
776
|
-
# Remove leading and trailing hyphens
|
777
|
-
slug = slug.strip("-")
|
778
|
-
return slug
|
779
|
-
|
780
|
-
|
781
788
|
class InvenioRDMError(Exception):
|
782
789
|
"""Custom exception for InvenioRDM API errors"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: commonmeta-py
|
3
|
-
Version: 0.126
|
3
|
+
Version: 0.127
|
4
4
|
Summary: Library for conversions to/from the Commonmeta scholarly metadata format
|
5
5
|
Project-URL: Homepage, https://python.commonmeta.org
|
6
6
|
Project-URL: Repository, https://github.com/front-matter/commonmeta-py
|
@@ -1,4 +1,4 @@
|
|
1
|
-
commonmeta/__init__.py,sha256=
|
1
|
+
commonmeta/__init__.py,sha256=42EVUELvqJzvTf3X-JSjCvuqNyS1chKRYTzVrwp8ZY0,2118
|
2
2
|
commonmeta/api_utils.py,sha256=P8LMHHYiF4OTi97_5k4KstcBreooMkOAKZ4ebxsAv4o,2691
|
3
3
|
commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
|
4
4
|
commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
|
@@ -10,7 +10,7 @@ commonmeta/file_utils.py,sha256=eFYDWyR8Gr722nvFmp542hCm-TGmO_q4ciZ85IPHpjA,2893
|
|
10
10
|
commonmeta/metadata.py,sha256=90aTe47d071wHxwcNsOqU5lSVPKP8wAPnPHhddj3Fuo,18443
|
11
11
|
commonmeta/schema_utils.py,sha256=zn3gqAHciUOQmrw9okR68weFs-yqPPyORFt-Zl1D3Lw,1924
|
12
12
|
commonmeta/translators.py,sha256=CBMK4jrXRmGZiAhCh6wsJjhbDJWbcsda8UvXFXxccAw,1363
|
13
|
-
commonmeta/utils.py,sha256=
|
13
|
+
commonmeta/utils.py,sha256=HzgVlcRqtDXMwlbTXL0AKL3jHB6E9kEAljS5nXKdOm0,53852
|
14
14
|
commonmeta/readers/__init__.py,sha256=vOf7UsOKNoh_ZCuyexxhAmPMt8wjB-pF_CfpWRaN8pk,45
|
15
15
|
commonmeta/readers/bibtex_reader.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
commonmeta/readers/cff_reader.py,sha256=HMFK6QIg_XIlhmYIWL4EfIyuidYl5L_0TAwyG78XPlU,6244
|
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
|
|
80
80
|
commonmeta/writers/crossref_xml_writer.py,sha256=rcPOfrGxU4mX7_fFywYWDW2FFUoKW9wD-JzW8nX1ipI,33915
|
81
81
|
commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
|
82
82
|
commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
|
83
|
-
commonmeta/writers/inveniordm_writer.py,sha256=
|
83
|
+
commonmeta/writers/inveniordm_writer.py,sha256=w4DTPjWk1YBtva9RWAq9DnDzmhpa0ejws2h2h-Pfz7A,26854
|
84
84
|
commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
|
85
85
|
commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
|
86
|
-
commonmeta_py-0.
|
87
|
-
commonmeta_py-0.
|
88
|
-
commonmeta_py-0.
|
89
|
-
commonmeta_py-0.
|
90
|
-
commonmeta_py-0.
|
86
|
+
commonmeta_py-0.127.dist-info/METADATA,sha256=JHsi5YfGnb7zlq9vkkG_rh0PqIRbfKnLC7bM5QF8v1I,7656
|
87
|
+
commonmeta_py-0.127.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
88
|
+
commonmeta_py-0.127.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
|
89
|
+
commonmeta_py-0.127.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
|
90
|
+
commonmeta_py-0.127.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|