commonmeta-py 0.126__py3-none-any.whl → 0.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
commonmeta/__init__.py CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.126"
13
+ __version__ = "0.127"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
@@ -96,6 +96,7 @@ from .utils import (
96
96
  normalize_url,
97
97
  pages_as_string,
98
98
  replace_curie,
99
+ string_to_slug,
99
100
  to_csl,
100
101
  validate_orcid,
101
102
  validate_ror,
commonmeta/utils.py CHANGED
@@ -128,6 +128,57 @@ FOS_MAPPINGS = {
128
128
  "Other humanities": "http://www.oecd.org/science/inno/38235147.pdf?6.5",
129
129
  }
130
130
 
131
+ FOS_TO_STRING_MAPPINGS = {
132
+ "Natural sciences": "naturalSciences",
133
+ "Mathematics": "mathematics",
134
+ "Computer and information sciences": "computerAndInformationSciences",
135
+ "Physical sciences": "physicalSciences",
136
+ "Chemical sciences": "chemicalSciences",
137
+ "Earth and related environmental sciences": "earthAndRelatedEnvironmentalSciences",
138
+ "Biological sciences": "biologicalSciences",
139
+ "Other natural sciences": "otherNaturalSciences",
140
+ "Engineering and technology": "engineeringAndTechnology",
141
+ "Civil engineering": "civilEngineering",
142
+ "Electrical engineering, electronic engineering, information engineering": "electricalEngineering",
143
+ "Mechanical engineering": "mechanicalEngineering",
144
+ "Chemical engineering": "chemicalEngineering",
145
+ "Materials engineering": "materialsEngineering",
146
+ "Medical engineering": "medicalEngineering",
147
+ "Environmental engineering": "environmentalEngineering",
148
+ "Environmental biotechnology": "environmentalBiotechnology",
149
+ "Industrial biotechnology": "industrialBiotechnology",
150
+ "Nano technology": "nanoTechnology",
151
+ "Other engineering and technologies": "otherEngineeringAndTechnologies",
152
+ "Medical and health sciences": "medicalAndHealthSciences",
153
+ "Basic medicine": "basicMedicine",
154
+ "Clinical medicine": "clinicalMedicine",
155
+ "Health sciences": "healthSciences",
156
+ "Health biotechnology": "healthBiotechnology",
157
+ "Other medical sciences": "otherMedicalSciences",
158
+ "Agricultural sciences": "agriculturalSciences",
159
+ "Agriculture, forestry, and fisheries": "agricultureForestryAndFisheries",
160
+ "Animal and dairy science": "animalAndDairyScience",
161
+ "Veterinary science": "veterinaryScience",
162
+ "Agricultural biotechnology": "agriculturalBiotechnology",
163
+ "Other agricultural sciences": "otherAgriculturalSciences",
164
+ "Social science": "socialScience",
165
+ "Psychology": "psychology",
166
+ "Economics and business": "economicsAndBusiness",
167
+ "Educational sciences": "educationalSciences",
168
+ "Sociology": "sociology",
169
+ "Law": "law",
170
+ "Political science": "politicalScience",
171
+ "Social and economic geography": "socialAndEconomicGeography",
172
+ "Media and communications": "mediaAndCommunications",
173
+ "Other social sciences": "otherSocialSciences",
174
+ "Humanities": "humanities",
175
+ "History and archaeology": "historyAndArchaeology",
176
+ "Languages and literature": "languagesAndLiterature",
177
+ "Philosophy, ethics and religion": "philosophyEthicsAndReligion",
178
+ "Arts (arts, history of arts, performing arts, music)": "artsArtsHistoryOfArtsPerformingArtsMusic",
179
+ "Other humanities": "otherHumanities",
180
+ }
181
+
131
182
 
132
183
  def normalize_id(pid: Optional[str], **kwargs) -> Optional[str]:
133
184
  """Check for valid DOI or HTTP(S) URL"""
@@ -1274,6 +1325,26 @@ def subjects_as_string(subjects):
1274
1325
  return ", ".join(keywords)
1275
1326
 
1276
1327
 
1328
+ def string_to_slug(text):
1329
+ """makes a string lowercase and removes non-alphanumeric characters"""
1330
+ # Remove FOS (Fields of Science) prefix
1331
+ text = text.removeprefix("FOS: ")
1332
+ # Lookup FOS name
1333
+ slug = FOS_TO_STRING_MAPPINGS.get(text, None)
1334
+ if slug is not None:
1335
+ return slug.lower()
1336
+
1337
+ # Replace spaces with hyphens
1338
+ slug = re.sub(r"\s+", "-", text.lower())
1339
+ # Remove special characters
1340
+ slug = re.sub(r"[^a-z0-9-]", "", slug)
1341
+ # Remove multiple consecutive hyphens
1342
+ slug = re.sub(r"-+", "-", slug)
1343
+ # Remove leading and trailing hyphens
1344
+ slug = slug.strip("-")
1345
+ return slug
1346
+
1347
+
1277
1348
  # def reverse():
1278
1349
  # return { 'citation': wrap(related_identifiers).select do |ri|
1279
1350
  # ri['relationType'] == 'IsReferencedBy'
@@ -1,13 +1,13 @@
1
1
  """InvenioRDM writer for commonmeta-py"""
2
2
 
3
3
  import logging
4
- import re
5
4
  from time import time
6
5
  from typing import Dict, Optional
7
6
 
8
7
  import orjson as json
9
8
  import pydash as py_
10
9
  import requests
10
+ from urllib3._collections import HTTPHeaderDict
11
11
 
12
12
  from ..base_utils import compact, parse_attributes, presence, wrap
13
13
  from ..constants import (
@@ -18,13 +18,14 @@ from ..constants import (
18
18
  Commonmeta,
19
19
  )
20
20
  from ..date_utils import get_iso8601_date
21
- from ..doi_utils import doi_as_url, doi_from_url, normalize_doi
21
+ from ..doi_utils import doi_from_url, normalize_doi
22
22
  from ..utils import (
23
23
  FOS_MAPPINGS,
24
24
  get_language,
25
25
  id_from_url,
26
26
  normalize_url,
27
27
  pages_as_string,
28
+ string_to_slug,
28
29
  validate_orcid,
29
30
  validate_ror,
30
31
  )
@@ -399,28 +400,17 @@ def push_inveniordm(metadata: Commonmeta, host: str, token: str, **kwargs) -> Di
399
400
  "doi": doi,
400
401
  }
401
402
 
402
- # extract optional information needed but not upserted to the InvenioRDM API:
403
- # rid is the InvenioRDM record id,
404
- # uuid is the Rogue Scholar uuid,
405
- # community_id is the id of the primary community of the record
403
+ # extract optional information needed
404
+ # uuid is the Rogue Scholar uuid
405
+ # community_id is the id of the primary community of the record,
406
+ # in the case of Rogue Scholar the blog community
407
+
406
408
  if hasattr(metadata, "identifiers") and metadata.identifiers:
407
- rid_index = None
408
- uuid_index = None
409
409
  for i, identifier in enumerate(metadata.identifiers):
410
- if identifier.get("identifierType") == "RID" and identifier.get(
411
- "identifier"
412
- ):
413
- record["id"] = identifier.get("identifier")
414
- rid_index = i
415
- elif identifier.get("identifierType") == "UUID" and identifier.get(
410
+ if identifier.get("identifierType") == "UUID" and identifier.get(
416
411
  "identifier"
417
412
  ):
418
413
  record["uuid"] = identifier.get("identifier")
419
- uuid_index = i
420
- if rid_index is not None:
421
- metadata.identifiers.pop(rid_index)
422
- if uuid_index is not None:
423
- metadata.identifiers.pop(uuid_index)
424
414
 
425
415
  if hasattr(metadata, "relations") and metadata.relations:
426
416
  community_index = None
@@ -434,6 +424,7 @@ def push_inveniordm(metadata: Commonmeta, host: str, token: str, **kwargs) -> Di
434
424
  record["community"] = slug
435
425
  record["community_id"] = community_id
436
426
  community_index = i
427
+ break
437
428
 
438
429
  if community_index is not None:
439
430
  metadata.relations.pop(community_index)
@@ -496,8 +487,14 @@ def add_record_to_communities(
496
487
  ) -> dict:
497
488
  """Add record to one or more InvenioRDM communities"""
498
489
 
490
+ communities = get_record_communities(record, host, token)
491
+ community_ids = [c.get("id") for c in communities] if communities else []
492
+
499
493
  # Add record to primary community if primary community is specified
500
- if record.get("community_id", None) is not None:
494
+ if (
495
+ record.get("community_id", None) is not None
496
+ and record.get("community_id") not in community_ids
497
+ ):
501
498
  record = add_record_to_community(record, host, token, record["community_id"])
502
499
 
503
500
  # Add record to subject area community if subject area community is specified
@@ -509,9 +506,8 @@ def add_record_to_communities(
509
506
  slug = string_to_slug(subject_name)
510
507
  if slug in COMMUNITY_TRANSLATIONS:
511
508
  slug = COMMUNITY_TRANSLATIONS[slug]
512
-
513
509
  community_id = search_by_slug(slug, "topic", host, token)
514
- if community_id:
510
+ if community_id and community_id not in community_ids:
515
511
  record = add_record_to_community(record, host, token, community_id)
516
512
 
517
513
  # Add record to communities defined as IsPartOf relation in InvenioRDM RelatedIdentifiers
@@ -522,7 +518,7 @@ def add_record_to_communities(
522
518
  ).startswith(f"https://{host}/api/communities/"):
523
519
  slug = identifier.get("identifier").split("/")[5]
524
520
  community_id = search_by_slug(slug, "topic", host, token)
525
- if community_id:
521
+ if community_id and community_id not in community_ids:
526
522
  record = add_record_to_community(record, host, token, community_id)
527
523
 
528
524
  return record
@@ -664,8 +660,6 @@ def publish_draft_record(record, host, token):
664
660
  record["status"] = "error_publish_draft_record"
665
661
  return record
666
662
  data = response.json()
667
- record["uuid"] = py_.get(data, "metadata.identifiers.0.identifier")
668
- record["doi"] = (doi_as_url(py_.get(data, "pids.doi.identifier")),)
669
663
  record["created"] = data.get("created", None)
670
664
  record["updated"] = data.get("updated", None)
671
665
  record["status"] = "published"
@@ -676,17 +670,39 @@ def publish_draft_record(record, host, token):
676
670
  return record
677
671
 
678
672
 
673
+ def get_record_communities(record, host, token):
674
+ """Get record communities by id"""
675
+ headers = {
676
+ "Authorization": f"Bearer {token}",
677
+ "Content-Type": "application/json",
678
+ }
679
+ try:
680
+ response = requests.get(
681
+ f"https://{host}/api/records/{record['id']}/communities",
682
+ headers=headers,
683
+ )
684
+ response.raise_for_status()
685
+ data = response.json()
686
+ if py_.get(data, "hits.total", 0) > 0:
687
+ return py_.get(data, "hits.hits")
688
+ return None
689
+ except requests.exceptions.RequestException as e:
690
+ logger.error(f"Error getting communities: {str(e)}", exc_info=True)
691
+ return None
692
+
693
+
679
694
  def add_record_to_community(record, host, token, community_id):
680
- """Add a record to a community in InvenioRDM"""
695
+ """Add a record to a community"""
681
696
  headers = {
682
697
  "Authorization": f"Bearer {token}",
683
698
  "Content-Type": "application/json",
684
699
  }
700
+ json = {"communities": [{"id": community_id}]}
685
701
  try:
686
702
  response = requests.post(
687
703
  f"https://{host}/api/records/{record['id']}/communities",
688
704
  headers=headers,
689
- json={"id": community_id},
705
+ json=json,
690
706
  )
691
707
  response.raise_for_status()
692
708
  return record
@@ -750,7 +766,11 @@ def search_by_slug(slug: str, type_value: str, host: str, token: str) -> Optiona
750
766
  "Authorization": f"Bearer {token}",
751
767
  "Content-Type": "application/json",
752
768
  }
753
- params = {"q": f"slug:{slug} AND type:{type_value}", "size": 1}
769
+ params = HTTPHeaderDict()
770
+ params.add("q", f"slug:{slug}")
771
+ params.add("type", type_value)
772
+ params.add("type", "subject")
773
+ params.add("size", 1)
754
774
  try:
755
775
  response = requests.get(
756
776
  f"https://{host}/api/communities", headers=headers, params=params
@@ -765,18 +785,5 @@ def search_by_slug(slug: str, type_value: str, host: str, token: str) -> Optiona
765
785
  return None
766
786
 
767
787
 
768
- def string_to_slug(text):
769
- """makes a string lowercase and removes non-alphanumeric characters"""
770
- # Replace spaces with hyphens
771
- slug = re.sub(r"\s+", "-", text.lower())
772
- # Remove special characters
773
- slug = re.sub(r"[^a-z0-9-]", "", slug)
774
- # Remove multiple consecutive hyphens
775
- slug = re.sub(r"-+", "-", slug)
776
- # Remove leading and trailing hyphens
777
- slug = slug.strip("-")
778
- return slug
779
-
780
-
781
788
  class InvenioRDMError(Exception):
782
789
  """Custom exception for InvenioRDM API errors"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commonmeta-py
3
- Version: 0.126
3
+ Version: 0.127
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Project-URL: Homepage, https://python.commonmeta.org
6
6
  Project-URL: Repository, https://github.com/front-matter/commonmeta-py
@@ -1,4 +1,4 @@
1
- commonmeta/__init__.py,sha256=imnf6kwadjjF_Pq3XiozlWDZ5DKHoIOuIcrwypD4zxs,2098
1
+ commonmeta/__init__.py,sha256=42EVUELvqJzvTf3X-JSjCvuqNyS1chKRYTzVrwp8ZY0,2118
2
2
  commonmeta/api_utils.py,sha256=P8LMHHYiF4OTi97_5k4KstcBreooMkOAKZ4ebxsAv4o,2691
3
3
  commonmeta/author_utils.py,sha256=3lYW5s1rOUWNTKs1FP6XLfEUY3yCLOe_3L_VdJTDMp0,8585
4
4
  commonmeta/base_utils.py,sha256=-MGy9q2uTiJEkPWQUYOJMdq-3tRpNnvBwlLjvllQ5g8,11164
@@ -10,7 +10,7 @@ commonmeta/file_utils.py,sha256=eFYDWyR8Gr722nvFmp542hCm-TGmO_q4ciZ85IPHpjA,2893
10
10
  commonmeta/metadata.py,sha256=90aTe47d071wHxwcNsOqU5lSVPKP8wAPnPHhddj3Fuo,18443
11
11
  commonmeta/schema_utils.py,sha256=zn3gqAHciUOQmrw9okR68weFs-yqPPyORFt-Zl1D3Lw,1924
12
12
  commonmeta/translators.py,sha256=CBMK4jrXRmGZiAhCh6wsJjhbDJWbcsda8UvXFXxccAw,1363
13
- commonmeta/utils.py,sha256=pJnh3EzOU1E2nutnAZsopY_NsUX6zYmxoj5bIYqqWvE,50574
13
+ commonmeta/utils.py,sha256=HzgVlcRqtDXMwlbTXL0AKL3jHB6E9kEAljS5nXKdOm0,53852
14
14
  commonmeta/readers/__init__.py,sha256=vOf7UsOKNoh_ZCuyexxhAmPMt8wjB-pF_CfpWRaN8pk,45
15
15
  commonmeta/readers/bibtex_reader.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  commonmeta/readers/cff_reader.py,sha256=HMFK6QIg_XIlhmYIWL4EfIyuidYl5L_0TAwyG78XPlU,6244
@@ -80,11 +80,11 @@ commonmeta/writers/commonmeta_writer.py,sha256=QpfyhG__7o_XpsOTCPWxGymO7YKwZi2LQ
80
80
  commonmeta/writers/crossref_xml_writer.py,sha256=rcPOfrGxU4mX7_fFywYWDW2FFUoKW9wD-JzW8nX1ipI,33915
81
81
  commonmeta/writers/csl_writer.py,sha256=4gDYs1EzK4_L2UIRTfs25wgHmYRwdRP2zmfxF9387oU,2779
82
82
  commonmeta/writers/datacite_writer.py,sha256=bcinpwhq7XnVthKHH8-sdXA34dSlvFH4ImYH768iaQU,6428
83
- commonmeta/writers/inveniordm_writer.py,sha256=MeMFImmjhmtCyNPq3Txd6COIvayE3PImmxm0_nDfDmQ,26796
83
+ commonmeta/writers/inveniordm_writer.py,sha256=w4DTPjWk1YBtva9RWAq9DnDzmhpa0ejws2h2h-Pfz7A,26854
84
84
  commonmeta/writers/ris_writer.py,sha256=3SdyEvMRaPRP1SV1MB-MXBlunE7x6og7RF1zuWtetPc,2094
85
85
  commonmeta/writers/schema_org_writer.py,sha256=s18_x0ReXwAGBoEAwp2q-HCgFQ-h5qRg6JyAlqCoSFE,5871
86
- commonmeta_py-0.126.dist-info/METADATA,sha256=w6m0e9-YGX-8r8u0AHoI5h71SaLIdwGmtrJTY4df4r0,7656
87
- commonmeta_py-0.126.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
- commonmeta_py-0.126.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
- commonmeta_py-0.126.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
- commonmeta_py-0.126.dist-info/RECORD,,
86
+ commonmeta_py-0.127.dist-info/METADATA,sha256=JHsi5YfGnb7zlq9vkkG_rh0PqIRbfKnLC7bM5QF8v1I,7656
87
+ commonmeta_py-0.127.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
88
+ commonmeta_py-0.127.dist-info/entry_points.txt,sha256=U4w4BoRuS3rN5t5Y-uYSyOeU5Lh_VRVMS9OIDzIgw4w,50
89
+ commonmeta_py-0.127.dist-info/licenses/LICENSE,sha256=wsIvxF9Q9GC9vA_s79zTWP3BkXJdfUNRmALlU8GbW1s,1074
90
+ commonmeta_py-0.127.dist-info/RECORD,,