commonmeta-py 0.106__py3-none-any.whl → 0.108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. commonmeta/__init__.py +12 -3
  2. commonmeta/api_utils.py +3 -2
  3. commonmeta/base_utils.py +186 -3
  4. commonmeta/cli.py +114 -34
  5. commonmeta/constants.py +20 -0
  6. commonmeta/file_utils.py +112 -0
  7. commonmeta/metadata.py +102 -42
  8. commonmeta/readers/codemeta_reader.py +1 -1
  9. commonmeta/readers/crossref_reader.py +23 -10
  10. commonmeta/readers/crossref_xml_reader.py +1 -1
  11. commonmeta/readers/datacite_reader.py +6 -4
  12. commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
  13. commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
  14. commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
  15. commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
  16. commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
  17. commonmeta/resources/crossref/fundref.xsd +29 -19
  18. commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
  19. commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
  20. commonmeta/resources/crossref/module-ali.xsd +14 -6
  21. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
  22. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
  23. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
  24. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
  25. commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
  26. commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
  27. commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
  28. commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
  29. commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
  30. commonmeta/resources/crossref/xml.xsd +287 -0
  31. commonmeta/schema_utils.py +25 -0
  32. commonmeta/utils.py +90 -15
  33. commonmeta/writers/bibtex_writer.py +5 -5
  34. commonmeta/writers/citation_writer.py +10 -5
  35. commonmeta/writers/commonmeta_writer.py +5 -17
  36. commonmeta/writers/crossref_xml_writer.py +1032 -4
  37. commonmeta/writers/csl_writer.py +6 -6
  38. commonmeta/writers/datacite_writer.py +11 -6
  39. commonmeta/writers/inveniordm_writer.py +286 -10
  40. commonmeta/writers/ris_writer.py +3 -3
  41. commonmeta/writers/schema_org_writer.py +10 -5
  42. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/METADATA +5 -2
  43. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/RECORD +46 -32
  44. commonmeta/crossref_utils.py +0 -583
  45. commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
  46. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
  47. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
  48. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/utils.py CHANGED
@@ -408,14 +408,52 @@ def openalex_api_url(id: str, identifier_type: str, **kwargs) -> str:
408
408
 
409
409
  def openalex_api_query_url(query: dict) -> str:
410
410
  """Return the OpenAlex API query URL"""
411
+ # Define allowed types
412
+ types = [
413
+ "article",
414
+ "book-chapter",
415
+ "dataset",
416
+ "preprint",
417
+ "dissertation",
418
+ "book",
419
+ "review",
420
+ "paratext",
421
+ "libguides",
422
+ "letter",
423
+ "other",
424
+ "reference-entry",
425
+ "report",
426
+ "editorial",
427
+ "peer-review",
428
+ "erratum",
429
+ "standard",
430
+ "grant",
431
+ "supplementary-materials",
432
+ "retraction",
433
+ ]
434
+
411
435
  url = "https://api.openalex.org/works"
412
436
  f = furl(url)
413
- rows = min(int(query.get("rows", 20)), 1000)
414
- queries = []
437
+
438
+ # Handle pagination and sample parameters
439
+ number = max(1, min(1000, int(query.get("number", query.get("rows", 10)))))
440
+ page = max(1, int(query.get("page", 1)))
441
+
442
+ sample = query.get("sample", False)
443
+ if sample:
444
+ f.args["sample"] = str(number)
445
+ else:
446
+ f.args["per-page"] = str(number)
447
+ f.args["page"] = str(page)
448
+ # Sort results by published date in descending order
449
+ f.args["sort"] = "publication_date:desc"
450
+
451
+ # Build filters
415
452
  filters = []
453
+ queries = []
416
454
  _query = None
417
- _filter = None
418
455
 
456
+ # Handle query parameters
419
457
  if query.get("query", None) is not None:
420
458
  queries += [query.get("query")]
421
459
  for key, value in query.items():
@@ -428,23 +466,60 @@ def openalex_api_query_url(query: dict) -> str:
428
466
  queries += [f"{key}:{value}"]
429
467
  if queries:
430
468
  _query = ",".join(queries)
469
+ f.args["query"] = _query
470
+
471
+ # Member/IDs filter
472
+ ids = query.get("ids", query.get("member", ""))
473
+ if ids:
474
+ filters.append(f"member:{ids}")
475
+
476
+ # Type filter
477
+ type_ = query.get("type_", query.get("type", ""))
478
+ if type_ and type_ in types:
479
+ filters.append(f"type:{type_}")
431
480
 
481
+ # ROR filter
482
+ ror = query.get("ror", "")
483
+ if ror:
484
+ r = validate_ror(ror)
485
+ if r:
486
+ filters.append(f"authorships.institutions.ror:{r}")
487
+
488
+ # ORCID filter
489
+ orcid = query.get("orcid", "")
490
+ if orcid:
491
+ o = validate_orcid(orcid)
492
+ if o:
493
+ filters.append(f"authorships.author.orcid:{o}")
494
+
495
+ # Year filter
496
+ year = query.get("year", query.get("publication_year", ""))
497
+ if year:
498
+ filters.append(f"publication_year:{year}")
499
+
500
+ # Other filters from the original function
432
501
  for key, value in query.items():
433
502
  if key in [
434
503
  "prefix",
435
- "member",
436
- "type",
437
504
  "has-full-text",
438
- "has-references",
439
- "has-orcid",
440
505
  "has-funder",
441
506
  "has-license",
442
507
  ]:
443
- filters += [f"{key}:{value}"]
444
- if filters:
445
- _filter = ",".join(filters)
508
+ filters.append(f"{key}:{value}")
509
+
510
+ # Boolean filters
511
+ # if query.get("hasORCID", query.get("has-orcid", False)):
512
+ # filters.append("has-orcid:true")
513
+
514
+ # if query.get("hasReferences", query.get("has-references", False)):
515
+ # filters.append("has-references:true")
446
516
 
447
- f.args.update(compact({"rows": rows, "query": _query, "filter": _filter}))
517
+ # if query.get("hasAbstract", query.get("has-abstract", False)):
518
+ # filters.append("has-abstract:true")
519
+
520
+ # Add filters to params if any exist
521
+ if filters:
522
+ f.args["filter"] = ",".join(filters)
448
523
 
449
524
  return f.url
450
525
 
@@ -579,7 +654,7 @@ def dict_to_spdx(dct: dict) -> dict:
579
654
  # end
580
655
 
581
656
 
582
- def from_json_feed(elements: list) -> list:
657
+ def from_jsonfeed(elements: list) -> list:
583
658
  """Convert from JSON Feed elements"""
584
659
 
585
660
  def format_element(element):
@@ -878,7 +953,7 @@ def find_from_format_by_id(pid: str) -> Optional[str]:
878
953
  if re.match(r"\A(http|https):/(/)?github\.com/(.+)\Z", pid) is not None:
879
954
  return "cff"
880
955
  if re.match(r"\Ahttps:/(/)?api\.rogue-scholar\.org/posts/(.+)\Z", pid) is not None:
881
- return "json_feed_item"
956
+ return "jsonfeed"
882
957
  if re.match(r"\Ahttps:/(/)(.+)/api/records/(.+)\Z", pid) is not None:
883
958
  return "inveniordm"
884
959
  return "schema_org"
@@ -906,7 +981,7 @@ def find_from_format_by_dict(dct: dict) -> Optional[str]:
906
981
  ]:
907
982
  return "codemeta"
908
983
  if dct.get("guid", None) is not None:
909
- return "json_feed_item"
984
+ return "jsonfeed"
910
985
  if dct.get("schemaVersion", "").startswith("http://datacite.org/schema/kernel"):
911
986
  return "datacite"
912
987
  if dct.get("source", None) == "Crossref":
@@ -939,7 +1014,7 @@ def find_from_format_by_string(string: str) -> Optional[str]:
939
1014
  ]:
940
1015
  return "codemeta"
941
1016
  if data.get("guid", None) is not None:
942
- return "json_feed_item"
1017
+ return "jsonfeed"
943
1018
  if data.get("schemaVersion", "").startswith(
944
1019
  "http://datacite.org/schema/kernel"
945
1020
  ):
commonmeta/writers/bibtex_writer.py CHANGED
@@ -1,15 +1,15 @@
1
1
  """Bibtex writer for commonmeta-py"""
2
2
 
3
- from bibtexparser.bwriter import BibTexWriter
4
3
  from bibtexparser.bibdatabase import BibDatabase
4
+ from bibtexparser.bwriter import BibTexWriter
5
5
  from bibtexparser.customization import page_double_hyphen
6
6
 
7
- from ..utils import pages_as_string, get_language
8
- from ..base_utils import compact
9
7
  from ..author_utils import authors_as_string
10
- from ..date_utils import get_month_from_date, get_iso8601_date, MONTH_SHORT_NAMES
11
- from ..doi_utils import doi_from_url
8
+ from ..base_utils import compact
12
9
  from ..constants import CM_TO_BIB_TRANSLATIONS, Commonmeta
10
+ from ..date_utils import MONTH_SHORT_NAMES, get_iso8601_date, get_month_from_date
11
+ from ..doi_utils import doi_from_url
12
+ from ..utils import get_language, pages_as_string
13
13
 
14
14
 
15
15
  def write_bibtex(metadata: Commonmeta) -> str:
commonmeta/writers/citation_writer.py CHANGED
@@ -1,13 +1,18 @@
1
1
  """Citation writer for commonmeta-py"""
2
2
 
3
- import orjson as json
4
3
  import re
5
- from pydash import py_
6
- from citeproc import CitationStylesStyle, CitationStylesBibliography
7
- from citeproc import Citation, CitationItem
8
- from citeproc import formatter
4
+
5
+ import orjson as json
6
+ from citeproc import (
7
+ Citation,
8
+ CitationItem,
9
+ CitationStylesBibliography,
10
+ CitationStylesStyle,
11
+ formatter,
12
+ )
9
13
  from citeproc.source.json import CiteProcJSON
10
14
  from citeproc_styles import get_style_filepath
15
+ from pydash import py_
11
16
 
12
17
 
13
18
  def write_citation(metadata):
commonmeta/writers/commonmeta_writer.py CHANGED
@@ -1,8 +1,7 @@
1
1
  """Commonmeta writer for commonmeta-py"""
2
2
 
3
- import orjson as json
4
- import orjsonl
5
3
  import pydash as py_
4
+
6
5
  from ..base_utils import compact
7
6
 
8
7
 
@@ -32,12 +31,12 @@ def write_commonmeta(metadata):
32
31
  "funding_references": "fundingReferences",
33
32
  },
34
33
  )
35
- return json.dumps(compact(data), option=json.OPT_INDENT_2)
34
+ return compact(data)
36
35
 
37
36
 
38
37
  def write_commonmeta_list(metalist):
39
- """Write commonmeta list. If filename is provided,
40
- write to file. Optionally, use JSON Lines format."""
38
+ """Write commonmeta list. If file is provided,
39
+ write to file. Supports JSON, JSON Lines and YAML format."""
41
40
  if metalist is None:
42
41
  return None
43
42
 
@@ -47,7 +46,7 @@ def write_commonmeta_list(metalist):
47
46
  return compact(item)
48
47
 
49
48
  items = [format_item(item) for item in metalist.items]
50
- output = compact(
49
+ return compact(
51
50
  {
52
51
  "id": metalist.id,
53
52
  "title": metalist.title,
@@ -55,14 +54,3 @@ def write_commonmeta_list(metalist):
55
54
  "items": items,
56
55
  }
57
56
  )
58
-
59
- if metalist.filename and metalist.filename.rsplit(".", 1)[1] in ["jsonl", "json"]:
60
- if metalist.jsonlines:
61
- orjsonl.save(metalist.filename, items)
62
- else:
63
- json_output = json.dumps(output).decode("utf-8")
64
- with open(metalist.filename, "w") as file:
65
- file.write(json_output)
66
- return metalist.filename
67
- else:
68
- return json.dumps(output).decode("utf-8")