commonmeta-py 0.106__py3-none-any.whl → 0.108__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +12 -3
- commonmeta/api_utils.py +3 -2
- commonmeta/base_utils.py +186 -3
- commonmeta/cli.py +114 -34
- commonmeta/constants.py +20 -0
- commonmeta/file_utils.py +112 -0
- commonmeta/metadata.py +102 -42
- commonmeta/readers/codemeta_reader.py +1 -1
- commonmeta/readers/crossref_reader.py +23 -10
- commonmeta/readers/crossref_xml_reader.py +1 -1
- commonmeta/readers/datacite_reader.py +6 -4
- commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
- commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
- commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
- commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
- commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
- commonmeta/resources/crossref/fundref.xsd +29 -19
- commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
- commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
- commonmeta/resources/crossref/module-ali.xsd +14 -6
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
- commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
- commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
- commonmeta/resources/crossref/xml.xsd +287 -0
- commonmeta/schema_utils.py +25 -0
- commonmeta/utils.py +90 -15
- commonmeta/writers/bibtex_writer.py +5 -5
- commonmeta/writers/citation_writer.py +10 -5
- commonmeta/writers/commonmeta_writer.py +5 -17
- commonmeta/writers/crossref_xml_writer.py +1032 -4
- commonmeta/writers/csl_writer.py +6 -6
- commonmeta/writers/datacite_writer.py +11 -6
- commonmeta/writers/inveniordm_writer.py +286 -10
- commonmeta/writers/ris_writer.py +3 -3
- commonmeta/writers/schema_org_writer.py +10 -5
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/METADATA +5 -2
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/RECORD +46 -32
- commonmeta/crossref_utils.py +0 -583
- commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/utils.py
CHANGED
@@ -408,14 +408,52 @@ def openalex_api_url(id: str, identifier_type: str, **kwargs) -> str:
|
|
408
408
|
|
409
409
|
def openalex_api_query_url(query: dict) -> str:
|
410
410
|
"""Return the OpenAlex API query URL"""
|
411
|
+
# Define allowed types
|
412
|
+
types = [
|
413
|
+
"article",
|
414
|
+
"book-chapter",
|
415
|
+
"dataset",
|
416
|
+
"preprint",
|
417
|
+
"dissertation",
|
418
|
+
"book",
|
419
|
+
"review",
|
420
|
+
"paratext",
|
421
|
+
"libguides",
|
422
|
+
"letter",
|
423
|
+
"other",
|
424
|
+
"reference-entry",
|
425
|
+
"report",
|
426
|
+
"editorial",
|
427
|
+
"peer-review",
|
428
|
+
"erratum",
|
429
|
+
"standard",
|
430
|
+
"grant",
|
431
|
+
"supplementary-materials",
|
432
|
+
"retraction",
|
433
|
+
]
|
434
|
+
|
411
435
|
url = "https://api.openalex.org/works"
|
412
436
|
f = furl(url)
|
413
|
-
|
414
|
-
|
437
|
+
|
438
|
+
# Handle pagination and sample parameters
|
439
|
+
number = max(1, min(1000, int(query.get("number", query.get("rows", 10)))))
|
440
|
+
page = max(1, int(query.get("page", 1)))
|
441
|
+
|
442
|
+
sample = query.get("sample", False)
|
443
|
+
if sample:
|
444
|
+
f.args["sample"] = str(number)
|
445
|
+
else:
|
446
|
+
f.args["per-page"] = str(number)
|
447
|
+
f.args["page"] = str(page)
|
448
|
+
# Sort results by published date in descending order
|
449
|
+
f.args["sort"] = "publication_date:desc"
|
450
|
+
|
451
|
+
# Build filters
|
415
452
|
filters = []
|
453
|
+
queries = []
|
416
454
|
_query = None
|
417
|
-
_filter = None
|
418
455
|
|
456
|
+
# Handle query parameters
|
419
457
|
if query.get("query", None) is not None:
|
420
458
|
queries += [query.get("query")]
|
421
459
|
for key, value in query.items():
|
@@ -428,23 +466,60 @@ def openalex_api_query_url(query: dict) -> str:
|
|
428
466
|
queries += [f"{key}:{value}"]
|
429
467
|
if queries:
|
430
468
|
_query = ",".join(queries)
|
469
|
+
f.args["query"] = _query
|
470
|
+
|
471
|
+
# Member/IDs filter
|
472
|
+
ids = query.get("ids", query.get("member", ""))
|
473
|
+
if ids:
|
474
|
+
filters.append(f"member:{ids}")
|
475
|
+
|
476
|
+
# Type filter
|
477
|
+
type_ = query.get("type_", query.get("type", ""))
|
478
|
+
if type_ and type_ in types:
|
479
|
+
filters.append(f"type:{type_}")
|
431
480
|
|
481
|
+
# ROR filter
|
482
|
+
ror = query.get("ror", "")
|
483
|
+
if ror:
|
484
|
+
r = validate_ror(ror)
|
485
|
+
if r:
|
486
|
+
filters.append(f"authorships.institutions.ror:{r}")
|
487
|
+
|
488
|
+
# ORCID filter
|
489
|
+
orcid = query.get("orcid", "")
|
490
|
+
if orcid:
|
491
|
+
o = validate_orcid(orcid)
|
492
|
+
if o:
|
493
|
+
filters.append(f"authorships.author.orcid:{o}")
|
494
|
+
|
495
|
+
# Year filter
|
496
|
+
year = query.get("year", query.get("publication_year", ""))
|
497
|
+
if year:
|
498
|
+
filters.append(f"publication_year:{year}")
|
499
|
+
|
500
|
+
# Other filters from the original function
|
432
501
|
for key, value in query.items():
|
433
502
|
if key in [
|
434
503
|
"prefix",
|
435
|
-
"member",
|
436
|
-
"type",
|
437
504
|
"has-full-text",
|
438
|
-
"has-references",
|
439
|
-
"has-orcid",
|
440
505
|
"has-funder",
|
441
506
|
"has-license",
|
442
507
|
]:
|
443
|
-
filters
|
444
|
-
|
445
|
-
|
508
|
+
filters.append(f"{key}:{value}")
|
509
|
+
|
510
|
+
# Boolean filters
|
511
|
+
# if query.get("hasORCID", query.get("has-orcid", False)):
|
512
|
+
# filters.append("has-orcid:true")
|
513
|
+
|
514
|
+
# if query.get("hasReferences", query.get("has-references", False)):
|
515
|
+
# filters.append("has-references:true")
|
446
516
|
|
447
|
-
|
517
|
+
# if query.get("hasAbstract", query.get("has-abstract", False)):
|
518
|
+
# filters.append("has-abstract:true")
|
519
|
+
|
520
|
+
# Add filters to params if any exist
|
521
|
+
if filters:
|
522
|
+
f.args["filter"] = ",".join(filters)
|
448
523
|
|
449
524
|
return f.url
|
450
525
|
|
@@ -579,7 +654,7 @@ def dict_to_spdx(dct: dict) -> dict:
|
|
579
654
|
# end
|
580
655
|
|
581
656
|
|
582
|
-
def
|
657
|
+
def from_jsonfeed(elements: list) -> list:
|
583
658
|
"""Convert from JSON Feed elements"""
|
584
659
|
|
585
660
|
def format_element(element):
|
@@ -878,7 +953,7 @@ def find_from_format_by_id(pid: str) -> Optional[str]:
|
|
878
953
|
if re.match(r"\A(http|https):/(/)?github\.com/(.+)\Z", pid) is not None:
|
879
954
|
return "cff"
|
880
955
|
if re.match(r"\Ahttps:/(/)?api\.rogue-scholar\.org/posts/(.+)\Z", pid) is not None:
|
881
|
-
return "
|
956
|
+
return "jsonfeed"
|
882
957
|
if re.match(r"\Ahttps:/(/)(.+)/api/records/(.+)\Z", pid) is not None:
|
883
958
|
return "inveniordm"
|
884
959
|
return "schema_org"
|
@@ -906,7 +981,7 @@ def find_from_format_by_dict(dct: dict) -> Optional[str]:
|
|
906
981
|
]:
|
907
982
|
return "codemeta"
|
908
983
|
if dct.get("guid", None) is not None:
|
909
|
-
return "
|
984
|
+
return "jsonfeed"
|
910
985
|
if dct.get("schemaVersion", "").startswith("http://datacite.org/schema/kernel"):
|
911
986
|
return "datacite"
|
912
987
|
if dct.get("source", None) == "Crossref":
|
@@ -939,7 +1014,7 @@ def find_from_format_by_string(string: str) -> Optional[str]:
|
|
939
1014
|
]:
|
940
1015
|
return "codemeta"
|
941
1016
|
if data.get("guid", None) is not None:
|
942
|
-
return "
|
1017
|
+
return "jsonfeed"
|
943
1018
|
if data.get("schemaVersion", "").startswith(
|
944
1019
|
"http://datacite.org/schema/kernel"
|
945
1020
|
):
|
@@ -1,15 +1,15 @@
|
|
1
1
|
"""Bibtex writer for commonmeta-py"""
|
2
2
|
|
3
|
-
from bibtexparser.bwriter import BibTexWriter
|
4
3
|
from bibtexparser.bibdatabase import BibDatabase
|
4
|
+
from bibtexparser.bwriter import BibTexWriter
|
5
5
|
from bibtexparser.customization import page_double_hyphen
|
6
6
|
|
7
|
-
from ..utils import pages_as_string, get_language
|
8
|
-
from ..base_utils import compact
|
9
7
|
from ..author_utils import authors_as_string
|
10
|
-
from ..
|
11
|
-
from ..doi_utils import doi_from_url
|
8
|
+
from ..base_utils import compact
|
12
9
|
from ..constants import CM_TO_BIB_TRANSLATIONS, Commonmeta
|
10
|
+
from ..date_utils import MONTH_SHORT_NAMES, get_iso8601_date, get_month_from_date
|
11
|
+
from ..doi_utils import doi_from_url
|
12
|
+
from ..utils import get_language, pages_as_string
|
13
13
|
|
14
14
|
|
15
15
|
def write_bibtex(metadata: Commonmeta) -> str:
|
@@ -1,13 +1,18 @@
|
|
1
1
|
"""Citation writer for commonmeta-py"""
|
2
2
|
|
3
|
-
import orjson as json
|
4
3
|
import re
|
5
|
-
|
6
|
-
|
7
|
-
from citeproc import
|
8
|
-
|
4
|
+
|
5
|
+
import orjson as json
|
6
|
+
from citeproc import (
|
7
|
+
Citation,
|
8
|
+
CitationItem,
|
9
|
+
CitationStylesBibliography,
|
10
|
+
CitationStylesStyle,
|
11
|
+
formatter,
|
12
|
+
)
|
9
13
|
from citeproc.source.json import CiteProcJSON
|
10
14
|
from citeproc_styles import get_style_filepath
|
15
|
+
from pydash import py_
|
11
16
|
|
12
17
|
|
13
18
|
def write_citation(metadata):
|
@@ -1,8 +1,7 @@
|
|
1
1
|
"""Commonmeta writer for commonmeta-py"""
|
2
2
|
|
3
|
-
import orjson as json
|
4
|
-
import orjsonl
|
5
3
|
import pydash as py_
|
4
|
+
|
6
5
|
from ..base_utils import compact
|
7
6
|
|
8
7
|
|
@@ -32,12 +31,12 @@ def write_commonmeta(metadata):
|
|
32
31
|
"funding_references": "fundingReferences",
|
33
32
|
},
|
34
33
|
)
|
35
|
-
return
|
34
|
+
return compact(data)
|
36
35
|
|
37
36
|
|
38
37
|
def write_commonmeta_list(metalist):
|
39
|
-
"""Write commonmeta list. If
|
40
|
-
write to file.
|
38
|
+
"""Write commonmeta list. If file is provided,
|
39
|
+
write to file. Supports JSON, JSON Lines and YAML format."""
|
41
40
|
if metalist is None:
|
42
41
|
return None
|
43
42
|
|
@@ -47,7 +46,7 @@ def write_commonmeta_list(metalist):
|
|
47
46
|
return compact(item)
|
48
47
|
|
49
48
|
items = [format_item(item) for item in metalist.items]
|
50
|
-
|
49
|
+
return compact(
|
51
50
|
{
|
52
51
|
"id": metalist.id,
|
53
52
|
"title": metalist.title,
|
@@ -55,14 +54,3 @@ def write_commonmeta_list(metalist):
|
|
55
54
|
"items": items,
|
56
55
|
}
|
57
56
|
)
|
58
|
-
|
59
|
-
if metalist.filename and metalist.filename.rsplit(".", 1)[1] in ["jsonl", "json"]:
|
60
|
-
if metalist.jsonlines:
|
61
|
-
orjsonl.save(metalist.filename, items)
|
62
|
-
else:
|
63
|
-
json_output = json.dumps(output).decode("utf-8")
|
64
|
-
with open(metalist.filename, "w") as file:
|
65
|
-
file.write(json_output)
|
66
|
-
return metalist.filename
|
67
|
-
else:
|
68
|
-
return json.dumps(output).decode("utf-8")
|