commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/cff_reader.py +199 -0
  13. commonmeta/readers/codemeta_reader.py +112 -0
  14. commonmeta/readers/commonmeta_reader.py +13 -0
  15. commonmeta/readers/crossref_reader.py +409 -0
  16. commonmeta/readers/crossref_xml_reader.py +505 -0
  17. commonmeta/readers/csl_reader.py +98 -0
  18. commonmeta/readers/datacite_reader.py +390 -0
  19. commonmeta/readers/datacite_xml_reader.py +359 -0
  20. commonmeta/readers/inveniordm_reader.py +218 -0
  21. commonmeta/readers/json_feed_reader.py +420 -0
  22. commonmeta/readers/kbase_reader.py +205 -0
  23. commonmeta/readers/ris_reader.py +103 -0
  24. commonmeta/readers/schema_org_reader.py +506 -0
  25. commonmeta/resources/cff_v1.2.0.json +1827 -0
  26. commonmeta/resources/commonmeta_v0.12.json +601 -0
  27. commonmeta/resources/commonmeta_v0.13.json +559 -0
  28. commonmeta/resources/commonmeta_v0.14.json +573 -0
  29. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  30. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  36. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  37. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  38. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  39. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  40. commonmeta/resources/crossref/fundref.xsd +49 -0
  41. commonmeta/resources/crossref/module-ali.xsd +39 -0
  42. commonmeta/resources/crossref/relations.xsd +444 -0
  43. commonmeta/resources/crossref-v0.2.json +60 -0
  44. commonmeta/resources/csl-data.json +538 -0
  45. commonmeta/resources/datacite-v4.5.json +829 -0
  46. commonmeta/resources/datacite-v4.5pr.json +608 -0
  47. commonmeta/resources/ietf-bcp-47.json +3025 -0
  48. commonmeta/resources/iso-8601.json +3182 -0
  49. commonmeta/resources/spdx/licenses.json +4851 -0
  50. commonmeta/resources/spdx-schema..json +903 -0
  51. commonmeta/resources/styles/apa.csl +1697 -0
  52. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  53. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  54. commonmeta/resources/styles/ieee.csl +468 -0
  55. commonmeta/resources/styles/modern-language-association.csl +341 -0
  56. commonmeta/resources/styles/vancouver.csl +376 -0
  57. commonmeta/schema_utils.py +27 -0
  58. commonmeta/translators.py +47 -0
  59. commonmeta/utils.py +1108 -0
  60. commonmeta/writers/__init__.py +1 -0
  61. commonmeta/writers/bibtex_writer.py +149 -0
  62. commonmeta/writers/citation_writer.py +70 -0
  63. commonmeta/writers/commonmeta_writer.py +68 -0
  64. commonmeta/writers/crossref_xml_writer.py +17 -0
  65. commonmeta/writers/csl_writer.py +79 -0
  66. commonmeta/writers/datacite_writer.py +193 -0
  67. commonmeta/writers/inveniordm_writer.py +94 -0
  68. commonmeta/writers/ris_writer.py +58 -0
  69. commonmeta/writers/schema_org_writer.py +146 -0
  70. {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  71. commonmeta_py-0.24.dist-info/RECORD +75 -0
  72. {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  73. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  74. commonmeta_py-0.23.dist-info/RECORD +0 -5
  75. /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
  76. {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,376 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" initialize-with-hyphen="false" page-range-format="minimal">
3
+ <info>
4
+ <title>Vancouver</title>
5
+ <id>http://www.zotero.org/styles/vancouver</id>
6
+ <link href="http://www.zotero.org/styles/vancouver" rel="self"/>
7
+ <link href="http://www.nlm.nih.gov/bsd/uniform_requirements.html" rel="documentation"/>
8
+ <author>
9
+ <name>Michael Berkowitz</name>
10
+ <email>mberkowi@gmu.edu</email>
11
+ </author>
12
+ <contributor>
13
+ <name>Sean Takats</name>
14
+ <email>stakats@gmu.edu</email>
15
+ </contributor>
16
+ <contributor>
17
+ <name>Sebastian Karcher</name>
18
+ </contributor>
19
+ <category citation-format="numeric"/>
20
+ <category field="medicine"/>
21
+ <summary>Vancouver style as outlined by International Committee of Medical Journal Editors Uniform Requirements for Manuscripts Submitted to Biomedical Journals: Sample References</summary>
22
+ <updated>2022-09-28T11:33:04+00:00</updated>
23
+ <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
24
+ </info>
25
+ <locale xml:lang="en">
26
+ <date form="text" delimiter=" ">
27
+ <date-part name="year"/>
28
+ <date-part name="month" form="short" strip-periods="true"/>
29
+ <date-part name="day"/>
30
+ </date>
31
+ <terms>
32
+ <term name="collection-editor" form="long">
33
+ <single>editor</single>
34
+ <multiple>editors</multiple>
35
+ </term>
36
+ <term name="presented at">presented at</term>
37
+ <term name="available at">available from</term>
38
+ <term name="section" form="short">sect.</term>
39
+ </terms>
40
+ </locale>
41
+ <locale xml:lang="fr">
42
+ <date form="text" delimiter=" ">
43
+ <date-part name="day"/>
44
+ <date-part name="month" form="short" strip-periods="true"/>
45
+ <date-part name="year"/>
46
+ </date>
47
+ </locale>
48
+ <macro name="author">
49
+ <names variable="author">
50
+ <name sort-separator=" " initialize-with="" name-as-sort-order="all" delimiter=", " delimiter-precedes-last="always"/>
51
+ <label form="long" prefix=", "/>
52
+ <substitute>
53
+ <text macro="webpage-title"/>
54
+ <names variable="editor"/>
55
+ </substitute>
56
+ </names>
57
+ </macro>
58
+ <macro name="editor">
59
+ <names variable="editor" suffix=".">
60
+ <name sort-separator=" " initialize-with="" name-as-sort-order="all" delimiter=", " delimiter-precedes-last="always"/>
61
+ <label form="long" prefix=", "/>
62
+ </names>
63
+ </macro>
64
+ <macro name="chapter-marker">
65
+ <choose>
66
+ <if type="chapter paper-conference entry-dictionary entry-encyclopedia" match="any">
67
+ <text term="in" text-case="capitalize-first"/>
68
+ </if>
69
+ </choose>
70
+ </macro>
71
+ <macro name="webpage-title">
72
+ <!--If a webpage has a container, we're assuming the citation is "part of a website" as per ch. 25 Citing Medicine https://www.ncbi.nlm.nih.gov/books/NBK7274/?report=reader -->
73
+ <choose>
74
+ <if type="webpage" variable="container-title" match="all">
75
+ <group delimiter=" ">
76
+ <text variable="container-title"/>
77
+ <text term="internet" prefix="[" suffix="]" text-case="capitalize-first"/>
78
+ </group>
79
+ </if>
80
+ </choose>
81
+ </macro>
82
+ <macro name="publisher">
83
+ <choose>
84
+ <!--discard publisher info for articles-->
85
+ <if type="article-journal article-magazine article-newspaper" match="none">
86
+ <group delimiter=": " suffix=";">
87
+ <choose>
88
+ <if type="thesis">
89
+ <text variable="publisher-place" prefix="[" suffix="]"/>
90
+ </if>
91
+ <else-if type="speech"/>
92
+ <else>
93
+ <text variable="publisher-place"/>
94
+ </else>
95
+ </choose>
96
+ <text variable="publisher"/>
97
+ </group>
98
+ </if>
99
+ </choose>
100
+ </macro>
101
+ <macro name="access">
102
+ <choose>
103
+ <if variable="URL">
104
+ <group delimiter=": ">
105
+ <text term="available at" text-case="capitalize-first"/>
106
+ <text variable="URL"/>
107
+ </group>
108
+ </if>
109
+ </choose>
110
+ </macro>
111
+ <macro name="accessed-date">
112
+ <choose>
113
+ <if variable="URL">
114
+ <group prefix="[" suffix="]" delimiter=" ">
115
+ <text term="cited" text-case="lowercase"/>
116
+ <date variable="accessed" form="text"/>
117
+ </group>
118
+ </if>
119
+ </choose>
120
+ </macro>
121
+ <macro name="container-title">
122
+ <choose>
123
+ <if type="article-journal article-magazine chapter paper-conference article-newspaper review review-book entry-dictionary entry-encyclopedia" match="any">
124
+ <group suffix="." delimiter=" ">
125
+ <choose>
126
+ <if type="article-journal review review-book" match="any">
127
+ <text variable="container-title" form="short" strip-periods="true"/>
128
+ </if>
129
+ <else>
130
+ <text variable="container-title" strip-periods="true"/>
131
+ </else>
132
+ </choose>
133
+ <choose>
134
+ <if variable="URL">
135
+ <text term="internet" prefix="[" suffix="]" text-case="capitalize-first"/>
136
+ </if>
137
+ </choose>
138
+ </group>
139
+ <text macro="edition" prefix=" "/>
140
+ </if>
141
+ <!--add event-name and event-place once they become available-->
142
+ <else-if type="bill legislation" match="any">
143
+ <group delimiter=", ">
144
+ <group delimiter=". ">
145
+ <text variable="container-title"/>
146
+ <group delimiter=" ">
147
+ <text term="section" form="short" text-case="capitalize-first"/>
148
+ <text variable="section"/>
149
+ </group>
150
+ </group>
151
+ <text variable="number"/>
152
+ </group>
153
+ </else-if>
154
+ <else-if type="speech">
155
+ <group delimiter=": " suffix=";">
156
+ <group delimiter=" ">
157
+ <text variable="genre" text-case="capitalize-first"/>
158
+ <text term="presented at"/>
159
+ </group>
160
+ <text variable="event"/>
161
+ </group>
162
+ </else-if>
163
+ <else>
164
+ <group delimiter=", " suffix=".">
165
+ <choose>
166
+ <if variable="collection-title" match="none">
167
+ <group delimiter=" ">
168
+ <label variable="volume" form="short" text-case="capitalize-first"/>
169
+ <text variable="volume"/>
170
+ </group>
171
+ </if>
172
+ </choose>
173
+ <text variable="container-title"/>
174
+ </group>
175
+ </else>
176
+ </choose>
177
+ </macro>
178
+ <macro name="title">
179
+ <choose>
180
+ <if type="webpage" variable="container-title" match="all"/>
181
+ <else>
182
+ <text variable="title"/>
183
+ <choose>
184
+ <if type="article-journal article-magazine chapter paper-conference article-newspaper review review-book entry-dictionary entry-encyclopedia" match="none">
185
+ <choose>
186
+ <if variable="URL">
187
+ <text term="internet" prefix=" [" suffix="]" text-case="capitalize-first"/>
188
+ </if>
189
+ </choose>
190
+ <text macro="edition" prefix=". "/>
191
+ </if>
192
+ </choose>
193
+ </else>
194
+ </choose>
195
+ <choose>
196
+ <if type="thesis">
197
+ <text variable="genre" prefix=" [" suffix="]"/>
198
+ </if>
199
+ </choose>
200
+ </macro>
201
+ <macro name="edition">
202
+ <choose>
203
+ <if is-numeric="edition">
204
+ <group delimiter=" ">
205
+ <number variable="edition" form="ordinal"/>
206
+ <text term="edition" form="short"/>
207
+ </group>
208
+ </if>
209
+ <else>
210
+ <text variable="edition" suffix="."/>
211
+ </else>
212
+ </choose>
213
+ </macro>
214
+ <macro name="date">
215
+ <choose>
216
+ <if type="article-journal article-magazine article-newspaper review review-book" match="any">
217
+ <group suffix=";" delimiter=" ">
218
+ <date variable="issued" form="text"/>
219
+ <text macro="accessed-date"/>
220
+ </group>
221
+ </if>
222
+ <else-if type="bill legislation" match="any">
223
+ <group delimiter=", ">
224
+ <date variable="issued" delimiter=" ">
225
+ <date-part name="month" form="short" strip-periods="true"/>
226
+ <date-part name="day"/>
227
+ </date>
228
+ <date variable="issued">
229
+ <date-part name="year"/>
230
+ </date>
231
+ </group>
232
+ </else-if>
233
+ <else-if type="report">
234
+ <date variable="issued" delimiter=" ">
235
+ <date-part name="year"/>
236
+ <date-part name="month" form="short" strip-periods="true"/>
237
+ </date>
238
+ <text macro="accessed-date" prefix=" "/>
239
+ </else-if>
240
+ <else-if type="patent">
241
+ <group suffix=".">
242
+ <group delimiter=", ">
243
+ <text variable="number"/>
244
+ <date variable="issued">
245
+ <date-part name="year"/>
246
+ </date>
247
+ </group>
248
+ <text macro="accessed-date" prefix=" "/>
249
+ </group>
250
+ </else-if>
251
+ <else-if type="speech">
252
+ <group delimiter="; ">
253
+ <group delimiter=" ">
254
+ <date variable="issued" delimiter=" ">
255
+ <date-part name="year"/>
256
+ <date-part name="month" form="short" strip-periods="true"/>
257
+ <date-part name="day"/>
258
+ </date>
259
+ <text macro="accessed-date"/>
260
+ </group>
261
+ <text variable="event-place"/>
262
+ </group>
263
+ </else-if>
264
+ <else>
265
+ <group suffix=".">
266
+ <date variable="issued">
267
+ <date-part name="year"/>
268
+ </date>
269
+ <text macro="accessed-date" prefix=" "/>
270
+ </group>
271
+ </else>
272
+ </choose>
273
+ </macro>
274
+ <macro name="pages">
275
+ <choose>
276
+ <if type="article-journal article-magazine article-newspaper review review-book" match="any">
277
+ <text variable="page" prefix=":"/>
278
+ </if>
279
+ <else-if type="book" match="any">
280
+ <text variable="number-of-pages" prefix=" "/>
281
+ <choose>
282
+ <if is-numeric="number-of-pages">
283
+ <label variable="number-of-pages" form="short" prefix=" " plural="never"/>
284
+ </if>
285
+ </choose>
286
+ </else-if>
287
+ <else>
288
+ <group prefix=" " delimiter=" ">
289
+ <label variable="page" form="short" plural="never"/>
290
+ <text variable="page"/>
291
+ </group>
292
+ </else>
293
+ </choose>
294
+ </macro>
295
+ <macro name="journal-location">
296
+ <choose>
297
+ <if type="article-journal article-magazine review review-book" match="any">
298
+ <text variable="volume"/>
299
+ <text variable="issue" prefix="(" suffix=")"/>
300
+ </if>
301
+ </choose>
302
+ </macro>
303
+ <macro name="webpage-part">
304
+ <choose>
305
+ <if type="webpage" variable="container-title" match="all">
306
+ <text variable="title"/>
307
+ </if>
308
+ </choose>
309
+ </macro>
310
+ <macro name="collection-details">
311
+ <choose>
312
+ <if type="article-journal article-magazine article-newspaper review review-book" match="none">
313
+ <choose>
314
+ <if variable="collection-title">
315
+ <group delimiter=" " prefix="(" suffix=")">
316
+ <names variable="collection-editor" suffix=".">
317
+ <name sort-separator=" " initialize-with="" name-as-sort-order="all" delimiter=", " delimiter-precedes-last="always"/>
318
+ <label form="long" prefix=", "/>
319
+ </names>
320
+ <group delimiter="; ">
321
+ <text variable="collection-title"/>
322
+ <group delimiter=" ">
323
+ <label variable="volume" form="short"/>
324
+ <text variable="volume"/>
325
+ </group>
326
+ </group>
327
+ </group>
328
+ </if>
329
+ </choose>
330
+ </if>
331
+ </choose>
332
+ </macro>
333
+ <macro name="report-details">
334
+ <choose>
335
+ <if type="report">
336
+ <text variable="number" prefix="Report No.: "/>
337
+ </if>
338
+ </choose>
339
+ </macro>
340
+ <citation collapse="citation-number">
341
+ <sort>
342
+ <key variable="citation-number"/>
343
+ </sort>
344
+ <layout prefix="(" suffix=")" delimiter=",">
345
+ <text variable="citation-number"/>
346
+ </layout>
347
+ </citation>
348
+ <bibliography et-al-min="7" et-al-use-first="6" second-field-align="flush">
349
+ <layout>
350
+ <text variable="citation-number" suffix="."/>
351
+ <group delimiter=". " suffix=". ">
352
+ <text macro="author"/>
353
+ <text macro="title"/>
354
+ </group>
355
+ <group delimiter=" " suffix=". ">
356
+ <group delimiter=": ">
357
+ <text macro="chapter-marker"/>
358
+ <group delimiter=" ">
359
+ <text macro="editor"/>
360
+ <text macro="container-title"/>
361
+ </group>
362
+ </group>
363
+ <text macro="publisher"/>
364
+ <group>
365
+ <text macro="date"/>
366
+ <text macro="journal-location"/>
367
+ <text macro="pages"/>
368
+ </group>
369
+ <text macro="webpage-part"/>
370
+ </group>
371
+ <text macro="collection-details" suffix=". "/>
372
+ <text macro="report-details" suffix=". "/>
373
+ <text macro="access"/>
374
+ </layout>
375
+ </bibliography>
376
+ </style>
@@ -0,0 +1,27 @@
1
+ """Schema utils for commonmeta-py"""
2
+ from os import path
3
+ import orjson as json
4
+ from jsonschema import Draft202012Validator, ValidationError
5
+
6
+
7
def json_schema_errors(instance, schema: str = "commonmeta"):
    """Validate an instance against one of the bundled JSON schemas.

    Args:
        instance: The parsed document to validate.
        schema: Short name of the schema to validate against. One of
            "commonmeta", "datacite", "crossref", "csl" or "cff".

    Returns:
        The result of ``Draft202012Validator.validate`` when the instance
        is valid, otherwise the message of the raised ``ValidationError``.

    Raises:
        ValueError: If ``schema`` is not one of the supported names.
    """
    schema_map = {
        "commonmeta": "commonmeta_v0.14",
        "datacite": "datacite-v4.5",
        "crossref": "crossref-v0.2",
        "csl": "csl-data",
        "cff": "cff_v1.2.0",
    }
    # Unknown schema names are a caller error, not a validation failure,
    # so they are rejected before any file access or validation happens.
    if schema not in schema_map:
        raise ValueError("No schema found")

    # Schema files are shipped in the "resources" folder next to this module.
    file_path = path.join(
        path.dirname(__file__), f"resources/{schema_map[schema]}.json"
    )
    with open(file_path, encoding="utf-8") as file:
        schema_document = json.loads(file.read())

    # Only the validation step can raise ValidationError, so only that
    # step is guarded; I/O and JSON parsing errors propagate unchanged.
    try:
        return Draft202012Validator(schema_document).validate(instance)
    except ValidationError as error:
        return error.message
@@ -0,0 +1,47 @@
1
+ """Web translators for commonmeta. Using BeautifulSoup to extract metadata from web pages."""
2
+
3
+ from furl import furl
4
+ import re
5
+
6
+ from .doi_utils import doi_as_url
7
+
8
+
9
+ def web_translator(soup, url: str):
10
+ """Extract metadata from web pages"""
11
+ f = furl(url)
12
+ if f.host == "arxiv.org":
13
+ return arxiv_translator(soup)
14
+ elif f.host == "datacite.org":
15
+ return datacite_translator(soup)
16
+ elif f.host == "app.pan.pl":
17
+ return pan_translator(soup)
18
+ return {}
19
+
20
+
21
def arxiv_translator(soup):
    """Extract metadata from arXiv. Find the DOI and return it.

    Args:
        soup: Parsed HTML page exposing ``select_one``.

    Returns:
        A dict with the arXiv DOI URL under "@id", or an empty dict when
        the page has no ``citation_arxiv_id`` meta tag with a non-empty
        ``content`` value.
    """
    meta = soup.select_one("meta[name='citation_arxiv_id']")
    if meta is None:
        return {}
    arxiv_id = meta.get("content")
    if not arxiv_id:
        # A meta tag without a content value cannot yield a valid DOI;
        # treat it like a missing tag instead of raising or building
        # a truncated identifier.
        return {}
    return {"@id": f"https://doi.org/10.48550/arXiv.{arxiv_id}"}
27
+
28
+
29
def datacite_translator(soup):
    """Extract metadata from DataCite blog posts. Find the DOI and return it.

    Args:
        soup: Parsed HTML page exposing ``select_one``.

    Returns:
        A dict with the value of the ``data-doi`` attribute of the
        ``div#citation`` element under "@id", or an empty dict when the
        element or the attribute is missing or empty.
    """
    citation = soup.select_one("div#citation")
    if citation is None:
        return {}
    doi = citation.get("data-doi", None)
    if not doi:
        # Same "nothing found" result as the other translators, rather
        # than a dict carrying a None identifier.
        return {}
    return {"@id": doi}
35
+
36
+
37
def pan_translator(soup):
    """Extract metadata from Acta Palaeontologica Polonica. Find the DOI and return it.

    Args:
        soup: Parsed HTML page exposing ``select_one``.

    Returns:
        A dict with the DOI (converted by ``doi_as_url``) under "@id", or
        an empty dict when the caption element is missing or its text does
        not end with a ``doi:10.4202/...`` identifier.
    """
    element = soup.select_one("p.caption div.vol")
    if element is None:
        # Page without the expected caption block: nothing to extract.
        return {}
    # The DOI is expected at the very end of the caption text.
    match = re.search(
        r"doi:(10\.4202/.+)\Z",
        element.text,
    )
    if match is None:
        return {}
    doi = doi_as_url(match.group(1))
    return {"@id": doi}