commonmeta-ruby 3.0.6 → 3.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +14 -12
- data/commonmeta.gemspec +1 -0
- data/lib/commonmeta/cli.rb +12 -0
- data/lib/commonmeta/utils.rb +684 -673
- data/lib/commonmeta/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_doi/decode_another_doi.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/decode_another_id.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/decode_id.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/encode_id.yml +221 -0
- data/spec/utils_spec.rb +21 -1
- metadata +27 -3
data/lib/commonmeta/utils.rb
CHANGED
@@ -3,439 +3,439 @@
|
|
3
3
|
module Commonmeta
|
4
4
|
module Utils
|
5
5
|
NORMALIZED_LICENSES = {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
6
|
+
"https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
|
7
|
+
"https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
|
8
|
+
"https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
|
9
|
+
"https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
|
10
|
+
"https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode",
|
11
|
+
"https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
|
12
|
+
"https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
|
13
|
+
"https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
|
14
|
+
"https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
|
15
|
+
"https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
|
16
|
+
"https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
|
17
|
+
"https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
|
18
|
+
"https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
|
19
|
+
"https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
|
20
|
+
"https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
|
21
|
+
"https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
|
22
|
+
"https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
|
23
|
+
"https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
|
24
|
+
"https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
|
25
|
+
"https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
|
26
|
+
"https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
|
27
|
+
"https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
|
28
|
+
"https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
|
29
|
+
"https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
|
30
|
+
"https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
|
31
|
+
"https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
|
32
|
+
"https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
|
33
|
+
"https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
|
34
|
+
"https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
|
35
|
+
"https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
|
36
|
+
"https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
|
37
|
+
"https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
|
38
|
+
"https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
|
39
|
+
"https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
|
40
|
+
"https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
|
41
|
+
"https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
|
42
|
+
"https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
|
43
|
+
"https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
|
44
44
|
}
|
45
45
|
|
46
46
|
# source: https://www.bibtex.com/e/entry-types/
|
47
47
|
BIB_TO_CM_TRANSLATIONS = {
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
48
|
+
"article" => "JournalArticle",
|
49
|
+
"book" => "Book",
|
50
|
+
"booklet" => "Book",
|
51
|
+
"inbook" => "BookChapter",
|
52
|
+
"inproceedings" => "ProceedingsArticle",
|
53
|
+
"manual" => "Report",
|
54
|
+
"mastersthesis" => "Dissertation",
|
55
|
+
"misc" => "Other",
|
56
|
+
"phdthesis" => "Dissertation",
|
57
|
+
"proceedings" => "Proceedings",
|
58
|
+
"techreport" => "Report",
|
59
|
+
"unpublished" => "Manuscript",
|
60
60
|
}
|
61
61
|
|
62
62
|
CM_TO_BIB_TRANSLATIONS = {
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
63
|
+
"Article" => "article",
|
64
|
+
"Book" => "book",
|
65
|
+
"BookChapter" => "inbook",
|
66
|
+
"Dissertation" => "phdthesis",
|
67
|
+
"JournalArticle" => "article",
|
68
|
+
"Manuscript" => "unpublished",
|
69
|
+
"Other" => "misc",
|
70
|
+
"Proceedings" => "proceedings",
|
71
|
+
"ProceedingsArticle" => "inproceedings",
|
72
|
+
"Report" => "techreport",
|
73
73
|
}
|
74
74
|
|
75
75
|
# source: https://docs.citationstyles.org/en/stable/specification.html?highlight=book#appendix-iii-types
|
76
76
|
CSL_TO_CM_TRANSLATIONS = {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
77
|
+
"article" => "Article",
|
78
|
+
"article-journal" => "JournalArticle",
|
79
|
+
"article-magazine" => "Article",
|
80
|
+
"article-newspaper" => "Article",
|
81
|
+
"bill" => "LegalDocument",
|
82
|
+
"book" => "Book",
|
83
|
+
"broadcast" => "Audiovisual",
|
84
|
+
"chapter" => "BookChapter",
|
85
|
+
"classic" => "Book",
|
86
|
+
"collection" => "Collection",
|
87
|
+
"dataset" => "Dataset",
|
88
|
+
"document" => "Document",
|
89
|
+
"entry" => "Entry",
|
90
|
+
"entry-dictionary" => "Entry",
|
91
|
+
"entry-encyclopedia" => "Entry",
|
92
|
+
"event" => "Event",
|
93
|
+
"figure" => "Figure",
|
94
|
+
"graphic" => "Image",
|
95
|
+
"hearing" => "LegalDocument",
|
96
|
+
"interview" => "Document",
|
97
|
+
"legal_case" => "LegalDocument",
|
98
|
+
"legislation" => "LegalDocument",
|
99
|
+
"manuscript" => "Manuscript",
|
100
|
+
"map" => "Map",
|
101
|
+
"motion_picture" => "Audiovisual",
|
102
|
+
"musical_score" => "Document",
|
103
|
+
"pamphlet" => "Document",
|
104
|
+
"paper-conference" => "ProceedingsArticle",
|
105
|
+
"patent" => "Patent",
|
106
|
+
"performance" => "Performance",
|
107
|
+
"periodical" => "Journal",
|
108
|
+
"personal_communication" => "PersonalCommunication",
|
109
|
+
"post" => "Post",
|
110
|
+
"post-weblog" => "Article",
|
111
|
+
"regulation" => "LegalDocument",
|
112
|
+
"report" => "Report",
|
113
|
+
"review" => "Review",
|
114
|
+
"review-book" => "Review",
|
115
|
+
"software" => "Software",
|
116
|
+
"song" => "Audiovisual",
|
117
|
+
"speech" => "Speech",
|
118
|
+
"standard" => "Standard",
|
119
|
+
"thesis" => "Dissertation",
|
120
|
+
"treaty" => "LegalDocument",
|
121
|
+
"webpage" => "WebPage",
|
122
122
|
}
|
123
123
|
|
124
124
|
CM_TO_CSL_TRANSLATIONS = {
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
125
|
+
"Article" => "article",
|
126
|
+
"JournalArticle" => "article-journal",
|
127
|
+
"Book" => "book",
|
128
|
+
"BookChapter" => "chapter",
|
129
|
+
"Collection" => "collection",
|
130
|
+
"Dataset" => "dataset",
|
131
|
+
"Document" => "document",
|
132
|
+
"Entry" => "entry",
|
133
|
+
"Event" => "event",
|
134
|
+
"Figure" => "figure",
|
135
|
+
"Image" => "graphic",
|
136
|
+
"LegalDocument" => "legal_case",
|
137
|
+
"Manuscript" => "manuscript",
|
138
|
+
"Map" => "map",
|
139
|
+
"Audiovisual" => "motion_picture",
|
140
|
+
"Patent" => "patent",
|
141
|
+
"Performance" => "performance",
|
142
|
+
"Journal" => "periodical",
|
143
|
+
"PersonalCommunication" => "personal_communication",
|
144
|
+
"Post" => "post",
|
145
|
+
"Report" => "report",
|
146
|
+
"Review" => "review",
|
147
|
+
"Software" => "software",
|
148
|
+
"Speech" => "speech",
|
149
|
+
"Standard" => "standard",
|
150
|
+
"Dissertation" => "thesis",
|
151
|
+
"WebPage" => "webpage",
|
152
152
|
}
|
153
153
|
|
154
154
|
# source: http://api.crossref.org/types
|
155
155
|
CR_TO_CM_TRANSLATIONS = {
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
156
|
+
"BookChapter" => "BookChapter",
|
157
|
+
"BookPart" => "BookPart",
|
158
|
+
"BookSection" => "BookSection",
|
159
|
+
"BookSeries" => "BookSeries",
|
160
|
+
"BookSet" => "BookSet",
|
161
|
+
"BookTrack" => "BookTrack",
|
162
|
+
"Book" => "Book",
|
163
|
+
"Component" => "Component",
|
164
|
+
"Database" => "Database",
|
165
|
+
"Dataset" => "Dataset",
|
166
|
+
"Dissertation" => "Dissertation",
|
167
|
+
"EditedBook" => "EditedBook",
|
168
|
+
"Grant" => "Grant",
|
169
|
+
"JournalArticle" => "JournalArticle",
|
170
|
+
"JournalIssue" => "JournalIssue",
|
171
|
+
"JournalVolume" => "JournalVolume",
|
172
|
+
"Journal" => "Journal",
|
173
|
+
"Monograph" => "Book",
|
174
|
+
"Other" => "Other",
|
175
|
+
"PeerReview" => "PeerReview",
|
176
|
+
"PostedContent" => "Article",
|
177
|
+
"ProceedingsArticle" => "ProceedingsArticle",
|
178
|
+
"ProceedingsSeries" => "ProceedingsSeries",
|
179
|
+
"Proceedings" => "Proceedings",
|
180
|
+
"ReferenceBook" => "ReferenceBook",
|
181
|
+
"ReferenceEntry" => "Entry",
|
182
|
+
"ReportComponent" => "ReportComponent",
|
183
|
+
"ReportSeries" => "ReportSeries",
|
184
|
+
"Report" => "Report",
|
185
|
+
"Standard" => "Standard",
|
186
186
|
}
|
187
187
|
|
188
188
|
CM_TO_CR_TRANSLATIONS = {
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
189
|
+
"Article" => "PostedContent",
|
190
|
+
"BookChapter" => "BookChapter",
|
191
|
+
"BookSeries" => "BookSeries",
|
192
|
+
"Book" => "Book",
|
193
|
+
"Component" => "Component",
|
194
|
+
"Dataset" => "Dataset",
|
195
|
+
"Dissertation" => "Dissertation",
|
196
|
+
"Grant" => "Grant",
|
197
|
+
"JournalArticle" => "JournalArticle",
|
198
|
+
"JournalIssue" => "JournalIssue",
|
199
|
+
"JournalVolume" => "JournalVolume",
|
200
|
+
"Journal" => "Journal",
|
201
|
+
"ProceedingsArticle" => "ProceedingsArticle",
|
202
|
+
"ProceedingsSeries" => "ProceedingsSeries",
|
203
|
+
"Proceedings" => "Proceedings",
|
204
|
+
"ReportComponent" => "ReportComponent",
|
205
|
+
"ReportSeries" => "ReportSeries",
|
206
|
+
"Report" => "Report",
|
207
|
+
"PeerReview" => "PeerReview",
|
208
|
+
"Other" => "Other",
|
209
209
|
}
|
210
210
|
|
211
211
|
# source: https://github.com/datacite/schema/blob/master/source/meta/kernel-4/include/datacite-resourceType-v4.xsd
|
212
212
|
DC_TO_CM_TRANSLATIONS = {
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
213
|
+
"Audiovisual" => "Audiovisual",
|
214
|
+
"BlogPosting" => "Article",
|
215
|
+
"Book" => "Book",
|
216
|
+
"BookChapter" => "BookChapter",
|
217
|
+
"Collection" => "Collection",
|
218
|
+
"ComputationalNotebook" => "ComputationalNotebook",
|
219
|
+
"ConferencePaper" => "ProceedingsArticle",
|
220
|
+
"ConferenceProceeding" => "Proceedings",
|
221
|
+
"DataPaper" => "JournalArticle",
|
222
|
+
"Dataset" => "Dataset",
|
223
|
+
"Dissertation" => "Dissertation",
|
224
|
+
"Event" => "Event",
|
225
|
+
"Image" => "Image",
|
226
|
+
"InteractiveResource" => "InteractiveResource",
|
227
|
+
"Journal" => "Journal",
|
228
|
+
"JournalArticle" => "JournalArticle",
|
229
|
+
"Model" => "Model",
|
230
|
+
"OutputManagementPlan" => "OutputManagementPlan",
|
231
|
+
"PeerReview" => "PeerReview",
|
232
|
+
"PhysicalObject" => "PhysicalObject",
|
233
|
+
"Poster" => "Speech",
|
234
|
+
"Preprint" => "Article",
|
235
|
+
"Report" => "Report",
|
236
|
+
"Service" => "Service",
|
237
|
+
"Software" => "Software",
|
238
|
+
"Sound" => "Sound",
|
239
|
+
"Standard" => "Standard",
|
240
|
+
"Text" => "Document",
|
241
|
+
"Thesis" => "Dissertation",
|
242
|
+
"Workflow" => "Workflow",
|
243
|
+
"Other" => "Other",
|
244
244
|
}
|
245
245
|
|
246
246
|
CM_TO_DC_TRANSLATIONS = {
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
247
|
+
"Article" => "Preprint",
|
248
|
+
"Audiovisual" => "Audiovisual",
|
249
|
+
"Book" => "Book",
|
250
|
+
"BookChapter" => "BookChapter",
|
251
|
+
"Collection" => "Collection",
|
252
|
+
"Dataset" => "Dataset",
|
253
|
+
"Dissertation" => "Dissertation",
|
254
|
+
"Document" => "Text",
|
255
|
+
"Entry" => "Text",
|
256
|
+
"Event" => "Event",
|
257
|
+
"Figure" => "Image",
|
258
|
+
"Image" => "Image",
|
259
|
+
"JournalArticle" => "JournalArticle",
|
260
|
+
"LegalDocument" => "Text",
|
261
|
+
"Manuscript" => "Text",
|
262
|
+
"Map" => "Image",
|
263
|
+
"Patent" => "Text",
|
264
|
+
"Performance" => "Audiovisual",
|
265
|
+
"PersonalCommunication" => "Text",
|
266
|
+
"Post" => "Text",
|
267
|
+
"ProceedingsArticle" => "ConferencePaper",
|
268
|
+
"Proceedings" => "ConferenceProceeding",
|
269
|
+
"Report" => "Report",
|
270
|
+
"PeerReview" => "PeerReview",
|
271
|
+
"Software" => "Software",
|
272
|
+
"Sound" => "Sound",
|
273
|
+
"Standard" => "Standard",
|
274
|
+
"WebPage" => "Text",
|
275
275
|
}
|
276
276
|
|
277
277
|
RIS_TO_CM_TRANSLATIONS = {
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
278
|
+
"ABST" => "Text",
|
279
|
+
"ADVS" => "Text",
|
280
|
+
"AGGR" => "Text",
|
281
|
+
"ANCIENT" => "Text",
|
282
|
+
"ART" => "Text",
|
283
|
+
"BILL" => "Text",
|
284
|
+
"BLOG" => "Text",
|
285
|
+
"BOOK" => "Book",
|
286
|
+
"CASE" => "Text",
|
287
|
+
"CHAP" => "BookChapter",
|
288
|
+
"CHART" => "Text",
|
289
|
+
"CLSWK" => "Text",
|
290
|
+
"CTLG" => "Collection",
|
291
|
+
"COMP" => "Software",
|
292
|
+
"DATA" => "Dataset",
|
293
|
+
"DBASE" => "Database",
|
294
|
+
"DICT" => "Dictionary",
|
295
|
+
"EBOOK" => "Book",
|
296
|
+
"ECHAP" => "BookChapter",
|
297
|
+
"EDBOOK" => "Book",
|
298
|
+
"EJOUR" => "JournalArticle",
|
299
|
+
"ELEC" => "Text",
|
300
|
+
"ENCYC" => "Encyclopedia",
|
301
|
+
"EQUA" => "Equation",
|
302
|
+
"FIGURE" => "Image",
|
303
|
+
"GEN" => "CreativeWork",
|
304
|
+
"GOVDOC" => "GovernmentDocument",
|
305
|
+
"GRANT" => "Grant",
|
306
|
+
"HEAR" => "Hearing",
|
307
|
+
"ICOMM" => "Text",
|
308
|
+
"INPR" => "Text",
|
309
|
+
"JFULL" => "JournalArticle",
|
310
|
+
"JOUR" => "JournalArticle",
|
311
|
+
"LEGAL" => "LegalRuleOrRegulation",
|
312
|
+
"MANSCPT" => "Text",
|
313
|
+
"MAP" => "Map",
|
314
|
+
"MGZN" => "MagazineArticle",
|
315
|
+
"MPCT" => "Audiovisual",
|
316
|
+
"MULTI" => "Audiovisual",
|
317
|
+
"MUSIC" => "MusicScore",
|
318
|
+
"NEWS" => "NewspaperArticle",
|
319
|
+
"PAMP" => "Pamphlet",
|
320
|
+
"PAT" => "Patent",
|
321
|
+
"PCOMM" => "PersonalCommunication",
|
322
|
+
"RPRT" => "Report",
|
323
|
+
"SER" => "SerialPublication",
|
324
|
+
"SLIDE" => "Slide",
|
325
|
+
"SOUND" => "SoundRecording",
|
326
|
+
"STAND" => "Standard",
|
327
|
+
"THES" => "Dissertation",
|
328
|
+
"UNBILL" => "UnenactedBill",
|
329
|
+
"UNPB" => "UnpublishedWork",
|
330
|
+
"VIDEO" => "Audiovisual",
|
331
|
+
"WEB" => "WebPage",
|
332
332
|
}
|
333
333
|
|
334
334
|
CM_TO_RIS_TRANSLATIONS = {
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
335
|
+
"Article" => "JOUR",
|
336
|
+
"Audiovisual" => "VIDEO",
|
337
|
+
"Book" => "BOOK",
|
338
|
+
"BookChapter" => "CHAP",
|
339
|
+
"Collection" => "CTLG",
|
340
|
+
"Dataset" => "DATA",
|
341
|
+
"Dissertation" => "THES",
|
342
|
+
"Document" => "GEN",
|
343
|
+
"Entry" => "DICT",
|
344
|
+
"Event" => "GEN",
|
345
|
+
"Figure" => "FIGURE",
|
346
|
+
"Image" => "FIGURE",
|
347
|
+
"JournalArticle" => "JOUR",
|
348
|
+
"LegalDocument" => "GEN",
|
349
|
+
"Manuscript" => "GEN",
|
350
|
+
"Map" => "MAP",
|
351
|
+
"Patent" => "PAT",
|
352
|
+
"Performance" => "GEN",
|
353
|
+
"PersonalCommunication" => "PCOMM",
|
354
|
+
"Post" => "GEN",
|
355
|
+
"ProceedingsArticle" => "CPAPER",
|
356
|
+
"Proceedings" => "CONF",
|
357
|
+
"Report" => "RPRT",
|
358
|
+
"Review" => "GEN",
|
359
|
+
"Software" => "COMP",
|
360
|
+
"Sound" => "SOUND",
|
361
|
+
"Standard" => "STAND",
|
362
|
+
"WebPage" => "WEB",
|
363
363
|
}
|
364
364
|
|
365
365
|
SO_TO_CM_TRANSLATIONS = {
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
366
|
+
"Article" => "Article",
|
367
|
+
"BlogPosting" => "Article",
|
368
|
+
"Book" => "Book",
|
369
|
+
"BookChapter" => "BookChapter",
|
370
|
+
"CreativeWork" => "Other",
|
371
|
+
"Dataset" => "Dataset",
|
372
|
+
"Dissertation" => "Dissertation",
|
373
|
+
"NewsArticle" => "Article",
|
374
|
+
"Legislation" => "LegalDocument",
|
375
|
+
"ScholarlyArticle" => "JournalArticle",
|
376
|
+
"SoftwareSourceCode" => "Software",
|
377
377
|
}
|
378
378
|
|
379
379
|
CM_TO_SO_TRANSLATIONS = {
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
380
|
+
"Article" => "Article",
|
381
|
+
"Audiovisual" => "CreativeWork",
|
382
|
+
"Book" => "Book",
|
383
|
+
"BookChapter" => "BookChapter",
|
384
|
+
"Collection" => "CreativeWork",
|
385
|
+
"Dataset" => "Dataset",
|
386
|
+
"Dissertation" => "Dissertation",
|
387
|
+
"Document" => "CreativeWork",
|
388
|
+
"Entry" => "CreativeWork",
|
389
|
+
"Event" => "CreativeWork",
|
390
|
+
"Figure" => "CreativeWork",
|
391
|
+
"Image" => "CreativeWork",
|
392
|
+
"JournalArticle" => "ScholarlyArticle",
|
393
|
+
"LegalDocument" => "Legislation",
|
394
|
+
"Software" => "SoftwareSourceCode",
|
395
395
|
}
|
396
396
|
|
397
397
|
CM_TO_JATS_TRANSLATIONS = {
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
398
|
+
"Proceedings" => "working-paper",
|
399
|
+
"ReferenceBook" => "book",
|
400
|
+
"JournalIssue" => "journal",
|
401
|
+
"ProceedingsArticle" => "working-paper",
|
402
|
+
"Other" => nil,
|
403
|
+
"Dissertation" => nil,
|
404
|
+
"Dataset" => "data",
|
405
|
+
"Document" => "journal",
|
406
|
+
"EditedBook" => "book",
|
407
|
+
"JournalArticle" => "journal",
|
408
|
+
"Journal" => "journal",
|
409
|
+
"Report" => "report",
|
410
|
+
"BookSeries" => "book",
|
411
|
+
"ReportSeries" => "report",
|
412
|
+
"BookTrack" => "book",
|
413
|
+
"Standard" => "standard",
|
414
|
+
"BookSection" => "chapter",
|
415
|
+
"BookPart" => "chapter",
|
416
|
+
"Book" => "book",
|
417
|
+
"BookChapter" => "chapter",
|
418
|
+
"StandardSeries" => "standard",
|
419
|
+
"Monograph" => "book",
|
420
|
+
"Component" => nil,
|
421
|
+
"ReferenceEntry" => nil,
|
422
|
+
"JournalVolume" => "journal",
|
423
|
+
"BookSet" => "book",
|
424
|
+
"Article" => "journal",
|
425
|
+
"Software" => "software",
|
426
426
|
}
|
427
427
|
|
428
428
|
UNKNOWN_INFORMATION = {
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
429
|
+
":unac" => "temporarily inaccessible",
|
430
|
+
":unal" => "unallowed, suppressed intentionally",
|
431
|
+
":unap" => "not applicable, makes no sense",
|
432
|
+
":unas" => "value unassigned (e.g., Untitled)",
|
433
|
+
":unav" => "value unavailable, possibly unknown",
|
434
|
+
":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
|
435
|
+
":none" => "never had a value, never will",
|
436
|
+
":null" => "explicitly and meaningfully empty",
|
437
|
+
":tba" => "to be assigned or announced later",
|
438
|
+
":etal" => "too numerous to list (et alia)",
|
439
439
|
}
|
440
440
|
|
441
441
|
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
|
@@ -448,7 +448,7 @@ module Commonmeta
|
|
448
448
|
elsif filename.present?
|
449
449
|
find_from_format_by_filename(filename)
|
450
450
|
else
|
451
|
-
|
451
|
+
"datacite"
|
452
452
|
end
|
453
453
|
end
|
454
454
|
|
@@ -459,35 +459,35 @@ module Commonmeta
|
|
459
459
|
ra = get_doi_ra(id)
|
460
460
|
%w[DataCite Crossref mEDRA KISTI JaLC OP].include?(ra) ? ra.downcase : nil
|
461
461
|
elsif %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}.match?(id)
|
462
|
-
|
462
|
+
"orcid"
|
463
463
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/package.json\z}.match?(id)
|
464
|
-
|
464
|
+
"npm"
|
465
465
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/codemeta.json\z}.match?(id)
|
466
|
-
|
466
|
+
"codemeta"
|
467
467
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/CITATION.cff\z}.match?(id)
|
468
|
-
|
468
|
+
"cff"
|
469
469
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)\z}.match?(id)
|
470
|
-
|
470
|
+
"cff"
|
471
471
|
else
|
472
|
-
|
472
|
+
"schema_org"
|
473
473
|
end
|
474
474
|
end
|
475
475
|
|
476
476
|
def find_from_format_by_filename(filename)
|
477
|
-
if filename ==
|
478
|
-
|
479
|
-
elsif filename ==
|
480
|
-
|
477
|
+
if filename == "package.json"
|
478
|
+
"npm"
|
479
|
+
elsif filename == "CITATION.cff"
|
480
|
+
"cff"
|
481
481
|
end
|
482
482
|
end
|
483
483
|
|
484
484
|
def find_from_format_by_ext(string, options = {})
|
485
485
|
case options[:ext]
|
486
|
-
when
|
487
|
-
|
488
|
-
when
|
489
|
-
|
490
|
-
when
|
486
|
+
when ".bib"
|
487
|
+
"bibtex"
|
488
|
+
when ".ris"
|
489
|
+
"ris"
|
490
|
+
when ".xml", ".json"
|
491
491
|
find_from_format_by_string(string)
|
492
492
|
end
|
493
493
|
end
|
@@ -495,36 +495,36 @@ module Commonmeta
|
|
495
495
|
def find_from_format_by_string(string)
|
496
496
|
begin # try to parse as JSON
|
497
497
|
hsh = MultiJson.load(string).to_h
|
498
|
-
if hsh.dig(
|
499
|
-
return
|
500
|
-
elsif hsh.dig(
|
501
|
-
return
|
502
|
-
elsif hsh.dig(
|
503
|
-
return
|
504
|
-
elsif hsh.dig(
|
505
|
-
return
|
506
|
-
elsif URI.parse(hsh.dig(
|
507
|
-
return
|
498
|
+
if hsh.dig("@context") && URI.parse(hsh.dig("@context")).host == "schema.org"
|
499
|
+
return "schema_org"
|
500
|
+
elsif hsh.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel")
|
501
|
+
return "datacite"
|
502
|
+
elsif hsh.dig("source") == "Crossref"
|
503
|
+
return "crossref"
|
504
|
+
elsif hsh.dig("issued", "date-parts").present?
|
505
|
+
return "csl"
|
506
|
+
elsif URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
507
|
+
return "codemeta"
|
508
508
|
end
|
509
509
|
rescue MultiJson::ParseError
|
510
510
|
end
|
511
511
|
|
512
512
|
begin # try to parse as XML
|
513
513
|
hsh = Hash.from_xml(string)
|
514
|
-
return
|
514
|
+
return "crossref_xml" if hsh.to_h.dig("crossref_result").present?
|
515
515
|
rescue Nokogiri::XML::SyntaxError
|
516
516
|
end
|
517
517
|
|
518
518
|
begin # try to parse as YAML
|
519
519
|
hsh = YAML.load(string, permitted_classes: [Date])
|
520
|
-
return
|
520
|
+
return "cff" if hsh.is_a?(Hash) && hsh.fetch("cff-version", nil).present?
|
521
521
|
rescue Psych::SyntaxError
|
522
522
|
end
|
523
523
|
|
524
|
-
if string.start_with?(
|
525
|
-
|
524
|
+
if string.start_with?("TY - ")
|
525
|
+
"ris"
|
526
526
|
elsif BibTeX.parse(string).first
|
527
|
-
|
527
|
+
"bibtex"
|
528
528
|
end
|
529
529
|
end
|
530
530
|
|
@@ -538,7 +538,7 @@ module Commonmeta
|
|
538
538
|
|
539
539
|
def validate_orcid(orcid)
|
540
540
|
orcid = Array(%r{\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(orcid)).last
|
541
|
-
orcid.gsub(/[[:space:]]/,
|
541
|
+
orcid.gsub(/[[:space:]]/, "-") if orcid.present?
|
542
542
|
end
|
543
543
|
|
544
544
|
def validate_orcid_scheme(orcid_scheme)
|
@@ -547,16 +547,16 @@ module Commonmeta
|
|
547
547
|
|
548
548
|
def validate_url(str)
|
549
549
|
if %r{\A(?:(http|https)://(dx\.)?doi.org/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(str)
|
550
|
-
|
550
|
+
"DOI"
|
551
551
|
elsif %r{\A(http|https)://}.match?(str)
|
552
|
-
|
552
|
+
"URL"
|
553
553
|
elsif /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/.match?(str)
|
554
|
-
|
554
|
+
"ISSN"
|
555
555
|
end
|
556
556
|
end
|
557
557
|
|
558
558
|
def parse_attributes(element, options = {})
|
559
|
-
content = options[:content] ||
|
559
|
+
content = options[:content] || "__content__"
|
560
560
|
|
561
561
|
if element.is_a?(String) && options[:content].nil?
|
562
562
|
CGI.unescapeHTML(element)
|
@@ -589,7 +589,7 @@ module Commonmeta
|
|
589
589
|
return nil unless id.present?
|
590
590
|
|
591
591
|
# handle info URIs
|
592
|
-
return id if id.to_s.start_with?(
|
592
|
+
return id if id.to_s.start_with?("info")
|
593
593
|
|
594
594
|
# check for valid HTTP uri
|
595
595
|
uri = Addressable::URI.parse(id)
|
@@ -597,7 +597,7 @@ module Commonmeta
|
|
597
597
|
return nil unless uri && uri.host && %w[http https ftp].include?(uri.scheme)
|
598
598
|
|
599
599
|
# optionally turn into https URL
|
600
|
-
uri.scheme =
|
600
|
+
uri.scheme = "https" if options[:https]
|
601
601
|
|
602
602
|
# clean up URL
|
603
603
|
uri.path = PostRank::URI.clean(uri.path)
|
@@ -617,52 +617,52 @@ module Commonmeta
|
|
617
617
|
return nil unless orcid.present?
|
618
618
|
|
619
619
|
# turn ORCID ID into URL
|
620
|
-
|
620
|
+
"https://orcid.org/" + Addressable::URI.encode(orcid)
|
621
621
|
end
|
622
622
|
|
623
623
|
# pick electronic issn if there are multiple
|
624
624
|
# format issn as xxxx-xxxx
|
625
625
|
def normalize_issn(input, options = {})
|
626
|
-
content = options[:content] ||
|
626
|
+
content = options[:content] || "__content__"
|
627
627
|
|
628
628
|
issn = if input.blank?
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
629
|
+
nil
|
630
|
+
elsif input.is_a?(String) && options[:content].nil?
|
631
|
+
input
|
632
|
+
elsif input.is_a?(Hash)
|
633
|
+
input.fetch(content, nil)
|
634
|
+
elsif input.is_a?(Array)
|
635
|
+
a = input.find { |a| a["media_type"] == "electronic" } || input.first
|
636
|
+
a.fetch(content, nil)
|
637
|
+
end
|
638
638
|
|
639
639
|
case issn.to_s.length
|
640
640
|
when 9
|
641
641
|
issn
|
642
642
|
when 8
|
643
|
-
issn[0..3] +
|
643
|
+
issn[0..3] + "-" + issn[4..7]
|
644
644
|
end
|
645
645
|
end
|
646
646
|
|
647
647
|
# find Creative Commons or OSI license in licenses array, normalize url and name
|
648
648
|
def normalize_licenses(licenses)
|
649
649
|
standard_licenses = Array.wrap(licenses).map do |l|
|
650
|
-
URI.parse(l[
|
650
|
+
URI.parse(l["url"])
|
651
651
|
end.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
|
652
652
|
return licenses unless standard_licenses.present?
|
653
653
|
|
654
654
|
# use HTTPS
|
655
|
-
uri.scheme =
|
655
|
+
uri.scheme = "https"
|
656
656
|
|
657
657
|
# use host name without subdomain
|
658
658
|
uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last
|
659
659
|
|
660
660
|
# normalize URLs
|
661
|
-
if uri.host ==
|
662
|
-
uri.path = uri.path.split(
|
663
|
-
uri.path <<
|
661
|
+
if uri.host == "creativecommons.org"
|
662
|
+
uri.path = uri.path.split("/")[0..-2].join("/") if uri.path.split("/").last == "legalcode"
|
663
|
+
uri.path << "/" unless uri.path.end_with?("/")
|
664
664
|
else
|
665
|
-
uri.path = uri.path.gsub(/(-license|\.php|\.html)/,
|
665
|
+
uri.path = uri.path.gsub(/(-license|\.php|\.html)/, "")
|
666
666
|
uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
|
667
667
|
uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
|
668
668
|
uri.path = uri.path.sub(/([^0-9-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
|
@@ -670,8 +670,8 @@ module Commonmeta
|
|
670
670
|
text = m[1]
|
671
671
|
|
672
672
|
if m[3].present?
|
673
|
-
version = [m[3], m[5].presence ||
|
674
|
-
[text, version].join(
|
673
|
+
version = [m[3], m[5].presence || "0"].join(".")
|
674
|
+
[text, version].join("-")
|
675
675
|
else
|
676
676
|
text
|
677
677
|
end
|
@@ -693,13 +693,13 @@ module Commonmeta
|
|
693
693
|
end
|
694
694
|
|
695
695
|
def from_datacite(element)
|
696
|
-
mapping = {
|
696
|
+
mapping = { "nameType" => "type", "creatorName" => "name" }
|
697
697
|
|
698
698
|
map_hash_keys(element: element, mapping: mapping)
|
699
699
|
end
|
700
700
|
|
701
701
|
def to_schema_org(element)
|
702
|
-
mapping = {
|
702
|
+
mapping = { "type" => "@type", "id" => "@id", "title" => "name" }
|
703
703
|
|
704
704
|
map_hash_keys(element: element, mapping: mapping)
|
705
705
|
end
|
@@ -707,20 +707,20 @@ module Commonmeta
|
|
707
707
|
def to_schema_org_container(element, options = {})
|
708
708
|
return nil unless element.is_a?(Hash) || (element.nil? && options[:container_title].present?)
|
709
709
|
|
710
|
-
issn = element[
|
711
|
-
id = issn.blank? ? element[
|
712
|
-
name = options[:container_title] || element[
|
713
|
-
type = id || name ? options[:type] || element[
|
710
|
+
issn = element["identifier"] if element["identifierType"] == "ISSN"
|
711
|
+
id = issn.blank? ? element["identifier"] : nil
|
712
|
+
name = options[:container_title] || element["title"]
|
713
|
+
type = id || name ? options[:type] || element["type"] : nil
|
714
714
|
|
715
|
-
{
|
715
|
+
{ "@id" => id, "@type" => type, "name" => name, "issn" => issn }.compact
|
716
716
|
end
|
717
717
|
|
718
718
|
def to_schema_org_identifiers(element, _options = {})
|
719
719
|
Array.wrap(element).map do |ai|
|
720
720
|
{
|
721
|
-
|
722
|
-
|
723
|
-
|
721
|
+
"@type" => "PropertyValue",
|
722
|
+
"propertyID" => ai["alternateIdentifierType"],
|
723
|
+
"value" => ai["alternateIdentifier"],
|
724
724
|
}
|
725
725
|
end.unwrap
|
726
726
|
end
|
@@ -728,22 +728,22 @@ module Commonmeta
|
|
728
728
|
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
|
729
729
|
return nil unless related_identifiers.present? && relation_type.present?
|
730
730
|
|
731
|
-
relation_type = if relation_type ==
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
731
|
+
relation_type = if relation_type == "References"
|
732
|
+
%w[References Cites
|
733
|
+
Documents]
|
734
|
+
else
|
735
|
+
[relation_type]
|
736
|
+
end
|
737
737
|
|
738
738
|
Array.wrap(related_identifiers).select do |ri|
|
739
|
-
relation_type.include?(ri[
|
739
|
+
relation_type.include?(ri["relationType"])
|
740
740
|
end.map do |r|
|
741
|
-
if r[
|
742
|
-
{
|
741
|
+
if r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"
|
742
|
+
{ "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact
|
743
743
|
else
|
744
744
|
{
|
745
|
-
|
746
|
-
|
745
|
+
"@id" => normalize_id(r["relatedIdentifier"]),
|
746
|
+
"@type" => DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork",
|
747
747
|
}.compact
|
748
748
|
end
|
749
749
|
end.unwrap
|
@@ -754,9 +754,9 @@ module Commonmeta
|
|
754
754
|
|
755
755
|
Array.wrap(funding_references).map do |fr|
|
756
756
|
{
|
757
|
-
|
758
|
-
|
759
|
-
|
757
|
+
"@id" => fr["funderIdentifier"],
|
758
|
+
"@type" => "Organization",
|
759
|
+
"name" => fr["funderName"],
|
760
760
|
}.compact
|
761
761
|
end.unwrap
|
762
762
|
end
|
@@ -765,10 +765,10 @@ module Commonmeta
|
|
765
765
|
return nil unless reference.present?
|
766
766
|
|
767
767
|
{
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
768
|
+
"@type" => "CreativeWork",
|
769
|
+
"@id" => reference["doi"] ? normalize_id(reference["doi"]) : nil,
|
770
|
+
"name" => reference["title"],
|
771
|
+
"datePublished" => reference["publicationYear"],
|
772
772
|
}.compact
|
773
773
|
end
|
774
774
|
|
@@ -776,67 +776,67 @@ module Commonmeta
|
|
776
776
|
return nil unless geo_location.present?
|
777
777
|
|
778
778
|
Array.wrap(geo_location).each_with_object([]) do |gl, sum|
|
779
|
-
if gl.fetch(
|
779
|
+
if gl.fetch("geoLocationPoint", nil)
|
780
780
|
sum << {
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
}
|
781
|
+
"@type" => "Place",
|
782
|
+
"geo" => {
|
783
|
+
"@type" => "GeoCoordinates",
|
784
|
+
"address" => gl["geoLocationPlace"],
|
785
|
+
"latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
|
786
|
+
"longitude" => gl.dig("geoLocationPoint", "pointLongitude"),
|
787
|
+
},
|
788
788
|
}.compact
|
789
789
|
end
|
790
790
|
|
791
|
-
if gl.fetch(
|
791
|
+
if gl.fetch("geoLocationBox", nil)
|
792
792
|
sum << {
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
gl.dig(
|
799
|
-
gl.dig(
|
800
|
-
gl.dig(
|
801
|
-
}.compact
|
793
|
+
"@type" => "Place",
|
794
|
+
"geo" => {
|
795
|
+
"@type" => "GeoShape",
|
796
|
+
"address" => gl["geoLocationPlace"],
|
797
|
+
"box" => [gl.dig("geoLocationBox", "southBoundLatitude"),
|
798
|
+
gl.dig("geoLocationBox", "westBoundLongitude"),
|
799
|
+
gl.dig("geoLocationBox", "northBoundLatitude"),
|
800
|
+
gl.dig("geoLocationBox", "eastBoundLongitude")].compact.join(" ").presence,
|
801
|
+
}.compact,
|
802
802
|
}.compact
|
803
803
|
end
|
804
804
|
|
805
|
-
if gl.fetch(
|
805
|
+
if gl.fetch("geoLocationPolygon", nil)
|
806
806
|
sum << {
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
807
|
+
"@type" => "Place",
|
808
|
+
"geo" => {
|
809
|
+
"@type" => "GeoShape",
|
810
|
+
"address" => gl["geoLocationPlace"],
|
811
|
+
"polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
|
812
812
|
Array.wrap(glp).map do |glpp|
|
813
|
-
[glpp.dig(
|
814
|
-
glpp.dig(
|
813
|
+
[glpp.dig("polygonPoint", "pointLongitude"),
|
814
|
+
glpp.dig("polygonPoint", "pointLatitude")].compact
|
815
815
|
end.compact
|
816
|
-
end.compact.presence
|
817
|
-
}
|
816
|
+
end.compact.presence,
|
817
|
+
},
|
818
818
|
}
|
819
819
|
end
|
820
820
|
|
821
|
-
next unless gl.fetch(
|
822
|
-
nil) && !gl.fetch(
|
823
|
-
nil) && !gl.fetch(
|
821
|
+
next unless gl.fetch("geoLocationPlace",
|
822
|
+
nil) && !gl.fetch("geoLocationPoint",
|
823
|
+
nil) && !gl.fetch("geoLocationBox",
|
824
824
|
nil) && !gl.fetch(
|
825
|
-
|
826
|
-
|
825
|
+
"geoLocationPolygon", nil
|
826
|
+
)
|
827
827
|
|
828
828
|
sum << {
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
}
|
829
|
+
"@type" => "Place",
|
830
|
+
"geo" => {
|
831
|
+
"@type" => "GeoCoordinates",
|
832
|
+
"address" => gl["geoLocationPlace"],
|
833
|
+
},
|
834
834
|
}.compact
|
835
835
|
end.unwrap
|
836
836
|
end
|
837
837
|
|
838
838
|
def from_schema_org(element)
|
839
|
-
mapping = {
|
839
|
+
mapping = { "@type" => "type", "@id" => "id" }
|
840
840
|
|
841
841
|
map_hash_keys(element: element, mapping: mapping)
|
842
842
|
end
|
@@ -844,16 +844,16 @@ module Commonmeta
|
|
844
844
|
def map_hash_keys(element: nil, mapping: nil)
|
845
845
|
Array.wrap(element).map do |a|
|
846
846
|
a.map { |k, v| [mapping.fetch(k, k), v] }.reduce({}) do |hsh, (k, v)|
|
847
|
-
if k ==
|
847
|
+
if k == "affiliation" && v.is_a?(Array)
|
848
848
|
hsh[k] = v.map do |affiliation|
|
849
849
|
if affiliation.is_a?(Hash)
|
850
|
-
affiliation.merge(
|
850
|
+
affiliation.merge("@type" => "Organization")
|
851
851
|
else
|
852
852
|
affiliation
|
853
853
|
end
|
854
854
|
end
|
855
855
|
hsh
|
856
|
-
elsif k ==
|
856
|
+
elsif k == "type" && v.is_a?(String)
|
857
857
|
hsh[k] = v.capitalize
|
858
858
|
hsh
|
859
859
|
elsif v.is_a?(Hash)
|
@@ -869,51 +869,51 @@ module Commonmeta
|
|
869
869
|
|
870
870
|
def to_identifier(identifier)
|
871
871
|
{
|
872
|
-
|
873
|
-
|
874
|
-
|
872
|
+
"@type" => "PropertyValue",
|
873
|
+
"propertyID" => identifier["relatedIdentifierType"],
|
874
|
+
"value" => identifier["relatedIdentifier"],
|
875
875
|
}
|
876
876
|
end
|
877
877
|
|
878
878
|
def from_csl(element)
|
879
879
|
Array.wrap(element).map do |a|
|
880
|
-
if a[
|
881
|
-
a[
|
882
|
-
a[
|
883
|
-
elsif a[
|
884
|
-
a[
|
885
|
-
elsif a[
|
886
|
-
a[
|
880
|
+
if a["literal"].present?
|
881
|
+
a["type"] = "Organization"
|
882
|
+
a["name"] = a["literal"]
|
883
|
+
elsif a["name"].present?
|
884
|
+
a["type"] = "Organization"
|
885
|
+
elsif a["given"].present? || a["family"].present?
|
886
|
+
a["type"] = "Person"
|
887
887
|
end
|
888
|
-
a[
|
889
|
-
a[
|
890
|
-
a.except(
|
888
|
+
a["givenName"] = a["given"]
|
889
|
+
a["familyName"] = a["family"]
|
890
|
+
a.except("given", "family", "literal").compact
|
891
891
|
end.unwrap
|
892
892
|
end
|
893
893
|
|
894
894
|
def to_csl(element)
|
895
895
|
Array.wrap(element).map do |a|
|
896
|
-
a[
|
897
|
-
a[
|
898
|
-
a[
|
899
|
-
a.except(
|
900
|
-
|
896
|
+
a["family"] = a["familyName"]
|
897
|
+
a["given"] = a["givenName"]
|
898
|
+
a["literal"] = a["name"] unless a["familyName"].present?
|
899
|
+
a.except("nameType", "type", "@type", "id", "@id", "name", "familyName", "givenName",
|
900
|
+
"affiliation", "contributorType").compact
|
901
901
|
end.presence
|
902
902
|
end
|
903
903
|
|
904
904
|
def to_ris(element)
|
905
905
|
Array.wrap(element).map do |a|
|
906
|
-
if a[
|
907
|
-
[a[
|
906
|
+
if a["familyName"].present?
|
907
|
+
[a["familyName"], a["givenName"]].join(", ")
|
908
908
|
else
|
909
|
-
a[
|
909
|
+
a["name"]
|
910
910
|
end
|
911
911
|
end.unwrap
|
912
912
|
end
|
913
913
|
|
914
914
|
def sanitize(text, options = {})
|
915
915
|
options[:tags] ||= Set.new(%w[strong em b i code pre sub sup br])
|
916
|
-
content = options[:content] ||
|
916
|
+
content = options[:content] || "__content__"
|
917
917
|
custom_scrubber = Commonmeta::WhitelistScrubber.new(options)
|
918
918
|
|
919
919
|
if text.is_a?(String)
|
@@ -930,8 +930,8 @@ module Commonmeta
|
|
930
930
|
def github_from_url(url)
|
931
931
|
return {} unless %r{\Ahttps://github\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\z}.match?(url)
|
932
932
|
|
933
|
-
words = URI.parse(url).path[1..-1].split(
|
934
|
-
path = words.length > 3 ? words[4...words.length].join(
|
933
|
+
words = URI.parse(url).path[1..-1].split("/")
|
934
|
+
path = words.length > 3 ? words[4...words.length].join("/") : nil
|
935
935
|
|
936
936
|
{ owner: words[0], repo: words[1], release: words[3], path: path }.compact
|
937
937
|
end
|
@@ -970,7 +970,7 @@ module Commonmeta
|
|
970
970
|
def github_as_codemeta_url(url)
|
971
971
|
github_hash = github_from_url(url)
|
972
972
|
|
973
|
-
if github_hash[:path].to_s.end_with?(
|
973
|
+
if github_hash[:path].to_s.end_with?("codemeta.json")
|
974
974
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}"
|
975
975
|
elsif github_hash[:owner].present?
|
976
976
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/master/codemeta.json"
|
@@ -980,7 +980,7 @@ module Commonmeta
|
|
980
980
|
def github_as_cff_url(url)
|
981
981
|
github_hash = github_from_url(url)
|
982
982
|
|
983
|
-
if github_hash[:path].to_s.end_with?(
|
983
|
+
if github_hash[:path].to_s.end_with?("CITATION.cff")
|
984
984
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}"
|
985
985
|
elsif github_hash[:owner].present?
|
986
986
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/main/CITATION.cff"
|
@@ -988,18 +988,18 @@ module Commonmeta
|
|
988
988
|
end
|
989
989
|
|
990
990
|
def get_date_parts(iso8601_time)
|
991
|
-
return {
|
991
|
+
return { "date-parts" => [[]] } if iso8601_time.nil?
|
992
992
|
|
993
993
|
year = iso8601_time[0..3].to_i
|
994
994
|
month = iso8601_time[5..6].to_i
|
995
995
|
day = iso8601_time[8..9].to_i
|
996
|
-
{
|
996
|
+
{ "date-parts" => [[year, month, day].reject { |part| part == 0 }] }
|
997
997
|
rescue TypeError
|
998
998
|
nil
|
999
999
|
end
|
1000
1000
|
|
1001
1001
|
def get_date_from_date_parts(date_as_parts)
|
1002
|
-
date_parts = date_as_parts.fetch(
|
1002
|
+
date_parts = date_as_parts.fetch("date-parts", []).first
|
1003
1003
|
return nil if date_parts == [nil]
|
1004
1004
|
|
1005
1005
|
year = date_parts[0]
|
@@ -1011,13 +1011,13 @@ module Commonmeta
|
|
1011
1011
|
end
|
1012
1012
|
|
1013
1013
|
def get_date_from_parts(year, month = nil, day = nil)
|
1014
|
-
[year.to_s.rjust(4,
|
1015
|
-
part ==
|
1016
|
-
end.join(
|
1014
|
+
[year.to_s.rjust(4, "0"), month.to_s.rjust(2, "0"), day.to_s.rjust(2, "0")].reject do |part|
|
1015
|
+
part == "00"
|
1016
|
+
end.join("-")
|
1017
1017
|
end
|
1018
1018
|
|
1019
1019
|
def get_date_parts_from_parts(year, month = nil, day = nil)
|
1020
|
-
{
|
1020
|
+
{ "date-parts" => [[year.to_i, month.to_i, day.to_i].reject { |part| part == 0 }] }
|
1021
1021
|
end
|
1022
1022
|
|
1023
1023
|
def get_iso8601_date(iso8601_time)
|
@@ -1064,9 +1064,9 @@ module Commonmeta
|
|
1064
1064
|
# strip milliseconds if there is a time, as it interferes with EDTF parsing
|
1065
1065
|
# keep dates unchanged
|
1066
1066
|
def strip_milliseconds(iso8601_time)
|
1067
|
-
return iso8601_time.split(
|
1067
|
+
return iso8601_time.split(" ").first if iso8601_time.to_s.include? " "
|
1068
1068
|
|
1069
|
-
return iso8601_time.split(
|
1069
|
+
return iso8601_time.split(".").first + "Z" if iso8601_time.to_s.include? "."
|
1070
1070
|
|
1071
1071
|
iso8601_time
|
1072
1072
|
end
|
@@ -1074,64 +1074,64 @@ module Commonmeta
|
|
1074
1074
|
# iso8601 datetime without hyphens and colons, used by Crossref
|
1075
1075
|
# return nil if invalid
|
1076
1076
|
def get_datetime_from_time(time)
|
1077
|
-
DateTime.strptime(time.to_s,
|
1077
|
+
DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime("%Y-%m-%dT%H:%M:%SZ")
|
1078
1078
|
rescue ArgumentError
|
1079
1079
|
nil
|
1080
1080
|
end
|
1081
1081
|
|
1082
1082
|
def get_date(dates, date_type)
|
1083
|
-
dd = Array.wrap(dates).find { |d| d[
|
1084
|
-
dd.fetch(
|
1083
|
+
dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
|
1084
|
+
dd.fetch("date", nil)
|
1085
1085
|
end
|
1086
1086
|
|
1087
1087
|
# convert commonmeta dates to DataCite format
|
1088
1088
|
def get_dates_from_date(date)
|
1089
1089
|
return nil if date.nil?
|
1090
1090
|
|
1091
|
-
mapping = {
|
1091
|
+
mapping = { "published" => "issued" }
|
1092
1092
|
|
1093
1093
|
date = map_hash_keys(element: date, mapping: mapping)
|
1094
1094
|
|
1095
1095
|
date.map do |k, v|
|
1096
|
-
{
|
1097
|
-
|
1096
|
+
{ "date" => v,
|
1097
|
+
"dateType" => k.capitalize }
|
1098
1098
|
end
|
1099
1099
|
end
|
1100
1100
|
|
1101
1101
|
def get_contributor(contributor, contributor_type)
|
1102
|
-
contributor.select { |c| c[
|
1102
|
+
contributor.select { |c| c["contributorType"] == contributor_type }
|
1103
1103
|
end
|
1104
1104
|
|
1105
1105
|
def get_identifier(identifiers, identifier_type)
|
1106
|
-
id = Array.wrap(identifiers).find { |i| i[
|
1107
|
-
id.fetch(
|
1106
|
+
id = Array.wrap(identifiers).find { |i| i["identifierType"] == identifier_type } || {}
|
1107
|
+
id.fetch("identifier", nil)
|
1108
1108
|
end
|
1109
1109
|
|
1110
1110
|
def get_identifier_type(identifier_type)
|
1111
1111
|
return nil unless identifier_type.present?
|
1112
1112
|
|
1113
1113
|
identifierTypes = {
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1114
|
+
"ark" => "ARK",
|
1115
|
+
"arxiv" => "arXiv",
|
1116
|
+
"bibcode" => "bibcode",
|
1117
|
+
"doi" => "DOI",
|
1118
|
+
"ean13" => "EAN13",
|
1119
|
+
"eissn" => "EISSN",
|
1120
|
+
"handle" => "Handle",
|
1121
|
+
"igsn" => "IGSN",
|
1122
|
+
"isbn" => "ISBN",
|
1123
|
+
"issn" => "ISSN",
|
1124
|
+
"istc" => "ISTC",
|
1125
|
+
"lissn" => "LISSN",
|
1126
|
+
"lsid" => "LSID",
|
1127
|
+
"pmid" => "PMID",
|
1128
|
+
"purl" => "PURL",
|
1129
|
+
"upc" => "UPC",
|
1130
|
+
"url" => "URL",
|
1131
|
+
"urn" => "URN",
|
1132
|
+
"md5" => "md5",
|
1133
|
+
"minid" => "minid",
|
1134
|
+
"dataguid" => "dataguid",
|
1135
1135
|
}
|
1136
1136
|
|
1137
1137
|
identifierTypes[identifier_type.downcase] || identifier_type
|
@@ -1140,27 +1140,27 @@ module Commonmeta
|
|
1140
1140
|
def get_series_information(str)
|
1141
1141
|
return {} unless str.present?
|
1142
1142
|
|
1143
|
-
str = str.split(
|
1143
|
+
str = str.split(",").map(&:strip)
|
1144
1144
|
|
1145
1145
|
title = str.first
|
1146
1146
|
volume_issue = str.length > 2 ? str[1].rpartition(/\(([^)]+)\)/) : nil
|
1147
1147
|
volume = volume_issue.present? ? volume_issue[0].presence || volume_issue[2].presence : nil
|
1148
1148
|
issue = volume_issue.present? ? volume_issue[1][1...-1].presence : nil
|
1149
1149
|
pages = str.length > 1 ? str.last : nil
|
1150
|
-
first_page = pages.present? ? pages.split(
|
1151
|
-
last_page = pages.present? ? pages.split(
|
1150
|
+
first_page = pages.present? ? pages.split("-").map(&:strip)[0] : nil
|
1151
|
+
last_page = pages.present? ? pages.split("-").map(&:strip)[1] : nil
|
1152
1152
|
|
1153
1153
|
{
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1154
|
+
"title" => title,
|
1155
|
+
"volume" => volume,
|
1156
|
+
"issue" => issue,
|
1157
|
+
"firstPage" => first_page,
|
1158
|
+
"lastPage" => last_page,
|
1159
1159
|
}.compact
|
1160
1160
|
end
|
1161
1161
|
|
1162
1162
|
def jsonlint(json)
|
1163
|
-
return [
|
1163
|
+
return ["No JSON provided"] unless json.present?
|
1164
1164
|
|
1165
1165
|
error_array = []
|
1166
1166
|
linter = JsonLint::Linter.new
|
@@ -1169,33 +1169,33 @@ module Commonmeta
|
|
1169
1169
|
end
|
1170
1170
|
|
1171
1171
|
def name_to_spdx(name)
|
1172
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1173
|
-
__dir__))).fetch(
|
1172
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1173
|
+
__dir__))).fetch("licenses")
|
1174
1174
|
license = spdx.find do |l|
|
1175
|
-
l[
|
1175
|
+
l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalize_cc_url(name)
|
1176
1176
|
end
|
1177
1177
|
|
1178
1178
|
if license
|
1179
|
-
{
|
1179
|
+
{ "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact
|
1180
1180
|
else
|
1181
|
-
{
|
1181
|
+
{ "rights" => name }
|
1182
1182
|
end
|
1183
1183
|
end
|
1184
1184
|
|
1185
1185
|
def hsh_to_spdx(hsh)
|
1186
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1187
|
-
__dir__))).fetch(
|
1188
|
-
hsh[
|
1186
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1187
|
+
__dir__))).fetch("licenses")
|
1188
|
+
hsh["rightsUri"] = hsh.delete("rightsURI") if hsh["rightsUri"].blank?
|
1189
1189
|
license = spdx.find do |l|
|
1190
|
-
l[
|
1190
|
+
l["licenseId"].casecmp?(hsh["rightsIdentifier"]) || l["seeAlso"].first == normalize_cc_url(hsh["rightsUri"]) || l["name"] == hsh["rights"] || l["seeAlso"].first == normalize_cc_url(hsh["rights"])
|
1191
1191
|
end
|
1192
1192
|
|
1193
1193
|
if license
|
1194
|
-
{
|
1194
|
+
{ "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact
|
1195
1195
|
else
|
1196
1196
|
{
|
1197
|
-
|
1198
|
-
|
1197
|
+
"id" => hsh["rightsIdentifier"].present? ? hsh["rightsIdentifier"].downcase : nil,
|
1198
|
+
"url" => hsh["rightsURI"] || hsh["rightsUri"],
|
1199
1199
|
}.compact
|
1200
1200
|
end
|
1201
1201
|
end
|
@@ -1203,152 +1203,163 @@ module Commonmeta
|
|
1203
1203
|
def spdx_to_hsh(hsh)
|
1204
1204
|
return nil unless hsh.present? && hsh.is_a?(Hash)
|
1205
1205
|
|
1206
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1207
|
-
__dir__))).fetch(
|
1206
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1207
|
+
__dir__))).fetch("licenses")
|
1208
1208
|
|
1209
|
-
license = spdx.find { |l| l[
|
1209
|
+
license = spdx.find { |l| l["licenseId"].casecmp?(hsh["id"]) }
|
1210
1210
|
|
1211
1211
|
if license
|
1212
1212
|
[{
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1213
|
+
"rightsIdentifier" => license["licenseId"].downcase,
|
1214
|
+
"rightsUri" => license["seeAlso"].first,
|
1215
|
+
"rights" => license["name"],
|
1216
|
+
"rightsIdentifierScheme" => "SPDX",
|
1217
|
+
"schemeUri" => "https://spdx.org/licenses/",
|
1218
1218
|
}.compact]
|
1219
1219
|
else
|
1220
|
-
[{
|
1220
|
+
[{ "rightsIdentifier" => hsh["id"], "rightsURI" => hsh["url"] }.compact]
|
1221
1221
|
end
|
1222
1222
|
end
|
1223
1223
|
|
1224
1224
|
def name_to_fos(name)
|
1225
1225
|
# first find subject in Fields of Science (OECD)
|
1226
|
-
fos = JSON.load(File.read(File.expand_path(
|
1227
|
-
__dir__))).fetch(
|
1226
|
+
fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json",
|
1227
|
+
__dir__))).fetch("fosFields")
|
1228
1228
|
|
1229
|
-
subject = fos.find { |l| l[
|
1229
|
+
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
1230
1230
|
|
1231
1231
|
if subject
|
1232
1232
|
return [{
|
1233
|
-
|
1234
|
-
|
1233
|
+
"subject" => sanitize(name).downcase,
|
1234
|
+
},
|
1235
1235
|
{
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1236
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1237
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1238
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1239
|
+
}]
|
1240
1240
|
end
|
1241
1241
|
|
1242
1242
|
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1243
1243
|
# and map to Fields of Science. Add an extra entry for the latter
|
1244
|
-
fores = JSON.load(File.read(File.expand_path(
|
1244
|
+
fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json",
|
1245
1245
|
__dir__)))
|
1246
|
-
for_fields = fores.fetch(
|
1247
|
-
for_disciplines = fores.fetch(
|
1246
|
+
for_fields = fores.fetch("forFields")
|
1247
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1248
1248
|
|
1249
|
-
subject = for_fields.find { |l| l[
|
1250
|
-
for_disciplines.find { |l| l[
|
1249
|
+
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
1250
|
+
for_disciplines.find { |l| l["forLabel"] == name }
|
1251
1251
|
|
1252
1252
|
if subject
|
1253
1253
|
[{
|
1254
|
-
|
1254
|
+
"subject" => sanitize(name).downcase,
|
1255
1255
|
},
|
1256
1256
|
{
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1257
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1258
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1259
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1260
|
+
}]
|
1261
1261
|
else
|
1262
|
-
[{
|
1262
|
+
[{ "subject" => sanitize(name).downcase }]
|
1263
1263
|
end
|
1264
1264
|
end
|
1265
1265
|
|
1266
1266
|
def hsh_to_fos(hsh)
|
1267
1267
|
# first find subject in Fields of Science (OECD)
|
1268
|
-
fos = JSON.load(File.read(File.expand_path(
|
1269
|
-
__dir__))).fetch(
|
1268
|
+
fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json",
|
1269
|
+
__dir__))).fetch("fosFields")
|
1270
1270
|
subject = fos.find do |l|
|
1271
|
-
l[
|
1271
|
+
l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]
|
1272
1272
|
end
|
1273
1273
|
|
1274
1274
|
if subject
|
1275
1275
|
return [{
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1276
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1277
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1278
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1279
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1280
|
+
"classificationCode" => hsh["classificationCode"],
|
1281
|
+
"lang" => hsh["lang"],
|
1282
|
+
}.compact,
|
1283
1283
|
{
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1284
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1285
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1286
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1287
|
+
}.compact]
|
1288
1288
|
end
|
1289
1289
|
|
1290
1290
|
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1291
1291
|
# and map to Fields of Science. Add an extra entry for the latter
|
1292
|
-
fores = JSON.load(File.read(File.expand_path(
|
1292
|
+
fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json",
|
1293
1293
|
__dir__)))
|
1294
|
-
for_fields = fores.fetch(
|
1295
|
-
for_disciplines = fores.fetch(
|
1294
|
+
for_fields = fores.fetch("forFields")
|
1295
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1296
1296
|
|
1297
1297
|
# try to extract forId
|
1298
|
-
if hsh[
|
1299
|
-
for_id = hsh[
|
1300
|
-
for_id = for_id.rjust(6,
|
1298
|
+
if hsh["subjectScheme"] == "FOR"
|
1299
|
+
for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
|
1300
|
+
for_id = for_id.rjust(6, "0")
|
1301
1301
|
|
1302
|
-
subject = for_fields.find { |l| l[
|
1303
|
-
for_disciplines.find { |l| l[
|
1302
|
+
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
1303
|
+
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
1304
1304
|
else
|
1305
1305
|
subject = for_fields.find do |l|
|
1306
|
-
l[
|
1306
|
+
l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"]
|
1307
1307
|
end ||
|
1308
1308
|
for_disciplines.find do |l|
|
1309
|
-
l[
|
1309
|
+
l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"]
|
1310
1310
|
end
|
1311
1311
|
end
|
1312
1312
|
|
1313
1313
|
if subject
|
1314
1314
|
[{
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1315
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1316
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1317
|
+
"classificationCode" => hsh["classificationCode"],
|
1318
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1319
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1320
|
+
"lang" => hsh["lang"],
|
1321
1321
|
}.compact,
|
1322
1322
|
{
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1323
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1324
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1325
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1326
|
+
}]
|
1327
1327
|
else
|
1328
1328
|
[{
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1329
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1330
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1331
|
+
"classificationCode" => hsh["classificationCode"],
|
1332
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1333
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1334
|
+
"lang" => hsh["lang"],
|
1335
1335
|
}.compact]
|
1336
1336
|
end
|
1337
1337
|
end
|
1338
1338
|
|
1339
1339
|
def encode_doi(prefix)
|
1340
1340
|
# DOI suffix is generated from a random number, encoded in base32
|
1341
|
-
# suffix has 8 digits plus two checksum digits. With base32 there are
|
1341
|
+
# suffix has 8 digits plus two checksum digits. With base32 there are
|
1342
1342
|
# 32 possible digits, so 8 digits gives 32^8 possible combinations
|
1343
|
-
random_int = SecureRandom.random_number(32**7..(32**8) - 1)
|
1343
|
+
random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1)
|
1344
1344
|
suffix = Base32::URL.encode(random_int, checksum: true)
|
1345
1345
|
str = "#{suffix[0, 5]}-#{suffix[5, 10]}"
|
1346
1346
|
"https://doi.org/#{prefix}/#{str}"
|
1347
1347
|
end
|
1348
1348
|
|
1349
1349
|
def decode_doi(doi)
|
1350
|
-
suffix = doi.split(
|
1350
|
+
suffix = doi.split("/", 5).last
|
1351
1351
|
Base32::URL.decode(suffix)
|
1352
1352
|
end
|
1353
|
+
|
1354
|
+
def encode_id
|
1355
|
+
# suffix has 5 digits plus two checksum digits. With base32 there are
|
1356
|
+
# 32 possible digits, so 5 digits gives 32^5 possible combinations
|
1357
|
+
random_int = SecureRandom.random_number(32 ** 4..(32 ** 5) - 1)
|
1358
|
+
Base32::URL.encode(random_int, checksum: true)
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
def decode_id(id)
|
1362
|
+
Base32::URL.decode(id)
|
1363
|
+
end
|
1353
1364
|
end
|
1354
1365
|
end
|