commonmeta-ruby 3.0.6 → 3.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +14 -12
- data/commonmeta.gemspec +1 -0
- data/lib/commonmeta/cli.rb +12 -0
- data/lib/commonmeta/utils.rb +684 -673
- data/lib/commonmeta/version.rb +1 -1
- data/spec/cli_spec.rb +14 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_doi/decode_another_doi.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/decode_another_id.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/decode_id.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/random_id/encode_id.yml +221 -0
- data/spec/utils_spec.rb +21 -1
- metadata +27 -3
data/lib/commonmeta/utils.rb
CHANGED
@@ -3,439 +3,439 @@
|
|
3
3
|
module Commonmeta
|
4
4
|
module Utils
|
5
5
|
NORMALIZED_LICENSES = {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
6
|
+
"https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
|
7
|
+
"https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
|
8
|
+
"https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
|
9
|
+
"https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
|
10
|
+
"https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode",
|
11
|
+
"https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
|
12
|
+
"https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
|
13
|
+
"https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
|
14
|
+
"https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
|
15
|
+
"https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
|
16
|
+
"https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
|
17
|
+
"https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
|
18
|
+
"https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
|
19
|
+
"https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
|
20
|
+
"https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
|
21
|
+
"https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
|
22
|
+
"https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
|
23
|
+
"https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
|
24
|
+
"https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
|
25
|
+
"https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
|
26
|
+
"https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
|
27
|
+
"https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
|
28
|
+
"https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
|
29
|
+
"https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
|
30
|
+
"https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
|
31
|
+
"https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
|
32
|
+
"https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
|
33
|
+
"https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
|
34
|
+
"https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
|
35
|
+
"https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
|
36
|
+
"https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
|
37
|
+
"https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
|
38
|
+
"https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
|
39
|
+
"https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
|
40
|
+
"https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
|
41
|
+
"https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
|
42
|
+
"https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
|
43
|
+
"https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
|
44
44
|
}
|
45
45
|
|
46
46
|
# source: https://www.bibtex.com/e/entry-types/
|
47
47
|
BIB_TO_CM_TRANSLATIONS = {
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
48
|
+
"article" => "JournalArticle",
|
49
|
+
"book" => "Book",
|
50
|
+
"booklet" => "Book",
|
51
|
+
"inbook" => "BookChapter",
|
52
|
+
"inproceedings" => "ProceedingsArticle",
|
53
|
+
"manual" => "Report",
|
54
|
+
"mastersthesis" => "Dissertation",
|
55
|
+
"misc" => "Other",
|
56
|
+
"phdthesis" => "Dissertation",
|
57
|
+
"proceedings" => "Proceedings",
|
58
|
+
"techreport" => "Report",
|
59
|
+
"unpublished" => "Manuscript",
|
60
60
|
}
|
61
61
|
|
62
62
|
CM_TO_BIB_TRANSLATIONS = {
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
63
|
+
"Article" => "article",
|
64
|
+
"Book" => "book",
|
65
|
+
"BookChapter" => "inbook",
|
66
|
+
"Dissertation" => "phdthesis",
|
67
|
+
"JournalArticle" => "article",
|
68
|
+
"Manuscript" => "unpublished",
|
69
|
+
"Other" => "misc",
|
70
|
+
"Proceedings" => "proceedings",
|
71
|
+
"ProceedingsArticle" => "inproceedings",
|
72
|
+
"Report" => "techreport",
|
73
73
|
}
|
74
74
|
|
75
75
|
# source: https://docs.citationstyles.org/en/stable/specification.html?highlight=book#appendix-iii-types
|
76
76
|
CSL_TO_CM_TRANSLATIONS = {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
77
|
+
"article" => "Article",
|
78
|
+
"article-journal" => "JournalArticle",
|
79
|
+
"article-magazine" => "Article",
|
80
|
+
"article-newspaper" => "Article",
|
81
|
+
"bill" => "LegalDocument",
|
82
|
+
"book" => "Book",
|
83
|
+
"broadcast" => "Audiovisual",
|
84
|
+
"chapter" => "BookChapter",
|
85
|
+
"classic" => "Book",
|
86
|
+
"collection" => "Collection",
|
87
|
+
"dataset" => "Dataset",
|
88
|
+
"document" => "Document",
|
89
|
+
"entry" => "Entry",
|
90
|
+
"entry-dictionary" => "Entry",
|
91
|
+
"entry-encyclopedia" => "Entry",
|
92
|
+
"event" => "Event",
|
93
|
+
"figure" => "Figure",
|
94
|
+
"graphic" => "Image",
|
95
|
+
"hearing" => "LegalDocument",
|
96
|
+
"interview" => "Document",
|
97
|
+
"legal_case" => "LegalDocument",
|
98
|
+
"legislation" => "LegalDocument",
|
99
|
+
"manuscript" => "Manuscript",
|
100
|
+
"map" => "Map",
|
101
|
+
"motion_picture" => "Audiovisual",
|
102
|
+
"musical_score" => "Document",
|
103
|
+
"pamphlet" => "Document",
|
104
|
+
"paper-conference" => "ProceedingsArticle",
|
105
|
+
"patent" => "Patent",
|
106
|
+
"performance" => "Performance",
|
107
|
+
"periodical" => "Journal",
|
108
|
+
"personal_communication" => "PersonalCommunication",
|
109
|
+
"post" => "Post",
|
110
|
+
"post-weblog" => "Article",
|
111
|
+
"regulation" => "LegalDocument",
|
112
|
+
"report" => "Report",
|
113
|
+
"review" => "Review",
|
114
|
+
"review-book" => "Review",
|
115
|
+
"software" => "Software",
|
116
|
+
"song" => "Audiovisual",
|
117
|
+
"speech" => "Speech",
|
118
|
+
"standard" => "Standard",
|
119
|
+
"thesis" => "Dissertation",
|
120
|
+
"treaty" => "LegalDocument",
|
121
|
+
"webpage" => "WebPage",
|
122
122
|
}
|
123
123
|
|
124
124
|
CM_TO_CSL_TRANSLATIONS = {
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
125
|
+
"Article" => "article",
|
126
|
+
"JournalArticle" => "article-journal",
|
127
|
+
"Book" => "book",
|
128
|
+
"BookChapter" => "chapter",
|
129
|
+
"Collection" => "collection",
|
130
|
+
"Dataset" => "dataset",
|
131
|
+
"Document" => "document",
|
132
|
+
"Entry" => "entry",
|
133
|
+
"Event" => "event",
|
134
|
+
"Figure" => "figure",
|
135
|
+
"Image" => "graphic",
|
136
|
+
"LegalDocument" => "legal_case",
|
137
|
+
"Manuscript" => "manuscript",
|
138
|
+
"Map" => "map",
|
139
|
+
"Audiovisual" => "motion_picture",
|
140
|
+
"Patent" => "patent",
|
141
|
+
"Performance" => "performance",
|
142
|
+
"Journal" => "periodical",
|
143
|
+
"PersonalCommunication" => "personal_communication",
|
144
|
+
"Post" => "post",
|
145
|
+
"Report" => "report",
|
146
|
+
"Review" => "review",
|
147
|
+
"Software" => "software",
|
148
|
+
"Speech" => "speech",
|
149
|
+
"Standard" => "standard",
|
150
|
+
"Dissertation" => "thesis",
|
151
|
+
"WebPage" => "webpage",
|
152
152
|
}
|
153
153
|
|
154
154
|
# source: http://api.crossref.org/types
|
155
155
|
CR_TO_CM_TRANSLATIONS = {
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
156
|
+
"BookChapter" => "BookChapter",
|
157
|
+
"BookPart" => "BookPart",
|
158
|
+
"BookSection" => "BookSection",
|
159
|
+
"BookSeries" => "BookSeries",
|
160
|
+
"BookSet" => "BookSet",
|
161
|
+
"BookTrack" => "BookTrack",
|
162
|
+
"Book" => "Book",
|
163
|
+
"Component" => "Component",
|
164
|
+
"Database" => "Database",
|
165
|
+
"Dataset" => "Dataset",
|
166
|
+
"Dissertation" => "Dissertation",
|
167
|
+
"EditedBook" => "EditedBook",
|
168
|
+
"Grant" => "Grant",
|
169
|
+
"JournalArticle" => "JournalArticle",
|
170
|
+
"JournalIssue" => "JournalIssue",
|
171
|
+
"JournalVolume" => "JournalVolume",
|
172
|
+
"Journal" => "Journal",
|
173
|
+
"Monograph" => "Book",
|
174
|
+
"Other" => "Other",
|
175
|
+
"PeerReview" => "PeerReview",
|
176
|
+
"PostedContent" => "Article",
|
177
|
+
"ProceedingsArticle" => "ProceedingsArticle",
|
178
|
+
"ProceedingsSeries" => "ProceedingsSeries",
|
179
|
+
"Proceedings" => "Proceedings",
|
180
|
+
"ReferenceBook" => "ReferenceBook",
|
181
|
+
"ReferenceEntry" => "Entry",
|
182
|
+
"ReportComponent" => "ReportComponent",
|
183
|
+
"ReportSeries" => "ReportSeries",
|
184
|
+
"Report" => "Report",
|
185
|
+
"Standard" => "Standard",
|
186
186
|
}
|
187
187
|
|
188
188
|
CM_TO_CR_TRANSLATIONS = {
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
189
|
+
"Article" => "PostedContent",
|
190
|
+
"BookChapter" => "BookChapter",
|
191
|
+
"BookSeries" => "BookSeries",
|
192
|
+
"Book" => "Book",
|
193
|
+
"Component" => "Component",
|
194
|
+
"Dataset" => "Dataset",
|
195
|
+
"Dissertation" => "Dissertation",
|
196
|
+
"Grant" => "Grant",
|
197
|
+
"JournalArticle" => "JournalArticle",
|
198
|
+
"JournalIssue" => "JournalIssue",
|
199
|
+
"JournalVolume" => "JournalVolume",
|
200
|
+
"Journal" => "Journal",
|
201
|
+
"ProceedingsArticle" => "ProceedingsArticle",
|
202
|
+
"ProceedingsSeries" => "ProceedingsSeries",
|
203
|
+
"Proceedings" => "Proceedings",
|
204
|
+
"ReportComponent" => "ReportComponent",
|
205
|
+
"ReportSeries" => "ReportSeries",
|
206
|
+
"Report" => "Report",
|
207
|
+
"PeerReview" => "PeerReview",
|
208
|
+
"Other" => "Other",
|
209
209
|
}
|
210
210
|
|
211
211
|
# source: https://github.com/datacite/schema/blob/master/source/meta/kernel-4/include/datacite-resourceType-v4.xsd
|
212
212
|
DC_TO_CM_TRANSLATIONS = {
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
213
|
+
"Audiovisual" => "Audiovisual",
|
214
|
+
"BlogPosting" => "Article",
|
215
|
+
"Book" => "Book",
|
216
|
+
"BookChapter" => "BookChapter",
|
217
|
+
"Collection" => "Collection",
|
218
|
+
"ComputationalNotebook" => "ComputationalNotebook",
|
219
|
+
"ConferencePaper" => "ProceedingsArticle",
|
220
|
+
"ConferenceProceeding" => "Proceedings",
|
221
|
+
"DataPaper" => "JournalArticle",
|
222
|
+
"Dataset" => "Dataset",
|
223
|
+
"Dissertation" => "Dissertation",
|
224
|
+
"Event" => "Event",
|
225
|
+
"Image" => "Image",
|
226
|
+
"InteractiveResource" => "InteractiveResource",
|
227
|
+
"Journal" => "Journal",
|
228
|
+
"JournalArticle" => "JournalArticle",
|
229
|
+
"Model" => "Model",
|
230
|
+
"OutputManagementPlan" => "OutputManagementPlan",
|
231
|
+
"PeerReview" => "PeerReview",
|
232
|
+
"PhysicalObject" => "PhysicalObject",
|
233
|
+
"Poster" => "Speech",
|
234
|
+
"Preprint" => "Article",
|
235
|
+
"Report" => "Report",
|
236
|
+
"Service" => "Service",
|
237
|
+
"Software" => "Software",
|
238
|
+
"Sound" => "Sound",
|
239
|
+
"Standard" => "Standard",
|
240
|
+
"Text" => "Document",
|
241
|
+
"Thesis" => "Dissertation",
|
242
|
+
"Workflow" => "Workflow",
|
243
|
+
"Other" => "Other",
|
244
244
|
}
|
245
245
|
|
246
246
|
CM_TO_DC_TRANSLATIONS = {
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
247
|
+
"Article" => "Preprint",
|
248
|
+
"Audiovisual" => "Audiovisual",
|
249
|
+
"Book" => "Book",
|
250
|
+
"BookChapter" => "BookChapter",
|
251
|
+
"Collection" => "Collection",
|
252
|
+
"Dataset" => "Dataset",
|
253
|
+
"Dissertation" => "Dissertation",
|
254
|
+
"Document" => "Text",
|
255
|
+
"Entry" => "Text",
|
256
|
+
"Event" => "Event",
|
257
|
+
"Figure" => "Image",
|
258
|
+
"Image" => "Image",
|
259
|
+
"JournalArticle" => "JournalArticle",
|
260
|
+
"LegalDocument" => "Text",
|
261
|
+
"Manuscript" => "Text",
|
262
|
+
"Map" => "Image",
|
263
|
+
"Patent" => "Text",
|
264
|
+
"Performance" => "Audiovisual",
|
265
|
+
"PersonalCommunication" => "Text",
|
266
|
+
"Post" => "Text",
|
267
|
+
"ProceedingsArticle" => "ConferencePaper",
|
268
|
+
"Proceedings" => "ConferenceProceeding",
|
269
|
+
"Report" => "Report",
|
270
|
+
"PeerReview" => "PeerReview",
|
271
|
+
"Software" => "Software",
|
272
|
+
"Sound" => "Sound",
|
273
|
+
"Standard" => "Standard",
|
274
|
+
"WebPage" => "Text",
|
275
275
|
}
|
276
276
|
|
277
277
|
RIS_TO_CM_TRANSLATIONS = {
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
278
|
+
"ABST" => "Text",
|
279
|
+
"ADVS" => "Text",
|
280
|
+
"AGGR" => "Text",
|
281
|
+
"ANCIENT" => "Text",
|
282
|
+
"ART" => "Text",
|
283
|
+
"BILL" => "Text",
|
284
|
+
"BLOG" => "Text",
|
285
|
+
"BOOK" => "Book",
|
286
|
+
"CASE" => "Text",
|
287
|
+
"CHAP" => "BookChapter",
|
288
|
+
"CHART" => "Text",
|
289
|
+
"CLSWK" => "Text",
|
290
|
+
"CTLG" => "Collection",
|
291
|
+
"COMP" => "Software",
|
292
|
+
"DATA" => "Dataset",
|
293
|
+
"DBASE" => "Database",
|
294
|
+
"DICT" => "Dictionary",
|
295
|
+
"EBOOK" => "Book",
|
296
|
+
"ECHAP" => "BookChapter",
|
297
|
+
"EDBOOK" => "Book",
|
298
|
+
"EJOUR" => "JournalArticle",
|
299
|
+
"ELEC" => "Text",
|
300
|
+
"ENCYC" => "Encyclopedia",
|
301
|
+
"EQUA" => "Equation",
|
302
|
+
"FIGURE" => "Image",
|
303
|
+
"GEN" => "CreativeWork",
|
304
|
+
"GOVDOC" => "GovernmentDocument",
|
305
|
+
"GRANT" => "Grant",
|
306
|
+
"HEAR" => "Hearing",
|
307
|
+
"ICOMM" => "Text",
|
308
|
+
"INPR" => "Text",
|
309
|
+
"JFULL" => "JournalArticle",
|
310
|
+
"JOUR" => "JournalArticle",
|
311
|
+
"LEGAL" => "LegalRuleOrRegulation",
|
312
|
+
"MANSCPT" => "Text",
|
313
|
+
"MAP" => "Map",
|
314
|
+
"MGZN" => "MagazineArticle",
|
315
|
+
"MPCT" => "Audiovisual",
|
316
|
+
"MULTI" => "Audiovisual",
|
317
|
+
"MUSIC" => "MusicScore",
|
318
|
+
"NEWS" => "NewspaperArticle",
|
319
|
+
"PAMP" => "Pamphlet",
|
320
|
+
"PAT" => "Patent",
|
321
|
+
"PCOMM" => "PersonalCommunication",
|
322
|
+
"RPRT" => "Report",
|
323
|
+
"SER" => "SerialPublication",
|
324
|
+
"SLIDE" => "Slide",
|
325
|
+
"SOUND" => "SoundRecording",
|
326
|
+
"STAND" => "Standard",
|
327
|
+
"THES" => "Dissertation",
|
328
|
+
"UNBILL" => "UnenactedBill",
|
329
|
+
"UNPB" => "UnpublishedWork",
|
330
|
+
"VIDEO" => "Audiovisual",
|
331
|
+
"WEB" => "WebPage",
|
332
332
|
}
|
333
333
|
|
334
334
|
CM_TO_RIS_TRANSLATIONS = {
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
335
|
+
"Article" => "JOUR",
|
336
|
+
"Audiovisual" => "VIDEO",
|
337
|
+
"Book" => "BOOK",
|
338
|
+
"BookChapter" => "CHAP",
|
339
|
+
"Collection" => "CTLG",
|
340
|
+
"Dataset" => "DATA",
|
341
|
+
"Dissertation" => "THES",
|
342
|
+
"Document" => "GEN",
|
343
|
+
"Entry" => "DICT",
|
344
|
+
"Event" => "GEN",
|
345
|
+
"Figure" => "FIGURE",
|
346
|
+
"Image" => "FIGURE",
|
347
|
+
"JournalArticle" => "JOUR",
|
348
|
+
"LegalDocument" => "GEN",
|
349
|
+
"Manuscript" => "GEN",
|
350
|
+
"Map" => "MAP",
|
351
|
+
"Patent" => "PAT",
|
352
|
+
"Performance" => "GEN",
|
353
|
+
"PersonalCommunication" => "PCOMM",
|
354
|
+
"Post" => "GEN",
|
355
|
+
"ProceedingsArticle" => "CPAPER",
|
356
|
+
"Proceedings" => "CONF",
|
357
|
+
"Report" => "RPRT",
|
358
|
+
"Review" => "GEN",
|
359
|
+
"Software" => "COMP",
|
360
|
+
"Sound" => "SOUND",
|
361
|
+
"Standard" => "STAND",
|
362
|
+
"WebPage" => "WEB",
|
363
363
|
}
|
364
364
|
|
365
365
|
SO_TO_CM_TRANSLATIONS = {
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
366
|
+
"Article" => "Article",
|
367
|
+
"BlogPosting" => "Article",
|
368
|
+
"Book" => "Book",
|
369
|
+
"BookChapter" => "BookChapter",
|
370
|
+
"CreativeWork" => "Other",
|
371
|
+
"Dataset" => "Dataset",
|
372
|
+
"Dissertation" => "Dissertation",
|
373
|
+
"NewsArticle" => "Article",
|
374
|
+
"Legislation" => "LegalDocument",
|
375
|
+
"ScholarlyArticle" => "JournalArticle",
|
376
|
+
"SoftwareSourceCode" => "Software",
|
377
377
|
}
|
378
378
|
|
379
379
|
CM_TO_SO_TRANSLATIONS = {
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
380
|
+
"Article" => "Article",
|
381
|
+
"Audiovisual" => "CreativeWork",
|
382
|
+
"Book" => "Book",
|
383
|
+
"BookChapter" => "BookChapter",
|
384
|
+
"Collection" => "CreativeWork",
|
385
|
+
"Dataset" => "Dataset",
|
386
|
+
"Dissertation" => "Dissertation",
|
387
|
+
"Document" => "CreativeWork",
|
388
|
+
"Entry" => "CreativeWork",
|
389
|
+
"Event" => "CreativeWork",
|
390
|
+
"Figure" => "CreativeWork",
|
391
|
+
"Image" => "CreativeWork",
|
392
|
+
"JournalArticle" => "ScholarlyArticle",
|
393
|
+
"LegalDocument" => "Legislation",
|
394
|
+
"Software" => "SoftwareSourceCode",
|
395
395
|
}
|
396
396
|
|
397
397
|
CM_TO_JATS_TRANSLATIONS = {
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
398
|
+
"Proceedings" => "working-paper",
|
399
|
+
"ReferenceBook" => "book",
|
400
|
+
"JournalIssue" => "journal",
|
401
|
+
"ProceedingsArticle" => "working-paper",
|
402
|
+
"Other" => nil,
|
403
|
+
"Dissertation" => nil,
|
404
|
+
"Dataset" => "data",
|
405
|
+
"Document" => "journal",
|
406
|
+
"EditedBook" => "book",
|
407
|
+
"JournalArticle" => "journal",
|
408
|
+
"Journal" => "journal",
|
409
|
+
"Report" => "report",
|
410
|
+
"BookSeries" => "book",
|
411
|
+
"ReportSeries" => "report",
|
412
|
+
"BookTrack" => "book",
|
413
|
+
"Standard" => "standard",
|
414
|
+
"BookSection" => "chapter",
|
415
|
+
"BookPart" => "chapter",
|
416
|
+
"Book" => "book",
|
417
|
+
"BookChapter" => "chapter",
|
418
|
+
"StandardSeries" => "standard",
|
419
|
+
"Monograph" => "book",
|
420
|
+
"Component" => nil,
|
421
|
+
"ReferenceEntry" => nil,
|
422
|
+
"JournalVolume" => "journal",
|
423
|
+
"BookSet" => "book",
|
424
|
+
"Article" => "journal",
|
425
|
+
"Software" => "software",
|
426
426
|
}
|
427
427
|
|
428
428
|
UNKNOWN_INFORMATION = {
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
429
|
+
":unac" => "temporarily inaccessible",
|
430
|
+
":unal" => "unallowed, suppressed intentionally",
|
431
|
+
":unap" => "not applicable, makes no sense",
|
432
|
+
":unas" => "value unassigned (e.g., Untitled)",
|
433
|
+
":unav" => "value unavailable, possibly unknown",
|
434
|
+
":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
|
435
|
+
":none" => "never had a value, never will",
|
436
|
+
":null" => "explicitly and meaningfully empty",
|
437
|
+
":tba" => "to be assigned or announced later",
|
438
|
+
":etal" => "too numerous to list (et alia)",
|
439
439
|
}
|
440
440
|
|
441
441
|
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
|
@@ -448,7 +448,7 @@ module Commonmeta
|
|
448
448
|
elsif filename.present?
|
449
449
|
find_from_format_by_filename(filename)
|
450
450
|
else
|
451
|
-
|
451
|
+
"datacite"
|
452
452
|
end
|
453
453
|
end
|
454
454
|
|
@@ -459,35 +459,35 @@ module Commonmeta
|
|
459
459
|
ra = get_doi_ra(id)
|
460
460
|
%w[DataCite Crossref mEDRA KISTI JaLC OP].include?(ra) ? ra.downcase : nil
|
461
461
|
elsif %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}.match?(id)
|
462
|
-
|
462
|
+
"orcid"
|
463
463
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/package.json\z}.match?(id)
|
464
|
-
|
464
|
+
"npm"
|
465
465
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/codemeta.json\z}.match?(id)
|
466
|
-
|
466
|
+
"codemeta"
|
467
467
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)/CITATION.cff\z}.match?(id)
|
468
|
-
|
468
|
+
"cff"
|
469
469
|
elsif %r{\A(http|https):/(/)?github\.com/(.+)\z}.match?(id)
|
470
|
-
|
470
|
+
"cff"
|
471
471
|
else
|
472
|
-
|
472
|
+
"schema_org"
|
473
473
|
end
|
474
474
|
end
|
475
475
|
|
476
476
|
def find_from_format_by_filename(filename)
|
477
|
-
if filename ==
|
478
|
-
|
479
|
-
elsif filename ==
|
480
|
-
|
477
|
+
if filename == "package.json"
|
478
|
+
"npm"
|
479
|
+
elsif filename == "CITATION.cff"
|
480
|
+
"cff"
|
481
481
|
end
|
482
482
|
end
|
483
483
|
|
484
484
|
def find_from_format_by_ext(string, options = {})
|
485
485
|
case options[:ext]
|
486
|
-
when
|
487
|
-
|
488
|
-
when
|
489
|
-
|
490
|
-
when
|
486
|
+
when ".bib"
|
487
|
+
"bibtex"
|
488
|
+
when ".ris"
|
489
|
+
"ris"
|
490
|
+
when ".xml", ".json"
|
491
491
|
find_from_format_by_string(string)
|
492
492
|
end
|
493
493
|
end
|
@@ -495,36 +495,36 @@ module Commonmeta
|
|
495
495
|
def find_from_format_by_string(string)
|
496
496
|
begin # try to parse as JSON
|
497
497
|
hsh = MultiJson.load(string).to_h
|
498
|
-
if hsh.dig(
|
499
|
-
return
|
500
|
-
elsif hsh.dig(
|
501
|
-
return
|
502
|
-
elsif hsh.dig(
|
503
|
-
return
|
504
|
-
elsif hsh.dig(
|
505
|
-
return
|
506
|
-
elsif URI.parse(hsh.dig(
|
507
|
-
return
|
498
|
+
if hsh.dig("@context") && URI.parse(hsh.dig("@context")).host == "schema.org"
|
499
|
+
return "schema_org"
|
500
|
+
elsif hsh.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel")
|
501
|
+
return "datacite"
|
502
|
+
elsif hsh.dig("source") == "Crossref"
|
503
|
+
return "crossref"
|
504
|
+
elsif hsh.dig("issued", "date-parts").present?
|
505
|
+
return "csl"
|
506
|
+
elsif URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
507
|
+
return "codemeta"
|
508
508
|
end
|
509
509
|
rescue MultiJson::ParseError
|
510
510
|
end
|
511
511
|
|
512
512
|
begin # try to parse as XML
|
513
513
|
hsh = Hash.from_xml(string)
|
514
|
-
return
|
514
|
+
return "crossref_xml" if hsh.to_h.dig("crossref_result").present?
|
515
515
|
rescue Nokogiri::XML::SyntaxError
|
516
516
|
end
|
517
517
|
|
518
518
|
begin # try to parse as YAML
|
519
519
|
hsh = YAML.load(string, permitted_classes: [Date])
|
520
|
-
return
|
520
|
+
return "cff" if hsh.is_a?(Hash) && hsh.fetch("cff-version", nil).present?
|
521
521
|
rescue Psych::SyntaxError
|
522
522
|
end
|
523
523
|
|
524
|
-
if string.start_with?(
|
525
|
-
|
524
|
+
if string.start_with?("TY - ")
|
525
|
+
"ris"
|
526
526
|
elsif BibTeX.parse(string).first
|
527
|
-
|
527
|
+
"bibtex"
|
528
528
|
end
|
529
529
|
end
|
530
530
|
|
@@ -538,7 +538,7 @@ module Commonmeta
|
|
538
538
|
|
539
539
|
def validate_orcid(orcid)
|
540
540
|
orcid = Array(%r{\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(orcid)).last
|
541
|
-
orcid.gsub(/[[:space:]]/,
|
541
|
+
orcid.gsub(/[[:space:]]/, "-") if orcid.present?
|
542
542
|
end
|
543
543
|
|
544
544
|
def validate_orcid_scheme(orcid_scheme)
|
@@ -547,16 +547,16 @@ module Commonmeta
|
|
547
547
|
|
548
548
|
def validate_url(str)
|
549
549
|
if %r{\A(?:(http|https)://(dx\.)?doi.org/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(str)
|
550
|
-
|
550
|
+
"DOI"
|
551
551
|
elsif %r{\A(http|https)://}.match?(str)
|
552
|
-
|
552
|
+
"URL"
|
553
553
|
elsif /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/.match?(str)
|
554
|
-
|
554
|
+
"ISSN"
|
555
555
|
end
|
556
556
|
end
|
557
557
|
|
558
558
|
def parse_attributes(element, options = {})
|
559
|
-
content = options[:content] ||
|
559
|
+
content = options[:content] || "__content__"
|
560
560
|
|
561
561
|
if element.is_a?(String) && options[:content].nil?
|
562
562
|
CGI.unescapeHTML(element)
|
@@ -589,7 +589,7 @@ module Commonmeta
|
|
589
589
|
return nil unless id.present?
|
590
590
|
|
591
591
|
# handle info URIs
|
592
|
-
return id if id.to_s.start_with?(
|
592
|
+
return id if id.to_s.start_with?("info")
|
593
593
|
|
594
594
|
# check for valid HTTP uri
|
595
595
|
uri = Addressable::URI.parse(id)
|
@@ -597,7 +597,7 @@ module Commonmeta
|
|
597
597
|
return nil unless uri && uri.host && %w[http https ftp].include?(uri.scheme)
|
598
598
|
|
599
599
|
# optionally turn into https URL
|
600
|
-
uri.scheme =
|
600
|
+
uri.scheme = "https" if options[:https]
|
601
601
|
|
602
602
|
# clean up URL
|
603
603
|
uri.path = PostRank::URI.clean(uri.path)
|
@@ -617,52 +617,52 @@ module Commonmeta
|
|
617
617
|
return nil unless orcid.present?
|
618
618
|
|
619
619
|
# turn ORCID ID into URL
|
620
|
-
|
620
|
+
"https://orcid.org/" + Addressable::URI.encode(orcid)
|
621
621
|
end
|
622
622
|
|
623
623
|
# pick electronic issn if there are multiple
|
624
624
|
# format issn as xxxx-xxxx
|
625
625
|
def normalize_issn(input, options = {})
|
626
|
-
content = options[:content] ||
|
626
|
+
content = options[:content] || "__content__"
|
627
627
|
|
628
628
|
issn = if input.blank?
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
629
|
+
nil
|
630
|
+
elsif input.is_a?(String) && options[:content].nil?
|
631
|
+
input
|
632
|
+
elsif input.is_a?(Hash)
|
633
|
+
input.fetch(content, nil)
|
634
|
+
elsif input.is_a?(Array)
|
635
|
+
a = input.find { |a| a["media_type"] == "electronic" } || input.first
|
636
|
+
a.fetch(content, nil)
|
637
|
+
end
|
638
638
|
|
639
639
|
case issn.to_s.length
|
640
640
|
when 9
|
641
641
|
issn
|
642
642
|
when 8
|
643
|
-
issn[0..3] +
|
643
|
+
issn[0..3] + "-" + issn[4..7]
|
644
644
|
end
|
645
645
|
end
|
646
646
|
|
647
647
|
# find Creative Commons or OSI license in licenses array, normalize url and name
|
648
648
|
def normalize_licenses(licenses)
|
649
649
|
standard_licenses = Array.wrap(licenses).map do |l|
|
650
|
-
URI.parse(l[
|
650
|
+
URI.parse(l["url"])
|
651
651
|
end.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
|
652
652
|
return licenses unless standard_licenses.present?
|
653
653
|
|
654
654
|
# use HTTPS
|
655
|
-
uri.scheme =
|
655
|
+
uri.scheme = "https"
|
656
656
|
|
657
657
|
# use host name without subdomain
|
658
658
|
uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last
|
659
659
|
|
660
660
|
# normalize URLs
|
661
|
-
if uri.host ==
|
662
|
-
uri.path = uri.path.split(
|
663
|
-
uri.path <<
|
661
|
+
if uri.host == "creativecommons.org"
|
662
|
+
uri.path = uri.path.split("/")[0..-2].join("/") if uri.path.split("/").last == "legalcode"
|
663
|
+
uri.path << "/" unless uri.path.end_with?("/")
|
664
664
|
else
|
665
|
-
uri.path = uri.path.gsub(/(-license|\.php|\.html)/,
|
665
|
+
uri.path = uri.path.gsub(/(-license|\.php|\.html)/, "")
|
666
666
|
uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
|
667
667
|
uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
|
668
668
|
uri.path = uri.path.sub(/([^0-9-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
|
@@ -670,8 +670,8 @@ module Commonmeta
|
|
670
670
|
text = m[1]
|
671
671
|
|
672
672
|
if m[3].present?
|
673
|
-
version = [m[3], m[5].presence ||
|
674
|
-
[text, version].join(
|
673
|
+
version = [m[3], m[5].presence || "0"].join(".")
|
674
|
+
[text, version].join("-")
|
675
675
|
else
|
676
676
|
text
|
677
677
|
end
|
@@ -693,13 +693,13 @@ module Commonmeta
|
|
693
693
|
end
|
694
694
|
|
695
695
|
def from_datacite(element)
|
696
|
-
mapping = {
|
696
|
+
mapping = { "nameType" => "type", "creatorName" => "name" }
|
697
697
|
|
698
698
|
map_hash_keys(element: element, mapping: mapping)
|
699
699
|
end
|
700
700
|
|
701
701
|
def to_schema_org(element)
|
702
|
-
mapping = {
|
702
|
+
mapping = { "type" => "@type", "id" => "@id", "title" => "name" }
|
703
703
|
|
704
704
|
map_hash_keys(element: element, mapping: mapping)
|
705
705
|
end
|
@@ -707,20 +707,20 @@ module Commonmeta
|
|
707
707
|
def to_schema_org_container(element, options = {})
|
708
708
|
return nil unless element.is_a?(Hash) || (element.nil? && options[:container_title].present?)
|
709
709
|
|
710
|
-
issn = element[
|
711
|
-
id = issn.blank? ? element[
|
712
|
-
name = options[:container_title] || element[
|
713
|
-
type = id || name ? options[:type] || element[
|
710
|
+
issn = element["identifier"] if element["identifierType"] == "ISSN"
|
711
|
+
id = issn.blank? ? element["identifier"] : nil
|
712
|
+
name = options[:container_title] || element["title"]
|
713
|
+
type = id || name ? options[:type] || element["type"] : nil
|
714
714
|
|
715
|
-
{
|
715
|
+
{ "@id" => id, "@type" => type, "name" => name, "issn" => issn }.compact
|
716
716
|
end
|
717
717
|
|
718
718
|
def to_schema_org_identifiers(element, _options = {})
|
719
719
|
Array.wrap(element).map do |ai|
|
720
720
|
{
|
721
|
-
|
722
|
-
|
723
|
-
|
721
|
+
"@type" => "PropertyValue",
|
722
|
+
"propertyID" => ai["alternateIdentifierType"],
|
723
|
+
"value" => ai["alternateIdentifier"],
|
724
724
|
}
|
725
725
|
end.unwrap
|
726
726
|
end
|
@@ -728,22 +728,22 @@ module Commonmeta
|
|
728
728
|
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
|
729
729
|
return nil unless related_identifiers.present? && relation_type.present?
|
730
730
|
|
731
|
-
relation_type = if relation_type ==
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
731
|
+
relation_type = if relation_type == "References"
|
732
|
+
%w[References Cites
|
733
|
+
Documents]
|
734
|
+
else
|
735
|
+
[relation_type]
|
736
|
+
end
|
737
737
|
|
738
738
|
Array.wrap(related_identifiers).select do |ri|
|
739
|
-
relation_type.include?(ri[
|
739
|
+
relation_type.include?(ri["relationType"])
|
740
740
|
end.map do |r|
|
741
|
-
if r[
|
742
|
-
{
|
741
|
+
if r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"
|
742
|
+
{ "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact
|
743
743
|
else
|
744
744
|
{
|
745
|
-
|
746
|
-
|
745
|
+
"@id" => normalize_id(r["relatedIdentifier"]),
|
746
|
+
"@type" => DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork",
|
747
747
|
}.compact
|
748
748
|
end
|
749
749
|
end.unwrap
|
@@ -754,9 +754,9 @@ module Commonmeta
|
|
754
754
|
|
755
755
|
Array.wrap(funding_references).map do |fr|
|
756
756
|
{
|
757
|
-
|
758
|
-
|
759
|
-
|
757
|
+
"@id" => fr["funderIdentifier"],
|
758
|
+
"@type" => "Organization",
|
759
|
+
"name" => fr["funderName"],
|
760
760
|
}.compact
|
761
761
|
end.unwrap
|
762
762
|
end
|
@@ -765,10 +765,10 @@ module Commonmeta
|
|
765
765
|
return nil unless reference.present?
|
766
766
|
|
767
767
|
{
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
768
|
+
"@type" => "CreativeWork",
|
769
|
+
"@id" => reference["doi"] ? normalize_id(reference["doi"]) : nil,
|
770
|
+
"name" => reference["title"],
|
771
|
+
"datePublished" => reference["publicationYear"],
|
772
772
|
}.compact
|
773
773
|
end
|
774
774
|
|
@@ -776,67 +776,67 @@ module Commonmeta
|
|
776
776
|
return nil unless geo_location.present?
|
777
777
|
|
778
778
|
Array.wrap(geo_location).each_with_object([]) do |gl, sum|
|
779
|
-
if gl.fetch(
|
779
|
+
if gl.fetch("geoLocationPoint", nil)
|
780
780
|
sum << {
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
}
|
781
|
+
"@type" => "Place",
|
782
|
+
"geo" => {
|
783
|
+
"@type" => "GeoCoordinates",
|
784
|
+
"address" => gl["geoLocationPlace"],
|
785
|
+
"latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
|
786
|
+
"longitude" => gl.dig("geoLocationPoint", "pointLongitude"),
|
787
|
+
},
|
788
788
|
}.compact
|
789
789
|
end
|
790
790
|
|
791
|
-
if gl.fetch(
|
791
|
+
if gl.fetch("geoLocationBox", nil)
|
792
792
|
sum << {
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
gl.dig(
|
799
|
-
gl.dig(
|
800
|
-
gl.dig(
|
801
|
-
}.compact
|
793
|
+
"@type" => "Place",
|
794
|
+
"geo" => {
|
795
|
+
"@type" => "GeoShape",
|
796
|
+
"address" => gl["geoLocationPlace"],
|
797
|
+
"box" => [gl.dig("geoLocationBox", "southBoundLatitude"),
|
798
|
+
gl.dig("geoLocationBox", "westBoundLongitude"),
|
799
|
+
gl.dig("geoLocationBox", "northBoundLatitude"),
|
800
|
+
gl.dig("geoLocationBox", "eastBoundLongitude")].compact.join(" ").presence,
|
801
|
+
}.compact,
|
802
802
|
}.compact
|
803
803
|
end
|
804
804
|
|
805
|
-
if gl.fetch(
|
805
|
+
if gl.fetch("geoLocationPolygon", nil)
|
806
806
|
sum << {
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
807
|
+
"@type" => "Place",
|
808
|
+
"geo" => {
|
809
|
+
"@type" => "GeoShape",
|
810
|
+
"address" => gl["geoLocationPlace"],
|
811
|
+
"polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
|
812
812
|
Array.wrap(glp).map do |glpp|
|
813
|
-
[glpp.dig(
|
814
|
-
glpp.dig(
|
813
|
+
[glpp.dig("polygonPoint", "pointLongitude"),
|
814
|
+
glpp.dig("polygonPoint", "pointLatitude")].compact
|
815
815
|
end.compact
|
816
|
-
end.compact.presence
|
817
|
-
}
|
816
|
+
end.compact.presence,
|
817
|
+
},
|
818
818
|
}
|
819
819
|
end
|
820
820
|
|
821
|
-
next unless gl.fetch(
|
822
|
-
nil) && !gl.fetch(
|
823
|
-
nil) && !gl.fetch(
|
821
|
+
next unless gl.fetch("geoLocationPlace",
|
822
|
+
nil) && !gl.fetch("geoLocationPoint",
|
823
|
+
nil) && !gl.fetch("geoLocationBox",
|
824
824
|
nil) && !gl.fetch(
|
825
|
-
|
826
|
-
|
825
|
+
"geoLocationPolygon", nil
|
826
|
+
)
|
827
827
|
|
828
828
|
sum << {
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
}
|
829
|
+
"@type" => "Place",
|
830
|
+
"geo" => {
|
831
|
+
"@type" => "GeoCoordinates",
|
832
|
+
"address" => gl["geoLocationPlace"],
|
833
|
+
},
|
834
834
|
}.compact
|
835
835
|
end.unwrap
|
836
836
|
end
|
837
837
|
|
838
838
|
def from_schema_org(element)
|
839
|
-
mapping = {
|
839
|
+
mapping = { "@type" => "type", "@id" => "id" }
|
840
840
|
|
841
841
|
map_hash_keys(element: element, mapping: mapping)
|
842
842
|
end
|
@@ -844,16 +844,16 @@ module Commonmeta
|
|
844
844
|
def map_hash_keys(element: nil, mapping: nil)
|
845
845
|
Array.wrap(element).map do |a|
|
846
846
|
a.map { |k, v| [mapping.fetch(k, k), v] }.reduce({}) do |hsh, (k, v)|
|
847
|
-
if k ==
|
847
|
+
if k == "affiliation" && v.is_a?(Array)
|
848
848
|
hsh[k] = v.map do |affiliation|
|
849
849
|
if affiliation.is_a?(Hash)
|
850
|
-
affiliation.merge(
|
850
|
+
affiliation.merge("@type" => "Organization")
|
851
851
|
else
|
852
852
|
affiliation
|
853
853
|
end
|
854
854
|
end
|
855
855
|
hsh
|
856
|
-
elsif k ==
|
856
|
+
elsif k == "type" && v.is_a?(String)
|
857
857
|
hsh[k] = v.capitalize
|
858
858
|
hsh
|
859
859
|
elsif v.is_a?(Hash)
|
@@ -869,51 +869,51 @@ module Commonmeta
|
|
869
869
|
|
870
870
|
def to_identifier(identifier)
|
871
871
|
{
|
872
|
-
|
873
|
-
|
874
|
-
|
872
|
+
"@type" => "PropertyValue",
|
873
|
+
"propertyID" => identifier["relatedIdentifierType"],
|
874
|
+
"value" => identifier["relatedIdentifier"],
|
875
875
|
}
|
876
876
|
end
|
877
877
|
|
878
878
|
def from_csl(element)
|
879
879
|
Array.wrap(element).map do |a|
|
880
|
-
if a[
|
881
|
-
a[
|
882
|
-
a[
|
883
|
-
elsif a[
|
884
|
-
a[
|
885
|
-
elsif a[
|
886
|
-
a[
|
880
|
+
if a["literal"].present?
|
881
|
+
a["type"] = "Organization"
|
882
|
+
a["name"] = a["literal"]
|
883
|
+
elsif a["name"].present?
|
884
|
+
a["type"] = "Organization"
|
885
|
+
elsif a["given"].present? || a["family"].present?
|
886
|
+
a["type"] = "Person"
|
887
887
|
end
|
888
|
-
a[
|
889
|
-
a[
|
890
|
-
a.except(
|
888
|
+
a["givenName"] = a["given"]
|
889
|
+
a["familyName"] = a["family"]
|
890
|
+
a.except("given", "family", "literal").compact
|
891
891
|
end.unwrap
|
892
892
|
end
|
893
893
|
|
894
894
|
def to_csl(element)
|
895
895
|
Array.wrap(element).map do |a|
|
896
|
-
a[
|
897
|
-
a[
|
898
|
-
a[
|
899
|
-
a.except(
|
900
|
-
|
896
|
+
a["family"] = a["familyName"]
|
897
|
+
a["given"] = a["givenName"]
|
898
|
+
a["literal"] = a["name"] unless a["familyName"].present?
|
899
|
+
a.except("nameType", "type", "@type", "id", "@id", "name", "familyName", "givenName",
|
900
|
+
"affiliation", "contributorType").compact
|
901
901
|
end.presence
|
902
902
|
end
|
903
903
|
|
904
904
|
def to_ris(element)
|
905
905
|
Array.wrap(element).map do |a|
|
906
|
-
if a[
|
907
|
-
[a[
|
906
|
+
if a["familyName"].present?
|
907
|
+
[a["familyName"], a["givenName"]].join(", ")
|
908
908
|
else
|
909
|
-
a[
|
909
|
+
a["name"]
|
910
910
|
end
|
911
911
|
end.unwrap
|
912
912
|
end
|
913
913
|
|
914
914
|
def sanitize(text, options = {})
|
915
915
|
options[:tags] ||= Set.new(%w[strong em b i code pre sub sup br])
|
916
|
-
content = options[:content] ||
|
916
|
+
content = options[:content] || "__content__"
|
917
917
|
custom_scrubber = Commonmeta::WhitelistScrubber.new(options)
|
918
918
|
|
919
919
|
if text.is_a?(String)
|
@@ -930,8 +930,8 @@ module Commonmeta
|
|
930
930
|
def github_from_url(url)
|
931
931
|
return {} unless %r{\Ahttps://github\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\z}.match?(url)
|
932
932
|
|
933
|
-
words = URI.parse(url).path[1..-1].split(
|
934
|
-
path = words.length > 3 ? words[4...words.length].join(
|
933
|
+
words = URI.parse(url).path[1..-1].split("/")
|
934
|
+
path = words.length > 3 ? words[4...words.length].join("/") : nil
|
935
935
|
|
936
936
|
{ owner: words[0], repo: words[1], release: words[3], path: path }.compact
|
937
937
|
end
|
@@ -970,7 +970,7 @@ module Commonmeta
|
|
970
970
|
def github_as_codemeta_url(url)
|
971
971
|
github_hash = github_from_url(url)
|
972
972
|
|
973
|
-
if github_hash[:path].to_s.end_with?(
|
973
|
+
if github_hash[:path].to_s.end_with?("codemeta.json")
|
974
974
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}"
|
975
975
|
elsif github_hash[:owner].present?
|
976
976
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/master/codemeta.json"
|
@@ -980,7 +980,7 @@ module Commonmeta
|
|
980
980
|
def github_as_cff_url(url)
|
981
981
|
github_hash = github_from_url(url)
|
982
982
|
|
983
|
-
if github_hash[:path].to_s.end_with?(
|
983
|
+
if github_hash[:path].to_s.end_with?("CITATION.cff")
|
984
984
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}"
|
985
985
|
elsif github_hash[:owner].present?
|
986
986
|
"https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/main/CITATION.cff"
|
@@ -988,18 +988,18 @@ module Commonmeta
|
|
988
988
|
end
|
989
989
|
|
990
990
|
def get_date_parts(iso8601_time)
|
991
|
-
return {
|
991
|
+
return { "date-parts" => [[]] } if iso8601_time.nil?
|
992
992
|
|
993
993
|
year = iso8601_time[0..3].to_i
|
994
994
|
month = iso8601_time[5..6].to_i
|
995
995
|
day = iso8601_time[8..9].to_i
|
996
|
-
{
|
996
|
+
{ "date-parts" => [[year, month, day].reject { |part| part == 0 }] }
|
997
997
|
rescue TypeError
|
998
998
|
nil
|
999
999
|
end
|
1000
1000
|
|
1001
1001
|
def get_date_from_date_parts(date_as_parts)
|
1002
|
-
date_parts = date_as_parts.fetch(
|
1002
|
+
date_parts = date_as_parts.fetch("date-parts", []).first
|
1003
1003
|
return nil if date_parts == [nil]
|
1004
1004
|
|
1005
1005
|
year = date_parts[0]
|
@@ -1011,13 +1011,13 @@ module Commonmeta
|
|
1011
1011
|
end
|
1012
1012
|
|
1013
1013
|
def get_date_from_parts(year, month = nil, day = nil)
|
1014
|
-
[year.to_s.rjust(4,
|
1015
|
-
part ==
|
1016
|
-
end.join(
|
1014
|
+
[year.to_s.rjust(4, "0"), month.to_s.rjust(2, "0"), day.to_s.rjust(2, "0")].reject do |part|
|
1015
|
+
part == "00"
|
1016
|
+
end.join("-")
|
1017
1017
|
end
|
1018
1018
|
|
1019
1019
|
def get_date_parts_from_parts(year, month = nil, day = nil)
|
1020
|
-
{
|
1020
|
+
{ "date-parts" => [[year.to_i, month.to_i, day.to_i].reject { |part| part == 0 }] }
|
1021
1021
|
end
|
1022
1022
|
|
1023
1023
|
def get_iso8601_date(iso8601_time)
|
@@ -1064,9 +1064,9 @@ module Commonmeta
|
|
1064
1064
|
# strip milliseconds if there is a time, as it interferes with EDTF parsing
|
1065
1065
|
# keep dates unchanged
|
1066
1066
|
def strip_milliseconds(iso8601_time)
|
1067
|
-
return iso8601_time.split(
|
1067
|
+
return iso8601_time.split(" ").first if iso8601_time.to_s.include? " "
|
1068
1068
|
|
1069
|
-
return iso8601_time.split(
|
1069
|
+
return iso8601_time.split(".").first + "Z" if iso8601_time.to_s.include? "."
|
1070
1070
|
|
1071
1071
|
iso8601_time
|
1072
1072
|
end
|
@@ -1074,64 +1074,64 @@ module Commonmeta
|
|
1074
1074
|
# iso8601 datetime without hyphens and colons, used by Crossref
|
1075
1075
|
# return nil if invalid
|
1076
1076
|
def get_datetime_from_time(time)
|
1077
|
-
DateTime.strptime(time.to_s,
|
1077
|
+
DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime("%Y-%m-%dT%H:%M:%SZ")
|
1078
1078
|
rescue ArgumentError
|
1079
1079
|
nil
|
1080
1080
|
end
|
1081
1081
|
|
1082
1082
|
def get_date(dates, date_type)
|
1083
|
-
dd = Array.wrap(dates).find { |d| d[
|
1084
|
-
dd.fetch(
|
1083
|
+
dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {}
|
1084
|
+
dd.fetch("date", nil)
|
1085
1085
|
end
|
1086
1086
|
|
1087
1087
|
# convert commonmeta dates to DataCite format
|
1088
1088
|
def get_dates_from_date(date)
|
1089
1089
|
return nil if date.nil?
|
1090
1090
|
|
1091
|
-
mapping = {
|
1091
|
+
mapping = { "published" => "issued" }
|
1092
1092
|
|
1093
1093
|
date = map_hash_keys(element: date, mapping: mapping)
|
1094
1094
|
|
1095
1095
|
date.map do |k, v|
|
1096
|
-
{
|
1097
|
-
|
1096
|
+
{ "date" => v,
|
1097
|
+
"dateType" => k.capitalize }
|
1098
1098
|
end
|
1099
1099
|
end
|
1100
1100
|
|
1101
1101
|
def get_contributor(contributor, contributor_type)
|
1102
|
-
contributor.select { |c| c[
|
1102
|
+
contributor.select { |c| c["contributorType"] == contributor_type }
|
1103
1103
|
end
|
1104
1104
|
|
1105
1105
|
def get_identifier(identifiers, identifier_type)
|
1106
|
-
id = Array.wrap(identifiers).find { |i| i[
|
1107
|
-
id.fetch(
|
1106
|
+
id = Array.wrap(identifiers).find { |i| i["identifierType"] == identifier_type } || {}
|
1107
|
+
id.fetch("identifier", nil)
|
1108
1108
|
end
|
1109
1109
|
|
1110
1110
|
def get_identifier_type(identifier_type)
|
1111
1111
|
return nil unless identifier_type.present?
|
1112
1112
|
|
1113
1113
|
identifierTypes = {
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1114
|
+
"ark" => "ARK",
|
1115
|
+
"arxiv" => "arXiv",
|
1116
|
+
"bibcode" => "bibcode",
|
1117
|
+
"doi" => "DOI",
|
1118
|
+
"ean13" => "EAN13",
|
1119
|
+
"eissn" => "EISSN",
|
1120
|
+
"handle" => "Handle",
|
1121
|
+
"igsn" => "IGSN",
|
1122
|
+
"isbn" => "ISBN",
|
1123
|
+
"issn" => "ISSN",
|
1124
|
+
"istc" => "ISTC",
|
1125
|
+
"lissn" => "LISSN",
|
1126
|
+
"lsid" => "LSID",
|
1127
|
+
"pmid" => "PMID",
|
1128
|
+
"purl" => "PURL",
|
1129
|
+
"upc" => "UPC",
|
1130
|
+
"url" => "URL",
|
1131
|
+
"urn" => "URN",
|
1132
|
+
"md5" => "md5",
|
1133
|
+
"minid" => "minid",
|
1134
|
+
"dataguid" => "dataguid",
|
1135
1135
|
}
|
1136
1136
|
|
1137
1137
|
identifierTypes[identifier_type.downcase] || identifier_type
|
@@ -1140,27 +1140,27 @@ module Commonmeta
|
|
1140
1140
|
def get_series_information(str)
|
1141
1141
|
return {} unless str.present?
|
1142
1142
|
|
1143
|
-
str = str.split(
|
1143
|
+
str = str.split(",").map(&:strip)
|
1144
1144
|
|
1145
1145
|
title = str.first
|
1146
1146
|
volume_issue = str.length > 2 ? str[1].rpartition(/\(([^)]+)\)/) : nil
|
1147
1147
|
volume = volume_issue.present? ? volume_issue[0].presence || volume_issue[2].presence : nil
|
1148
1148
|
issue = volume_issue.present? ? volume_issue[1][1...-1].presence : nil
|
1149
1149
|
pages = str.length > 1 ? str.last : nil
|
1150
|
-
first_page = pages.present? ? pages.split(
|
1151
|
-
last_page = pages.present? ? pages.split(
|
1150
|
+
first_page = pages.present? ? pages.split("-").map(&:strip)[0] : nil
|
1151
|
+
last_page = pages.present? ? pages.split("-").map(&:strip)[1] : nil
|
1152
1152
|
|
1153
1153
|
{
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1154
|
+
"title" => title,
|
1155
|
+
"volume" => volume,
|
1156
|
+
"issue" => issue,
|
1157
|
+
"firstPage" => first_page,
|
1158
|
+
"lastPage" => last_page,
|
1159
1159
|
}.compact
|
1160
1160
|
end
|
1161
1161
|
|
1162
1162
|
def jsonlint(json)
|
1163
|
-
return [
|
1163
|
+
return ["No JSON provided"] unless json.present?
|
1164
1164
|
|
1165
1165
|
error_array = []
|
1166
1166
|
linter = JsonLint::Linter.new
|
@@ -1169,33 +1169,33 @@ module Commonmeta
|
|
1169
1169
|
end
|
1170
1170
|
|
1171
1171
|
def name_to_spdx(name)
|
1172
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1173
|
-
__dir__))).fetch(
|
1172
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1173
|
+
__dir__))).fetch("licenses")
|
1174
1174
|
license = spdx.find do |l|
|
1175
|
-
l[
|
1175
|
+
l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalize_cc_url(name)
|
1176
1176
|
end
|
1177
1177
|
|
1178
1178
|
if license
|
1179
|
-
{
|
1179
|
+
{ "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact
|
1180
1180
|
else
|
1181
|
-
{
|
1181
|
+
{ "rights" => name }
|
1182
1182
|
end
|
1183
1183
|
end
|
1184
1184
|
|
1185
1185
|
def hsh_to_spdx(hsh)
|
1186
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1187
|
-
__dir__))).fetch(
|
1188
|
-
hsh[
|
1186
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1187
|
+
__dir__))).fetch("licenses")
|
1188
|
+
hsh["rightsUri"] = hsh.delete("rightsURI") if hsh["rightsUri"].blank?
|
1189
1189
|
license = spdx.find do |l|
|
1190
|
-
l[
|
1190
|
+
l["licenseId"].casecmp?(hsh["rightsIdentifier"]) || l["seeAlso"].first == normalize_cc_url(hsh["rightsUri"]) || l["name"] == hsh["rights"] || l["seeAlso"].first == normalize_cc_url(hsh["rights"])
|
1191
1191
|
end
|
1192
1192
|
|
1193
1193
|
if license
|
1194
|
-
{
|
1194
|
+
{ "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact
|
1195
1195
|
else
|
1196
1196
|
{
|
1197
|
-
|
1198
|
-
|
1197
|
+
"id" => hsh["rightsIdentifier"].present? ? hsh["rightsIdentifier"].downcase : nil,
|
1198
|
+
"url" => hsh["rightsURI"] || hsh["rightsUri"],
|
1199
1199
|
}.compact
|
1200
1200
|
end
|
1201
1201
|
end
|
@@ -1203,152 +1203,163 @@ module Commonmeta
|
|
1203
1203
|
def spdx_to_hsh(hsh)
|
1204
1204
|
return nil unless hsh.present? && hsh.is_a?(Hash)
|
1205
1205
|
|
1206
|
-
spdx = JSON.load(File.read(File.expand_path(
|
1207
|
-
__dir__))).fetch(
|
1206
|
+
spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json",
|
1207
|
+
__dir__))).fetch("licenses")
|
1208
1208
|
|
1209
|
-
license = spdx.find { |l| l[
|
1209
|
+
license = spdx.find { |l| l["licenseId"].casecmp?(hsh["id"]) }
|
1210
1210
|
|
1211
1211
|
if license
|
1212
1212
|
[{
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1213
|
+
"rightsIdentifier" => license["licenseId"].downcase,
|
1214
|
+
"rightsUri" => license["seeAlso"].first,
|
1215
|
+
"rights" => license["name"],
|
1216
|
+
"rightsIdentifierScheme" => "SPDX",
|
1217
|
+
"schemeUri" => "https://spdx.org/licenses/",
|
1218
1218
|
}.compact]
|
1219
1219
|
else
|
1220
|
-
[{
|
1220
|
+
[{ "rightsIdentifier" => hsh["id"], "rightsURI" => hsh["url"] }.compact]
|
1221
1221
|
end
|
1222
1222
|
end
|
1223
1223
|
|
1224
1224
|
def name_to_fos(name)
|
1225
1225
|
# first find subject in Fields of Science (OECD)
|
1226
|
-
fos = JSON.load(File.read(File.expand_path(
|
1227
|
-
__dir__))).fetch(
|
1226
|
+
fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json",
|
1227
|
+
__dir__))).fetch("fosFields")
|
1228
1228
|
|
1229
|
-
subject = fos.find { |l| l[
|
1229
|
+
subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }
|
1230
1230
|
|
1231
1231
|
if subject
|
1232
1232
|
return [{
|
1233
|
-
|
1234
|
-
|
1233
|
+
"subject" => sanitize(name).downcase,
|
1234
|
+
},
|
1235
1235
|
{
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1236
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1237
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1238
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1239
|
+
}]
|
1240
1240
|
end
|
1241
1241
|
|
1242
1242
|
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1243
1243
|
# and map to Fields of Science. Add an extra entry for the latter
|
1244
|
-
fores = JSON.load(File.read(File.expand_path(
|
1244
|
+
fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json",
|
1245
1245
|
__dir__)))
|
1246
|
-
for_fields = fores.fetch(
|
1247
|
-
for_disciplines = fores.fetch(
|
1246
|
+
for_fields = fores.fetch("forFields")
|
1247
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1248
1248
|
|
1249
|
-
subject = for_fields.find { |l| l[
|
1250
|
-
for_disciplines.find { |l| l[
|
1249
|
+
subject = for_fields.find { |l| l["forLabel"] == name } ||
|
1250
|
+
for_disciplines.find { |l| l["forLabel"] == name }
|
1251
1251
|
|
1252
1252
|
if subject
|
1253
1253
|
[{
|
1254
|
-
|
1254
|
+
"subject" => sanitize(name).downcase,
|
1255
1255
|
},
|
1256
1256
|
{
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1257
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1258
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1259
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1260
|
+
}]
|
1261
1261
|
else
|
1262
|
-
[{
|
1262
|
+
[{ "subject" => sanitize(name).downcase }]
|
1263
1263
|
end
|
1264
1264
|
end
|
1265
1265
|
|
1266
1266
|
def hsh_to_fos(hsh)
|
1267
1267
|
# first find subject in Fields of Science (OECD)
|
1268
|
-
fos = JSON.load(File.read(File.expand_path(
|
1269
|
-
__dir__))).fetch(
|
1268
|
+
fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json",
|
1269
|
+
__dir__))).fetch("fosFields")
|
1270
1270
|
subject = fos.find do |l|
|
1271
|
-
l[
|
1271
|
+
l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]
|
1272
1272
|
end
|
1273
1273
|
|
1274
1274
|
if subject
|
1275
1275
|
return [{
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1276
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1277
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1278
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1279
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1280
|
+
"classificationCode" => hsh["classificationCode"],
|
1281
|
+
"lang" => hsh["lang"],
|
1282
|
+
}.compact,
|
1283
1283
|
{
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1284
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1285
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1286
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1287
|
+
}.compact]
|
1288
1288
|
end
|
1289
1289
|
|
1290
1290
|
# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
|
1291
1291
|
# and map to Fields of Science. Add an extra entry for the latter
|
1292
|
-
fores = JSON.load(File.read(File.expand_path(
|
1292
|
+
fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json",
|
1293
1293
|
__dir__)))
|
1294
|
-
for_fields = fores.fetch(
|
1295
|
-
for_disciplines = fores.fetch(
|
1294
|
+
for_fields = fores.fetch("forFields")
|
1295
|
+
for_disciplines = fores.fetch("forDisciplines")
|
1296
1296
|
|
1297
1297
|
# try to extract forId
|
1298
|
-
if hsh[
|
1299
|
-
for_id = hsh[
|
1300
|
-
for_id = for_id.rjust(6,
|
1298
|
+
if hsh["subjectScheme"] == "FOR"
|
1299
|
+
for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first
|
1300
|
+
for_id = for_id.rjust(6, "0")
|
1301
1301
|
|
1302
|
-
subject = for_fields.find { |l| l[
|
1303
|
-
for_disciplines.find { |l| l[
|
1302
|
+
subject = for_fields.find { |l| l["forId"] == for_id } ||
|
1303
|
+
for_disciplines.find { |l| l["forId"] == for_id[0..3] }
|
1304
1304
|
else
|
1305
1305
|
subject = for_fields.find do |l|
|
1306
|
-
l[
|
1306
|
+
l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"]
|
1307
1307
|
end ||
|
1308
1308
|
for_disciplines.find do |l|
|
1309
|
-
l[
|
1309
|
+
l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"]
|
1310
1310
|
end
|
1311
1311
|
end
|
1312
1312
|
|
1313
1313
|
if subject
|
1314
1314
|
[{
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1315
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1316
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1317
|
+
"classificationCode" => hsh["classificationCode"],
|
1318
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1319
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1320
|
+
"lang" => hsh["lang"],
|
1321
1321
|
}.compact,
|
1322
1322
|
{
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1323
|
+
"subject" => "FOS: " + subject["fosLabel"],
|
1324
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)",
|
1325
|
+
"schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
1326
|
+
}]
|
1327
1327
|
else
|
1328
1328
|
[{
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1329
|
+
"subject" => sanitize(hsh["__content__"] || hsh["subject"]),
|
1330
|
+
"subjectScheme" => hsh["subjectScheme"],
|
1331
|
+
"classificationCode" => hsh["classificationCode"],
|
1332
|
+
"schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
|
1333
|
+
"valueUri" => hsh["valueURI"] || hsh["valueUri"],
|
1334
|
+
"lang" => hsh["lang"],
|
1335
1335
|
}.compact]
|
1336
1336
|
end
|
1337
1337
|
end
|
1338
1338
|
|
1339
1339
|
def encode_doi(prefix)
|
1340
1340
|
# DOI suffix is generated from a random number, encoded in base32
|
1341
|
-
# suffix has 8 digits plus two checksum digits. With base32 there are
|
1341
|
+
# suffix has 8 digits plus two checksum digits. With base32 there are
|
1342
1342
|
# 32 possible digits, so 8 digits gives 32^8 possible combinations
|
1343
|
-
random_int = SecureRandom.random_number(32**7..(32**8) - 1)
|
1343
|
+
random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1)
|
1344
1344
|
suffix = Base32::URL.encode(random_int, checksum: true)
|
1345
1345
|
str = "#{suffix[0, 5]}-#{suffix[5, 10]}"
|
1346
1346
|
"https://doi.org/#{prefix}/#{str}"
|
1347
1347
|
end
|
1348
1348
|
|
1349
1349
|
def decode_doi(doi)
|
1350
|
-
suffix = doi.split(
|
1350
|
+
suffix = doi.split("/", 5).last
|
1351
1351
|
Base32::URL.decode(suffix)
|
1352
1352
|
end
|
1353
|
+
|
1354
|
+
def encode_container_id
|
1355
|
+
# suffix has 5 digits plus two checksum digits. With base32 there are
|
1356
|
+
# 32 possible digits, so 5 digits gives 32^5 possible combinations
|
1357
|
+
random_int = SecureRandom.random_number(32 ** 4..(32 ** 5) - 1)
|
1358
|
+
Base32::URL.encode(random_int, checksum: true)
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
def decode_container_id(id)
|
1362
|
+
Base32::URL.decode(id)
|
1363
|
+
end
|
1353
1364
|
end
|
1354
1365
|
end
|