bolognese 0.9.36 → 0.9.37
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/DOI, +0 -0
- data/Gemfile.lock +2 -2
- data/lib/bolognese/cli.rb +9 -2
- data/lib/bolognese/datacite_utils.rb +7 -12
- data/lib/bolognese/doi_utils.rb +2 -1
- data/lib/bolognese/metadata.rb +55 -52
- data/lib/bolognese/readers/crossref_reader.rb +1 -1
- data/lib/bolognese/readers/datacite_reader.rb +13 -2
- data/lib/bolognese/readers/ris_reader.rb +1 -1
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/citeproc_writer.rb +0 -2
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/crosscite_writer.rb +0 -2
- data/lib/bolognese/writers/datacite_json_writer.rb +0 -2
- data/lib/bolognese/writers/rdf_xml_writer.rb +0 -2
- data/lib/bolognese/writers/ris_writer.rb +0 -2
- data/lib/bolognese/writers/schema_org_writer.rb +0 -2
- data/lib/bolognese/writers/turtle_writer.rb +0 -2
- data/spec/cli_spec.rb +5 -5
- data/spec/fixtures/crosscite.json +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/Dataset.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_doi.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/validates_against_schema.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/with_data_citation.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_bibtex/text.yml +6 -8
- data/spec/readers/datacite_reader_spec.rb +28 -8
- data/spec/readers/schema_org_reader_spec.rb +2 -1
- data/spec/writers/bibtex_writer_spec.rb +3 -2
- data/spec/writers/citeproc_writer_spec.rb +2 -2
- data/spec/writers/crosscite_writer_spec.rb +1 -0
- data/spec/writers/datacite_writer_spec.rb +32 -0
- data/spec/writers/ris_writer_spec.rb +2 -2
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a4c14e423a53deb412218f3c855ee1f593296d98
|
4
|
+
data.tar.gz: d05e58c1dbcb061bdc4d468f048a02cd238036a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24551c9b762c597fc87407e926a510c873d42e6650f83e76e9081d8b43d92e4e06067aa18a8f30aca41bacc39a98fa7fda1b6d4ea1dc5c785aa7c971785c95fe
|
7
|
+
data.tar.gz: a66e615373cf75d9fee848a017db2966722fb2754b5985bf58f8cf45b43d3be4c243f25d4ac7377ee26d722fc0b682f02fb1ae308c584f6db5a3b74badfbc3ed
|
data/DOI,
ADDED
File without changes
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bolognese (0.9.36)
|
4
|
+
bolognese (0.9.37)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
benchmark_methods (~> 0.7)
|
7
7
|
bibtex-ruby (~> 4.1)
|
@@ -149,7 +149,7 @@ GEM
|
|
149
149
|
temple (0.8.0)
|
150
150
|
thor (0.19.4)
|
151
151
|
thread_safe (0.3.6)
|
152
|
-
tilt (2.0.
|
152
|
+
tilt (2.0.8)
|
153
153
|
trollop (2.1.2)
|
154
154
|
tzinfo (1.2.3)
|
155
155
|
thread_safe (~> 0.1)
|
data/lib/bolognese/cli.rb
CHANGED
@@ -27,9 +27,16 @@ module Bolognese
|
|
27
27
|
method_option :to, aliases: "-t", default: "schema_org"
|
28
28
|
method_option :regenerate, :type => :boolean, :force => false
|
29
29
|
def convert(input)
|
30
|
-
metadata = Metadata.new(input: input,
|
30
|
+
metadata = Metadata.new(input: input,
|
31
|
+
from: options[:from],
|
32
|
+
regenerate: options[:regenerate])
|
31
33
|
to = options[:to] || "schema_org"
|
32
|
-
|
34
|
+
|
35
|
+
if metadata.valid?
|
36
|
+
puts metadata.send(to)
|
37
|
+
else
|
38
|
+
$stderr.puts metadata.errors
|
39
|
+
end
|
33
40
|
end
|
34
41
|
|
35
42
|
default_task :convert
|
data/lib/bolognese/datacite_utils.rb
CHANGED
@@ -1,11 +1,5 @@
|
|
1
1
|
module Bolognese
|
2
2
|
module DataciteUtils
|
3
|
-
def schema
|
4
|
-
kernel = schema_version.split("/").last || "kernel-4.0"
|
5
|
-
filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
|
6
|
-
Nokogiri::XML::Schema(open(filepath))
|
7
|
-
end
|
8
|
-
|
9
3
|
def datacite_xml
|
10
4
|
@datacite_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
|
11
5
|
xml.resource(root_attributes) do
|
@@ -14,12 +8,13 @@ module Bolognese
|
|
14
8
|
end.to_xml
|
15
9
|
end
|
16
10
|
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
def datacite_errors(xml: nil, schema_version: nil)
|
12
|
+
schema_version ||= "http://datacite.org/schema/kernel-4"
|
13
|
+
kernel = schema_version.to_s.split("/").last
|
14
|
+
filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
|
15
|
+
schema = Nokogiri::XML::Schema(open(filepath))
|
20
16
|
|
21
|
-
|
22
|
-
schema.validate(Nokogiri::XML(datacite, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
|
17
|
+
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
|
23
18
|
rescue Nokogiri::XML::SyntaxError => e
|
24
19
|
e.message
|
25
20
|
end
|
@@ -88,7 +83,7 @@ module Bolognese
|
|
88
83
|
end
|
89
84
|
|
90
85
|
def insert_publisher(xml)
|
91
|
-
xml.publisher(publisher)
|
86
|
+
xml.publisher(publisher || container_title)
|
92
87
|
end
|
93
88
|
|
94
89
|
def insert_publication_year(xml)
|
data/lib/bolognese/doi_utils.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
module Bolognese
|
2
2
|
module DoiUtils
|
3
3
|
def validate_doi(doi)
|
4
|
-
Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
|
4
|
+
doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
|
5
|
+
doi = doi.delete("\u200B").downcase if doi.present?
|
5
6
|
end
|
6
7
|
|
7
8
|
def validate_prefix(doi)
|
data/lib/bolognese/metadata.rb
CHANGED
@@ -57,20 +57,24 @@ module Bolognese
|
|
57
57
|
include Bolognese::Writers::SchemaOrgWriter
|
58
58
|
include Bolognese::Writers::TurtleWriter
|
59
59
|
|
60
|
-
|
61
|
-
:
|
62
|
-
:
|
63
|
-
:
|
64
|
-
:
|
65
|
-
:
|
66
|
-
:
|
67
|
-
:
|
68
|
-
:
|
69
|
-
:
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
60
|
+
attr_accessor :doi, :author, :title, :publisher, :contributor, :license,
|
61
|
+
:date_accepted, :date_available, :date_copyrighted, :date_collected,
|
62
|
+
:date_submitted, :date_valid, :date_created, :date_modified,
|
63
|
+
:date_registered, :date_updated, :member_id, :data_center_id, :journal,
|
64
|
+
:volume, :issue, :pagination, :url, :version, :keywords, :editor,
|
65
|
+
:description, :alternate_name, :language, :content_size, :spatial_coverage,
|
66
|
+
:schema_version, :additional_type, :has_part, :same_as,
|
67
|
+
:is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
|
68
|
+
:is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
|
69
|
+
:has_metadata, :is_metadata_for, :is_referenced_by, :references,
|
70
|
+
:is_documented_by, :documents, :is_compiled_by, :compiles,
|
71
|
+
:is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
|
72
|
+
:is_derived_from, :is_source_of, :format, :funding, :type, :bibtex_type,
|
73
|
+
:citeproc_type, :ris_type
|
74
|
+
|
75
|
+
attr_reader :id, :from, :raw, :metadata, :doc, :provider, :citation,
|
76
|
+
:page_start, :page_end, :should_passthru, :errors,
|
77
|
+
:related_identifier, :reverse, :name_detector
|
74
78
|
|
75
79
|
def initialize(input: nil, from: nil, regenerate: false, **options)
|
76
80
|
id = normalize_id(input, options)
|
@@ -100,7 +104,6 @@ module Bolognese
|
|
100
104
|
@raw = string.present? ? string.strip : nil
|
101
105
|
|
102
106
|
@should_passthru = (@from == "datacite") && !regenerate
|
103
|
-
@doi = options[:doi].presence
|
104
107
|
|
105
108
|
@url = hsh.to_h["url"].presence
|
106
109
|
@date_registered = hsh.to_h["date_registered"].presence
|
@@ -117,40 +120,39 @@ module Bolognese
|
|
117
120
|
exists? && errors.nil?
|
118
121
|
end
|
119
122
|
|
123
|
+
# validate against DataCite schema, unless there are already errors in the reader
|
120
124
|
def errors
|
121
|
-
|
125
|
+
xml = should_passthru ? raw : datacite_xml
|
126
|
+
metadata.fetch("errors", nil) || datacite_errors(xml: xml,
|
127
|
+
schema_version: schema_version)
|
122
128
|
end
|
123
129
|
|
124
|
-
# def errors
|
125
|
-
# doc && doc.errors.map { |error| error.to_s }.unwrap
|
126
|
-
# end
|
127
|
-
|
128
130
|
def id
|
129
131
|
@doi.present? ? doi_as_url(@doi) : metadata.fetch("id", nil)
|
130
132
|
end
|
131
133
|
|
132
134
|
def type
|
133
|
-
metadata.fetch("type", nil)
|
135
|
+
@type ||= metadata.fetch("type", nil)
|
134
136
|
end
|
135
137
|
|
136
138
|
def additional_type
|
137
|
-
metadata.fetch("additional_type", nil)
|
139
|
+
@additional_type ||= metadata.fetch("additional_type", nil)
|
138
140
|
end
|
139
141
|
|
140
142
|
def citeproc_type
|
141
|
-
metadata.fetch("citeproc_type", nil)
|
143
|
+
@citeproc_type ||= metadata.fetch("citeproc_type", nil)
|
142
144
|
end
|
143
145
|
|
144
146
|
def bibtex_type
|
145
|
-
metadata.fetch("bibtex_type", nil)
|
147
|
+
@bibtex_type ||= metadata.fetch("bibtex_type", nil)
|
146
148
|
end
|
147
149
|
|
148
150
|
def ris_type
|
149
|
-
metadata.fetch("ris_type", nil)
|
151
|
+
@ris_type ||= metadata.fetch("ris_type", nil)
|
150
152
|
end
|
151
153
|
|
152
154
|
def resource_type_general
|
153
|
-
metadata.fetch("resource_type_general", nil)
|
155
|
+
@resource_type_general ||= metadata.fetch("resource_type_general", nil)
|
154
156
|
end
|
155
157
|
|
156
158
|
def doi
|
@@ -162,63 +164,63 @@ module Bolognese
|
|
162
164
|
end
|
163
165
|
|
164
166
|
def title
|
165
|
-
metadata.fetch("title", nil)
|
167
|
+
@title ||= metadata.fetch("title", nil)
|
166
168
|
end
|
167
169
|
|
168
170
|
def alternate_name
|
169
|
-
metadata.fetch("alternate_name", nil)
|
171
|
+
@alternate_name ||= metadata.fetch("alternate_name", nil)
|
170
172
|
end
|
171
173
|
|
172
174
|
def author
|
173
|
-
metadata.fetch("author", nil)
|
175
|
+
@author ||= metadata.fetch("author", nil)
|
174
176
|
end
|
175
177
|
|
176
178
|
def editor
|
177
|
-
metadata.fetch("editor", nil)
|
179
|
+
@editor ||= metadata.fetch("editor", nil)
|
178
180
|
end
|
179
181
|
|
180
182
|
def publisher
|
181
|
-
metadata.fetch("publisher", nil)
|
183
|
+
@publisher ||= metadata.fetch("publisher", nil)
|
182
184
|
end
|
183
185
|
|
184
186
|
def provider
|
185
|
-
metadata.fetch("provider", nil)
|
187
|
+
@provider ||= metadata.fetch("provider", nil)
|
186
188
|
end
|
187
189
|
|
188
190
|
def date_created
|
189
|
-
metadata.fetch("date_created", nil)
|
191
|
+
@date_created ||= metadata.fetch("date_created", nil)
|
190
192
|
end
|
191
193
|
|
192
194
|
def date_accepted
|
193
|
-
metadata.fetch("date_accepted", nil)
|
195
|
+
@date_accepted ||= metadata.fetch("date_accepted", nil)
|
194
196
|
end
|
195
197
|
|
196
198
|
def date_available
|
197
|
-
metadata.fetch("date_available", nil)
|
199
|
+
@date_available ||= metadata.fetch("date_available", nil)
|
198
200
|
end
|
199
201
|
|
200
202
|
def date_copyrighted
|
201
|
-
metadata.fetch("date_copyrighted", nil)
|
203
|
+
@date_copyrighted ||= metadata.fetch("date_copyrighted", nil)
|
202
204
|
end
|
203
205
|
|
204
206
|
def date_collected
|
205
|
-
metadata.fetch("date_collected", nil)
|
207
|
+
@date_collected ||= metadata.fetch("date_collected", nil)
|
206
208
|
end
|
207
209
|
|
208
210
|
def date_submitted
|
209
|
-
metadata.fetch("date_submitted", nil)
|
211
|
+
@date_submitted ||= metadata.fetch("date_submitted", nil)
|
210
212
|
end
|
211
213
|
|
212
214
|
def date_valid
|
213
|
-
metadata.fetch("date_valid", nil)
|
215
|
+
@date_valid ||= metadata.fetch("date_valid", nil)
|
214
216
|
end
|
215
217
|
|
216
218
|
def date_published
|
217
|
-
metadata.fetch("date_published", nil)
|
219
|
+
@date_published ||= metadata.fetch("date_published", nil)
|
218
220
|
end
|
219
221
|
|
220
222
|
def date_modified
|
221
|
-
metadata.fetch("date_modified", nil)
|
223
|
+
@date_modified ||= metadata.fetch("date_modified", nil)
|
222
224
|
end
|
223
225
|
|
224
226
|
def date_registered
|
@@ -230,43 +232,43 @@ module Bolognese
|
|
230
232
|
end
|
231
233
|
|
232
234
|
def volume
|
233
|
-
metadata.fetch("volume", nil)
|
235
|
+
@volume ||= metadata.fetch("volume", nil)
|
234
236
|
end
|
235
237
|
|
236
238
|
def pagination
|
237
|
-
metadata.fetch("pagination", nil)
|
239
|
+
@pagination ||= metadata.fetch("pagination", nil)
|
238
240
|
end
|
239
241
|
|
240
242
|
def description
|
241
|
-
metadata.fetch("description", nil)
|
243
|
+
@description ||= metadata.fetch("description", nil)
|
242
244
|
end
|
243
245
|
|
244
246
|
def license
|
245
|
-
metadata.fetch("license", nil)
|
247
|
+
@license ||= metadata.fetch("license", nil)
|
246
248
|
end
|
247
249
|
|
248
250
|
def version
|
249
|
-
metadata.fetch("version", nil)
|
251
|
+
@version ||= metadata.fetch("version", nil)
|
250
252
|
end
|
251
253
|
|
252
254
|
def keywords
|
253
|
-
metadata.fetch("keywords", nil)
|
255
|
+
@keywords ||= metadata.fetch("keywords", nil)
|
254
256
|
end
|
255
257
|
|
256
258
|
def language
|
257
|
-
metadata.fetch("language", nil)
|
259
|
+
@language ||= metadata.fetch("language", nil)
|
258
260
|
end
|
259
261
|
|
260
262
|
def content_size
|
261
|
-
metadata.fetch("content_size", nil)
|
263
|
+
@content_size ||= metadata.fetch("content_size", nil)
|
262
264
|
end
|
263
265
|
|
264
266
|
def schema_version
|
265
|
-
metadata.fetch("schema_version", nil)
|
267
|
+
@schema_version ||= metadata.fetch("schema_version", nil)
|
266
268
|
end
|
267
269
|
|
268
270
|
def funding
|
269
|
-
metadata.fetch("funding", nil)
|
271
|
+
@funding ||= metadata.fetch("funding", nil)
|
270
272
|
end
|
271
273
|
|
272
274
|
def member_id
|
@@ -330,7 +332,8 @@ module Bolognese
|
|
330
332
|
end
|
331
333
|
|
332
334
|
def related_identifier_hsh(relation_type)
|
333
|
-
Array.wrap(send(relation_type)).
|
335
|
+
Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
|
336
|
+
.map { |r| r.merge("relationType" => relation_type.camelize) }
|
334
337
|
end
|
335
338
|
|
336
339
|
def related_identifier
|
data/lib/bolognese/readers/crossref_reader.rb
CHANGED
@@ -98,7 +98,7 @@ module Bolognese
|
|
98
98
|
meta.dig("crossref").keys.last.camelize
|
99
99
|
end
|
100
100
|
type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
|
101
|
-
doi = bibliographic_metadata.dig("doi_data", "doi")
|
101
|
+
doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase
|
102
102
|
|
103
103
|
{ "id" => normalize_doi(doi),
|
104
104
|
"type" => type,
|
data/lib/bolognese/readers/datacite_reader.rb
CHANGED
@@ -29,7 +29,18 @@ module Bolognese
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def read_datacite(string: nil, **options)
|
32
|
-
|
32
|
+
return { "errors" => "no content" } unless string.present?
|
33
|
+
|
34
|
+
meta = Maremma.from_xml(string).fetch("resource", {})
|
35
|
+
schema_version = meta.fetch("xmlns", nil)
|
36
|
+
|
37
|
+
# validate only when option is set, as this step is expensive and
|
38
|
+
# not needed if XML comes from DataCite MDS
|
39
|
+
if options[:validate]
|
40
|
+
errors = datacite_errors(xml: string, schema_version: schema_version)
|
41
|
+
return { "errors" => errors } if errors.present?
|
42
|
+
end
|
43
|
+
|
33
44
|
id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox])
|
34
45
|
doi = doi_from_url(id)
|
35
46
|
resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
|
@@ -104,7 +115,7 @@ module Bolognese
|
|
104
115
|
"keywords" => keywords,
|
105
116
|
"language" => meta.fetch("language", nil),
|
106
117
|
"content_size" => meta.fetch("size", nil),
|
107
|
-
"schema_version" =>
|
118
|
+
"schema_version" => schema_version
|
108
119
|
}
|
109
120
|
end
|
110
121
|
|
data/lib/bolognese/readers/ris_reader.rb
CHANGED
@@ -36,7 +36,7 @@ module Bolognese
|
|
36
36
|
ris_type = meta.fetch("TY", nil) || "GEN"
|
37
37
|
type = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
|
38
38
|
|
39
|
-
doi = meta.fetch("DO", nil)
|
39
|
+
doi = validate_doi(meta.fetch("DO", nil))
|
40
40
|
author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
|
41
41
|
container_title = meta.fetch("T2", nil)
|
42
42
|
is_part_of = if container_title.present?
|
data/lib/bolognese/version.rb
CHANGED