bolognese 0.9.36 → 0.9.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/DOI, +0 -0
- data/Gemfile.lock +2 -2
- data/lib/bolognese/cli.rb +9 -2
- data/lib/bolognese/datacite_utils.rb +7 -12
- data/lib/bolognese/doi_utils.rb +2 -1
- data/lib/bolognese/metadata.rb +55 -52
- data/lib/bolognese/readers/crossref_reader.rb +1 -1
- data/lib/bolognese/readers/datacite_reader.rb +13 -2
- data/lib/bolognese/readers/ris_reader.rb +1 -1
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/citeproc_writer.rb +0 -2
- data/lib/bolognese/writers/codemeta_writer.rb +1 -1
- data/lib/bolognese/writers/crosscite_writer.rb +0 -2
- data/lib/bolognese/writers/datacite_json_writer.rb +0 -2
- data/lib/bolognese/writers/rdf_xml_writer.rb +0 -2
- data/lib/bolognese/writers/ris_writer.rb +0 -2
- data/lib/bolognese/writers/schema_org_writer.rb +0 -2
- data/lib/bolognese/writers/turtle_writer.rb +0 -2
- data/spec/cli_spec.rb +5 -5
- data/spec/fixtures/crosscite.json +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/Dataset.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_doi.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +142 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/validates_against_schema.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/with_data_citation.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_bibtex/text.yml +6 -8
- data/spec/readers/datacite_reader_spec.rb +28 -8
- data/spec/readers/schema_org_reader_spec.rb +2 -1
- data/spec/writers/bibtex_writer_spec.rb +3 -2
- data/spec/writers/citeproc_writer_spec.rb +2 -2
- data/spec/writers/crosscite_writer_spec.rb +1 -0
- data/spec/writers/datacite_writer_spec.rb +32 -0
- data/spec/writers/ris_writer_spec.rb +2 -2
- metadata +8 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a4c14e423a53deb412218f3c855ee1f593296d98
|
|
4
|
+
data.tar.gz: d05e58c1dbcb061bdc4d468f048a02cd238036a5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 24551c9b762c597fc87407e926a510c873d42e6650f83e76e9081d8b43d92e4e06067aa18a8f30aca41bacc39a98fa7fda1b6d4ea1dc5c785aa7c971785c95fe
|
|
7
|
+
data.tar.gz: a66e615373cf75d9fee848a017db2966722fb2754b5985bf58f8cf45b43d3be4c243f25d4ac7377ee26d722fc0b682f02fb1ae308c584f6db5a3b74badfbc3ed
|
data/DOI,
ADDED
|
File without changes
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
bolognese (0.9.36)
|
|
4
|
+
bolognese (0.9.37)
|
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
|
6
6
|
benchmark_methods (~> 0.7)
|
|
7
7
|
bibtex-ruby (~> 4.1)
|
|
@@ -149,7 +149,7 @@ GEM
|
|
|
149
149
|
temple (0.8.0)
|
|
150
150
|
thor (0.19.4)
|
|
151
151
|
thread_safe (0.3.6)
|
|
152
|
-
tilt (2.0.
|
|
152
|
+
tilt (2.0.8)
|
|
153
153
|
trollop (2.1.2)
|
|
154
154
|
tzinfo (1.2.3)
|
|
155
155
|
thread_safe (~> 0.1)
|
data/lib/bolognese/cli.rb
CHANGED
|
@@ -27,9 +27,16 @@ module Bolognese
|
|
|
27
27
|
method_option :to, aliases: "-t", default: "schema_org"
|
|
28
28
|
method_option :regenerate, :type => :boolean, :force => false
|
|
29
29
|
def convert(input)
|
|
30
|
-
metadata = Metadata.new(input: input,
|
|
30
|
+
metadata = Metadata.new(input: input,
|
|
31
|
+
from: options[:from],
|
|
32
|
+
regenerate: options[:regenerate])
|
|
31
33
|
to = options[:to] || "schema_org"
|
|
32
|
-
|
|
34
|
+
|
|
35
|
+
if metadata.valid?
|
|
36
|
+
puts metadata.send(to)
|
|
37
|
+
else
|
|
38
|
+
$stderr.puts metadata.errors
|
|
39
|
+
end
|
|
33
40
|
end
|
|
34
41
|
|
|
35
42
|
default_task :convert
|
|
@@ -1,11 +1,5 @@
|
|
|
1
1
|
module Bolognese
|
|
2
2
|
module DataciteUtils
|
|
3
|
-
def schema
|
|
4
|
-
kernel = schema_version.split("/").last || "kernel-4.0"
|
|
5
|
-
filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
|
|
6
|
-
Nokogiri::XML::Schema(open(filepath))
|
|
7
|
-
end
|
|
8
|
-
|
|
9
3
|
def datacite_xml
|
|
10
4
|
@datacite_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
|
|
11
5
|
xml.resource(root_attributes) do
|
|
@@ -14,12 +8,13 @@ module Bolognese
|
|
|
14
8
|
end.to_xml
|
|
15
9
|
end
|
|
16
10
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
11
|
+
def datacite_errors(xml: nil, schema_version: nil)
|
|
12
|
+
schema_version ||= "http://datacite.org/schema/kernel-4"
|
|
13
|
+
kernel = schema_version.to_s.split("/").last
|
|
14
|
+
filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
|
|
15
|
+
schema = Nokogiri::XML::Schema(open(filepath))
|
|
20
16
|
|
|
21
|
-
|
|
22
|
-
schema.validate(Nokogiri::XML(datacite, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
|
|
17
|
+
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
|
|
23
18
|
rescue Nokogiri::XML::SyntaxError => e
|
|
24
19
|
e.message
|
|
25
20
|
end
|
|
@@ -88,7 +83,7 @@ module Bolognese
|
|
|
88
83
|
end
|
|
89
84
|
|
|
90
85
|
def insert_publisher(xml)
|
|
91
|
-
xml.publisher(publisher)
|
|
86
|
+
xml.publisher(publisher || container_title)
|
|
92
87
|
end
|
|
93
88
|
|
|
94
89
|
def insert_publication_year(xml)
|
data/lib/bolognese/doi_utils.rb
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
module Bolognese
|
|
2
2
|
module DoiUtils
|
|
3
3
|
def validate_doi(doi)
|
|
4
|
-
Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
|
|
4
|
+
doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
|
|
5
|
+
doi = doi.delete("\u200B").downcase if doi.present?
|
|
5
6
|
end
|
|
6
7
|
|
|
7
8
|
def validate_prefix(doi)
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -57,20 +57,24 @@ module Bolognese
|
|
|
57
57
|
include Bolognese::Writers::SchemaOrgWriter
|
|
58
58
|
include Bolognese::Writers::TurtleWriter
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
:
|
|
62
|
-
:
|
|
63
|
-
:
|
|
64
|
-
:
|
|
65
|
-
:
|
|
66
|
-
:
|
|
67
|
-
:
|
|
68
|
-
:
|
|
69
|
-
:
|
|
70
|
-
:
|
|
71
|
-
:
|
|
72
|
-
:
|
|
73
|
-
:
|
|
60
|
+
attr_accessor :doi, :author, :title, :publisher, :contributor, :license,
|
|
61
|
+
:date_accepted, :date_available, :date_copyrighted, :date_collected,
|
|
62
|
+
:date_submitted, :date_valid, :date_created, :date_modified,
|
|
63
|
+
:date_registered, :date_updated, :member_id, :data_center_id, :journal,
|
|
64
|
+
:volume, :issue, :pagination, :url, :version, :keywords, :editor,
|
|
65
|
+
:description, :alternate_name, :language, :content_size, :spatial_coverage,
|
|
66
|
+
:schema_version, :additional_type, :has_part, :same_as,
|
|
67
|
+
:is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
|
|
68
|
+
:is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
|
|
69
|
+
:has_metadata, :is_metadata_for, :is_referenced_by, :references,
|
|
70
|
+
:is_documented_by, :documents, :is_compiled_by, :compiles,
|
|
71
|
+
:is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
|
|
72
|
+
:is_derived_from, :is_source_of, :format, :funding, :type, :bibtex_type,
|
|
73
|
+
:citeproc_type, :ris_type
|
|
74
|
+
|
|
75
|
+
attr_reader :id, :from, :raw, :metadata, :doc, :provider, :citation,
|
|
76
|
+
:page_start, :page_end, :should_passthru, :errors,
|
|
77
|
+
:related_identifier, :reverse, :name_detector
|
|
74
78
|
|
|
75
79
|
def initialize(input: nil, from: nil, regenerate: false, **options)
|
|
76
80
|
id = normalize_id(input, options)
|
|
@@ -100,7 +104,6 @@ module Bolognese
|
|
|
100
104
|
@raw = string.present? ? string.strip : nil
|
|
101
105
|
|
|
102
106
|
@should_passthru = (@from == "datacite") && !regenerate
|
|
103
|
-
@doi = options[:doi].presence
|
|
104
107
|
|
|
105
108
|
@url = hsh.to_h["url"].presence
|
|
106
109
|
@date_registered = hsh.to_h["date_registered"].presence
|
|
@@ -117,40 +120,39 @@ module Bolognese
|
|
|
117
120
|
exists? && errors.nil?
|
|
118
121
|
end
|
|
119
122
|
|
|
123
|
+
# validate against DataCite schema, unless there are already errors in the reader
|
|
120
124
|
def errors
|
|
121
|
-
|
|
125
|
+
xml = should_passthru ? raw : datacite_xml
|
|
126
|
+
metadata.fetch("errors", nil) || datacite_errors(xml: xml,
|
|
127
|
+
schema_version: schema_version)
|
|
122
128
|
end
|
|
123
129
|
|
|
124
|
-
# def errors
|
|
125
|
-
# doc && doc.errors.map { |error| error.to_s }.unwrap
|
|
126
|
-
# end
|
|
127
|
-
|
|
128
130
|
def id
|
|
129
131
|
@doi.present? ? doi_as_url(@doi) : metadata.fetch("id", nil)
|
|
130
132
|
end
|
|
131
133
|
|
|
132
134
|
def type
|
|
133
|
-
metadata.fetch("type", nil)
|
|
135
|
+
@type ||= metadata.fetch("type", nil)
|
|
134
136
|
end
|
|
135
137
|
|
|
136
138
|
def additional_type
|
|
137
|
-
metadata.fetch("additional_type", nil)
|
|
139
|
+
@additional_type ||= metadata.fetch("additional_type", nil)
|
|
138
140
|
end
|
|
139
141
|
|
|
140
142
|
def citeproc_type
|
|
141
|
-
metadata.fetch("citeproc_type", nil)
|
|
143
|
+
@citeproc_type ||= metadata.fetch("citeproc_type", nil)
|
|
142
144
|
end
|
|
143
145
|
|
|
144
146
|
def bibtex_type
|
|
145
|
-
metadata.fetch("bibtex_type", nil)
|
|
147
|
+
@bibtex_type ||= metadata.fetch("bibtex_type", nil)
|
|
146
148
|
end
|
|
147
149
|
|
|
148
150
|
def ris_type
|
|
149
|
-
metadata.fetch("ris_type", nil)
|
|
151
|
+
@ris_type ||= metadata.fetch("ris_type", nil)
|
|
150
152
|
end
|
|
151
153
|
|
|
152
154
|
def resource_type_general
|
|
153
|
-
metadata.fetch("resource_type_general", nil)
|
|
155
|
+
@resource_type_general ||= metadata.fetch("resource_type_general", nil)
|
|
154
156
|
end
|
|
155
157
|
|
|
156
158
|
def doi
|
|
@@ -162,63 +164,63 @@ module Bolognese
|
|
|
162
164
|
end
|
|
163
165
|
|
|
164
166
|
def title
|
|
165
|
-
metadata.fetch("title", nil)
|
|
167
|
+
@title ||= metadata.fetch("title", nil)
|
|
166
168
|
end
|
|
167
169
|
|
|
168
170
|
def alternate_name
|
|
169
|
-
metadata.fetch("alternate_name", nil)
|
|
171
|
+
@alternate_name ||= metadata.fetch("alternate_name", nil)
|
|
170
172
|
end
|
|
171
173
|
|
|
172
174
|
def author
|
|
173
|
-
metadata.fetch("author", nil)
|
|
175
|
+
@author ||= metadata.fetch("author", nil)
|
|
174
176
|
end
|
|
175
177
|
|
|
176
178
|
def editor
|
|
177
|
-
metadata.fetch("editor", nil)
|
|
179
|
+
@editor ||= metadata.fetch("editor", nil)
|
|
178
180
|
end
|
|
179
181
|
|
|
180
182
|
def publisher
|
|
181
|
-
metadata.fetch("publisher", nil)
|
|
183
|
+
@publisher ||= metadata.fetch("publisher", nil)
|
|
182
184
|
end
|
|
183
185
|
|
|
184
186
|
def provider
|
|
185
|
-
metadata.fetch("provider", nil)
|
|
187
|
+
@provider ||= metadata.fetch("provider", nil)
|
|
186
188
|
end
|
|
187
189
|
|
|
188
190
|
def date_created
|
|
189
|
-
metadata.fetch("date_created", nil)
|
|
191
|
+
@date_created ||= metadata.fetch("date_created", nil)
|
|
190
192
|
end
|
|
191
193
|
|
|
192
194
|
def date_accepted
|
|
193
|
-
metadata.fetch("date_accepted", nil)
|
|
195
|
+
@date_accepted ||= metadata.fetch("date_accepted", nil)
|
|
194
196
|
end
|
|
195
197
|
|
|
196
198
|
def date_available
|
|
197
|
-
metadata.fetch("date_available", nil)
|
|
199
|
+
@date_available ||= metadata.fetch("date_available", nil)
|
|
198
200
|
end
|
|
199
201
|
|
|
200
202
|
def date_copyrighted
|
|
201
|
-
metadata.fetch("date_copyrighted", nil)
|
|
203
|
+
@date_copyrighted ||= metadata.fetch("date_copyrighted", nil)
|
|
202
204
|
end
|
|
203
205
|
|
|
204
206
|
def date_collected
|
|
205
|
-
metadata.fetch("date_collected", nil)
|
|
207
|
+
@date_collected ||= metadata.fetch("date_collected", nil)
|
|
206
208
|
end
|
|
207
209
|
|
|
208
210
|
def date_submitted
|
|
209
|
-
metadata.fetch("date_submitted", nil)
|
|
211
|
+
@date_submitted ||= metadata.fetch("date_submitted", nil)
|
|
210
212
|
end
|
|
211
213
|
|
|
212
214
|
def date_valid
|
|
213
|
-
metadata.fetch("date_valid", nil)
|
|
215
|
+
@date_valid ||= metadata.fetch("date_valid", nil)
|
|
214
216
|
end
|
|
215
217
|
|
|
216
218
|
def date_published
|
|
217
|
-
metadata.fetch("date_published", nil)
|
|
219
|
+
@date_published ||= metadata.fetch("date_published", nil)
|
|
218
220
|
end
|
|
219
221
|
|
|
220
222
|
def date_modified
|
|
221
|
-
metadata.fetch("date_modified", nil)
|
|
223
|
+
@date_modified ||= metadata.fetch("date_modified", nil)
|
|
222
224
|
end
|
|
223
225
|
|
|
224
226
|
def date_registered
|
|
@@ -230,43 +232,43 @@ module Bolognese
|
|
|
230
232
|
end
|
|
231
233
|
|
|
232
234
|
def volume
|
|
233
|
-
metadata.fetch("volume", nil)
|
|
235
|
+
@volume ||= metadata.fetch("volume", nil)
|
|
234
236
|
end
|
|
235
237
|
|
|
236
238
|
def pagination
|
|
237
|
-
metadata.fetch("pagination", nil)
|
|
239
|
+
@pagination ||= metadata.fetch("pagination", nil)
|
|
238
240
|
end
|
|
239
241
|
|
|
240
242
|
def description
|
|
241
|
-
metadata.fetch("description", nil)
|
|
243
|
+
@description ||= metadata.fetch("description", nil)
|
|
242
244
|
end
|
|
243
245
|
|
|
244
246
|
def license
|
|
245
|
-
metadata.fetch("license", nil)
|
|
247
|
+
@license ||= metadata.fetch("license", nil)
|
|
246
248
|
end
|
|
247
249
|
|
|
248
250
|
def version
|
|
249
|
-
metadata.fetch("version", nil)
|
|
251
|
+
@version ||= metadata.fetch("version", nil)
|
|
250
252
|
end
|
|
251
253
|
|
|
252
254
|
def keywords
|
|
253
|
-
metadata.fetch("keywords", nil)
|
|
255
|
+
@keywords ||= metadata.fetch("keywords", nil)
|
|
254
256
|
end
|
|
255
257
|
|
|
256
258
|
def language
|
|
257
|
-
metadata.fetch("language", nil)
|
|
259
|
+
@language ||= metadata.fetch("language", nil)
|
|
258
260
|
end
|
|
259
261
|
|
|
260
262
|
def content_size
|
|
261
|
-
metadata.fetch("content_size", nil)
|
|
263
|
+
@content_size ||= metadata.fetch("content_size", nil)
|
|
262
264
|
end
|
|
263
265
|
|
|
264
266
|
def schema_version
|
|
265
|
-
metadata.fetch("schema_version", nil)
|
|
267
|
+
@schema_version ||= metadata.fetch("schema_version", nil)
|
|
266
268
|
end
|
|
267
269
|
|
|
268
270
|
def funding
|
|
269
|
-
metadata.fetch("funding", nil)
|
|
271
|
+
@funding ||= metadata.fetch("funding", nil)
|
|
270
272
|
end
|
|
271
273
|
|
|
272
274
|
def member_id
|
|
@@ -330,7 +332,8 @@ module Bolognese
|
|
|
330
332
|
end
|
|
331
333
|
|
|
332
334
|
def related_identifier_hsh(relation_type)
|
|
333
|
-
Array.wrap(send(relation_type)).
|
|
335
|
+
Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
|
|
336
|
+
.map { |r| r.merge("relationType" => relation_type.camelize) }
|
|
334
337
|
end
|
|
335
338
|
|
|
336
339
|
def related_identifier
|
|
@@ -98,7 +98,7 @@ module Bolognese
|
|
|
98
98
|
meta.dig("crossref").keys.last.camelize
|
|
99
99
|
end
|
|
100
100
|
type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
|
|
101
|
-
doi = bibliographic_metadata.dig("doi_data", "doi")
|
|
101
|
+
doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase
|
|
102
102
|
|
|
103
103
|
{ "id" => normalize_doi(doi),
|
|
104
104
|
"type" => type,
|
|
@@ -29,7 +29,18 @@ module Bolognese
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def read_datacite(string: nil, **options)
|
|
32
|
-
|
|
32
|
+
return { "errors" => "no content" } unless string.present?
|
|
33
|
+
|
|
34
|
+
meta = Maremma.from_xml(string).fetch("resource", {})
|
|
35
|
+
schema_version = meta.fetch("xmlns", nil)
|
|
36
|
+
|
|
37
|
+
# validate only when option is set, as this step is expensive and
|
|
38
|
+
# not needed if XML comes from DataCite MDS
|
|
39
|
+
if options[:validate]
|
|
40
|
+
errors = datacite_errors(xml: string, schema_version: schema_version)
|
|
41
|
+
return { "errors" => errors } if errors.present?
|
|
42
|
+
end
|
|
43
|
+
|
|
33
44
|
id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox])
|
|
34
45
|
doi = doi_from_url(id)
|
|
35
46
|
resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
|
|
@@ -104,7 +115,7 @@ module Bolognese
|
|
|
104
115
|
"keywords" => keywords,
|
|
105
116
|
"language" => meta.fetch("language", nil),
|
|
106
117
|
"content_size" => meta.fetch("size", nil),
|
|
107
|
-
"schema_version" =>
|
|
118
|
+
"schema_version" => schema_version
|
|
108
119
|
}
|
|
109
120
|
end
|
|
110
121
|
|
|
@@ -36,7 +36,7 @@ module Bolognese
|
|
|
36
36
|
ris_type = meta.fetch("TY", nil) || "GEN"
|
|
37
37
|
type = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
|
|
38
38
|
|
|
39
|
-
doi = meta.fetch("DO", nil)
|
|
39
|
+
doi = validate_doi(meta.fetch("DO", nil))
|
|
40
40
|
author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
|
|
41
41
|
container_title = meta.fetch("T2", nil)
|
|
42
42
|
is_part_of = if container_title.present?
|
data/lib/bolognese/version.rb
CHANGED