bolognese 0.9.36 → 0.9.37

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/DOI, +0 -0
  3. data/Gemfile.lock +2 -2
  4. data/lib/bolognese/cli.rb +9 -2
  5. data/lib/bolognese/datacite_utils.rb +7 -12
  6. data/lib/bolognese/doi_utils.rb +2 -1
  7. data/lib/bolognese/metadata.rb +55 -52
  8. data/lib/bolognese/readers/crossref_reader.rb +1 -1
  9. data/lib/bolognese/readers/datacite_reader.rb +13 -2
  10. data/lib/bolognese/readers/ris_reader.rb +1 -1
  11. data/lib/bolognese/version.rb +1 -1
  12. data/lib/bolognese/writers/citeproc_writer.rb +0 -2
  13. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  14. data/lib/bolognese/writers/crosscite_writer.rb +0 -2
  15. data/lib/bolognese/writers/datacite_json_writer.rb +0 -2
  16. data/lib/bolognese/writers/rdf_xml_writer.rb +0 -2
  17. data/lib/bolognese/writers/ris_writer.rb +0 -2
  18. data/lib/bolognese/writers/schema_org_writer.rb +0 -2
  19. data/lib/bolognese/writers/turtle_writer.rb +0 -2
  20. data/spec/cli_spec.rb +5 -5
  21. data/spec/fixtures/crosscite.json +2 -2
  22. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/Dataset.yml +142 -0
  23. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_doi.yml +142 -0
  24. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +142 -0
  25. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/validates_against_schema.yml +37 -0
  26. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/with_data_citation.yml +37 -0
  27. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_bibtex/text.yml +6 -8
  28. data/spec/readers/datacite_reader_spec.rb +28 -8
  29. data/spec/readers/schema_org_reader_spec.rb +2 -1
  30. data/spec/writers/bibtex_writer_spec.rb +3 -2
  31. data/spec/writers/citeproc_writer_spec.rb +2 -2
  32. data/spec/writers/crosscite_writer_spec.rb +1 -0
  33. data/spec/writers/datacite_writer_spec.rb +32 -0
  34. data/spec/writers/ris_writer_spec.rb +2 -2
  35. metadata +8 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8418e6a4e1153609a947be86602553372b3b2954
4
- data.tar.gz: 2cde62ec0b2f271b30f0697f31543b7c78664b3a
3
+ metadata.gz: a4c14e423a53deb412218f3c855ee1f593296d98
4
+ data.tar.gz: d05e58c1dbcb061bdc4d468f048a02cd238036a5
5
5
  SHA512:
6
- metadata.gz: 69dcf1889488893a2f2aaeb5f472bb8afd60cfc4caf3882c7972942ef2b23e96b103c8d6d6a616dc04271113aae374275171f001b1a620942f70f4707e319733
7
- data.tar.gz: c505571a6a18693f89711e533a35fc6afdaceff702cd8aacb0c3d889c01c59cb0982be2b075c3a6850bbd463a4089abfa0bdd9f3bb0bde6f9be851907b8b18be
6
+ metadata.gz: 24551c9b762c597fc87407e926a510c873d42e6650f83e76e9081d8b43d92e4e06067aa18a8f30aca41bacc39a98fa7fda1b6d4ea1dc5c785aa7c971785c95fe
7
+ data.tar.gz: a66e615373cf75d9fee848a017db2966722fb2754b5985bf58f8cf45b43d3be4c243f25d4ac7377ee26d722fc0b682f02fb1ae308c584f6db5a3b74badfbc3ed
data/DOI, ADDED
File without changes
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (0.9.36)
4
+ bolognese (0.9.37)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (~> 4.1)
@@ -149,7 +149,7 @@ GEM
149
149
  temple (0.8.0)
150
150
  thor (0.19.4)
151
151
  thread_safe (0.3.6)
152
- tilt (2.0.7)
152
+ tilt (2.0.8)
153
153
  trollop (2.1.2)
154
154
  tzinfo (1.2.3)
155
155
  thread_safe (~> 0.1)
data/lib/bolognese/cli.rb CHANGED
@@ -27,9 +27,16 @@ module Bolognese
27
27
  method_option :to, aliases: "-t", default: "schema_org"
28
28
  method_option :regenerate, :type => :boolean, :force => false
29
29
  def convert(input)
30
- metadata = Metadata.new(input: input, from: options[:from], regenerate: options[:regenerate])
30
+ metadata = Metadata.new(input: input,
31
+ from: options[:from],
32
+ regenerate: options[:regenerate])
31
33
  to = options[:to] || "schema_org"
32
- puts metadata.send(to)
34
+
35
+ if metadata.valid?
36
+ puts metadata.send(to)
37
+ else
38
+ $stderr.puts metadata.errors
39
+ end
33
40
  end
34
41
 
35
42
  default_task :convert
data/lib/bolognese/datacite_utils.rb CHANGED
@@ -1,11 +1,5 @@
1
1
  module Bolognese
2
2
  module DataciteUtils
3
- def schema
4
- kernel = schema_version.split("/").last || "kernel-4.0"
5
- filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
6
- Nokogiri::XML::Schema(open(filepath))
7
- end
8
-
9
3
  def datacite_xml
10
4
  @datacite_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
11
5
  xml.resource(root_attributes) do
@@ -14,12 +8,13 @@ module Bolognese
14
8
  end.to_xml
15
9
  end
16
10
 
17
- # def datacite
18
- # datacite_xml
19
- # end
11
+ def datacite_errors(xml: nil, schema_version: nil)
12
+ schema_version ||= "http://datacite.org/schema/kernel-4"
13
+ kernel = schema_version.to_s.split("/").last
14
+ filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
15
+ schema = Nokogiri::XML::Schema(open(filepath))
20
16
 
21
- def datacite_errors
22
- schema.validate(Nokogiri::XML(datacite, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
17
+ schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
23
18
  rescue Nokogiri::XML::SyntaxError => e
24
19
  e.message
25
20
  end
@@ -88,7 +83,7 @@ module Bolognese
88
83
  end
89
84
 
90
85
  def insert_publisher(xml)
91
- xml.publisher(publisher)
86
+ xml.publisher(publisher || container_title)
92
87
  end
93
88
 
94
89
  def insert_publication_year(xml)
data/lib/bolognese/doi_utils.rb CHANGED
@@ -1,7 +1,8 @@
1
1
  module Bolognese
2
2
  module DoiUtils
3
3
  def validate_doi(doi)
4
- Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
4
+ doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
5
+ doi = doi.delete("\u200B").downcase if doi.present?
5
6
  end
6
7
 
7
8
  def validate_prefix(doi)
data/lib/bolognese/metadata.rb CHANGED
@@ -57,20 +57,24 @@ module Bolognese
57
57
  include Bolognese::Writers::SchemaOrgWriter
58
58
  include Bolognese::Writers::TurtleWriter
59
59
 
60
- attr_reader :id, :from, :raw, :metadata, :doc, :provider, :schema_version, :license, :citation,
61
- :additional_type, :alternate_name, :url, :version, :keywords, :editor,
62
- :page_start, :page_end, :date_modified, :language, :spatial_coverage,
63
- :content_size, :funding, :journal, :bibtex_type, :date_created, :has_part,
64
- :publisher, :contributor, :same_as, :is_previous_version_of, :is_new_version_of,
65
- :should_passthru, :errors, :datacite_errors, :date_accepted, :date_available,
66
- :date_copyrighted, :date_collected, :date_submitted, :date_valid,
67
- :is_cited_by, :cites, :is_supplement_to, :is_supplemented_by,
68
- :is_continued_by, :continues, :has_metadata, :is_metadata_for,
69
- :is_referenced_by, :references, :is_documented_by, :documents,
70
- :is_compiled_by, :compiles, :is_variant_form_of, :is_original_form_of,
71
- :is_reviewed_by, :reviews, :is_derived_from, :is_source_of, :format,
72
- :related_identifier, :reverse, :citeproc_type, :ris_type, :volume, :issue,
73
- :member_id, :data_center_id, :date_registered, :date_updated, :name_detector
60
+ attr_accessor :doi, :author, :title, :publisher, :contributor, :license,
61
+ :date_accepted, :date_available, :date_copyrighted, :date_collected,
62
+ :date_submitted, :date_valid, :date_created, :date_modified,
63
+ :date_registered, :date_updated, :member_id, :data_center_id, :journal,
64
+ :volume, :issue, :pagination, :url, :version, :keywords, :editor,
65
+ :description, :alternate_name, :language, :content_size, :spatial_coverage,
66
+ :schema_version, :additional_type, :has_part, :same_as,
67
+ :is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
68
+ :is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
69
+ :has_metadata, :is_metadata_for, :is_referenced_by, :references,
70
+ :is_documented_by, :documents, :is_compiled_by, :compiles,
71
+ :is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
72
+ :is_derived_from, :is_source_of, :format, :funding, :type, :bibtex_type,
73
+ :citeproc_type, :ris_type
74
+
75
+ attr_reader :id, :from, :raw, :metadata, :doc, :provider, :citation,
76
+ :page_start, :page_end, :should_passthru, :errors,
77
+ :related_identifier, :reverse, :name_detector
74
78
 
75
79
  def initialize(input: nil, from: nil, regenerate: false, **options)
76
80
  id = normalize_id(input, options)
@@ -100,7 +104,6 @@ module Bolognese
100
104
  @raw = string.present? ? string.strip : nil
101
105
 
102
106
  @should_passthru = (@from == "datacite") && !regenerate
103
- @doi = options[:doi].presence
104
107
 
105
108
  @url = hsh.to_h["url"].presence
106
109
  @date_registered = hsh.to_h["date_registered"].presence
@@ -117,40 +120,39 @@ module Bolognese
117
120
  exists? && errors.nil?
118
121
  end
119
122
 
123
+ # validate against DataCite schema, unless there are already errors in the reader
120
124
  def errors
121
- metadata.fetch("errors", nil)
125
+ xml = should_passthru ? raw : datacite_xml
126
+ metadata.fetch("errors", nil) || datacite_errors(xml: xml,
127
+ schema_version: schema_version)
122
128
  end
123
129
 
124
- # def errors
125
- # doc && doc.errors.map { |error| error.to_s }.unwrap
126
- # end
127
-
128
130
  def id
129
131
  @doi.present? ? doi_as_url(@doi) : metadata.fetch("id", nil)
130
132
  end
131
133
 
132
134
  def type
133
- metadata.fetch("type", nil)
135
+ @type ||= metadata.fetch("type", nil)
134
136
  end
135
137
 
136
138
  def additional_type
137
- metadata.fetch("additional_type", nil)
139
+ @additional_type ||= metadata.fetch("additional_type", nil)
138
140
  end
139
141
 
140
142
  def citeproc_type
141
- metadata.fetch("citeproc_type", nil)
143
+ @citeproc_type ||= metadata.fetch("citeproc_type", nil)
142
144
  end
143
145
 
144
146
  def bibtex_type
145
- metadata.fetch("bibtex_type", nil)
147
+ @bibtex_type ||= metadata.fetch("bibtex_type", nil)
146
148
  end
147
149
 
148
150
  def ris_type
149
- metadata.fetch("ris_type", nil)
151
+ @ris_type ||= metadata.fetch("ris_type", nil)
150
152
  end
151
153
 
152
154
  def resource_type_general
153
- metadata.fetch("resource_type_general", nil)
155
+ @resource_type_general ||= metadata.fetch("resource_type_general", nil)
154
156
  end
155
157
 
156
158
  def doi
@@ -162,63 +164,63 @@ module Bolognese
162
164
  end
163
165
 
164
166
  def title
165
- metadata.fetch("title", nil)
167
+ @title ||= metadata.fetch("title", nil)
166
168
  end
167
169
 
168
170
  def alternate_name
169
- metadata.fetch("alternate_name", nil)
171
+ @alternate_name ||= metadata.fetch("alternate_name", nil)
170
172
  end
171
173
 
172
174
  def author
173
- metadata.fetch("author", nil)
175
+ @author ||= metadata.fetch("author", nil)
174
176
  end
175
177
 
176
178
  def editor
177
- metadata.fetch("editor", nil)
179
+ @editor ||= metadata.fetch("editor", nil)
178
180
  end
179
181
 
180
182
  def publisher
181
- metadata.fetch("publisher", nil)
183
+ @publisher ||= metadata.fetch("publisher", nil)
182
184
  end
183
185
 
184
186
  def provider
185
- metadata.fetch("provider", nil)
187
+ @provider ||= metadata.fetch("provider", nil)
186
188
  end
187
189
 
188
190
  def date_created
189
- metadata.fetch("date_created", nil)
191
+ @date_created ||= metadata.fetch("date_created", nil)
190
192
  end
191
193
 
192
194
  def date_accepted
193
- metadata.fetch("date_accepted", nil)
195
+ @date_accepted ||= metadata.fetch("date_accepted", nil)
194
196
  end
195
197
 
196
198
  def date_available
197
- metadata.fetch("date_available", nil)
199
+ @date_available ||= metadata.fetch("date_available", nil)
198
200
  end
199
201
 
200
202
  def date_copyrighted
201
- metadata.fetch("date_copyrighted", nil)
203
+ @date_copyrighted ||= metadata.fetch("date_copyrighted", nil)
202
204
  end
203
205
 
204
206
  def date_collected
205
- metadata.fetch("date_collected", nil)
207
+ @date_collected ||= metadata.fetch("date_collected", nil)
206
208
  end
207
209
 
208
210
  def date_submitted
209
- metadata.fetch("date_submitted", nil)
211
+ @date_submitted ||= metadata.fetch("date_submitted", nil)
210
212
  end
211
213
 
212
214
  def date_valid
213
- metadata.fetch("date_valid", nil)
215
+ @date_valid ||= metadata.fetch("date_valid", nil)
214
216
  end
215
217
 
216
218
  def date_published
217
- metadata.fetch("date_published", nil)
219
+ @date_published ||= metadata.fetch("date_published", nil)
218
220
  end
219
221
 
220
222
  def date_modified
221
- metadata.fetch("date_modified", nil)
223
+ @date_modified ||= metadata.fetch("date_modified", nil)
222
224
  end
223
225
 
224
226
  def date_registered
@@ -230,43 +232,43 @@ module Bolognese
230
232
  end
231
233
 
232
234
  def volume
233
- metadata.fetch("volume", nil)
235
+ @volume ||= metadata.fetch("volume", nil)
234
236
  end
235
237
 
236
238
  def pagination
237
- metadata.fetch("pagination", nil)
239
+ @pagination ||= metadata.fetch("pagination", nil)
238
240
  end
239
241
 
240
242
  def description
241
- metadata.fetch("description", nil)
243
+ @description ||= metadata.fetch("description", nil)
242
244
  end
243
245
 
244
246
  def license
245
- metadata.fetch("license", nil)
247
+ @license ||= metadata.fetch("license", nil)
246
248
  end
247
249
 
248
250
  def version
249
- metadata.fetch("version", nil)
251
+ @version ||= metadata.fetch("version", nil)
250
252
  end
251
253
 
252
254
  def keywords
253
- metadata.fetch("keywords", nil)
255
+ @keywords ||= metadata.fetch("keywords", nil)
254
256
  end
255
257
 
256
258
  def language
257
- metadata.fetch("language", nil)
259
+ @language ||= metadata.fetch("language", nil)
258
260
  end
259
261
 
260
262
  def content_size
261
- metadata.fetch("content_size", nil)
263
+ @content_size ||= metadata.fetch("content_size", nil)
262
264
  end
263
265
 
264
266
  def schema_version
265
- metadata.fetch("schema_version", nil)
267
+ @schema_version ||= metadata.fetch("schema_version", nil)
266
268
  end
267
269
 
268
270
  def funding
269
- metadata.fetch("funding", nil)
271
+ @funding ||= metadata.fetch("funding", nil)
270
272
  end
271
273
 
272
274
  def member_id
@@ -330,7 +332,8 @@ module Bolognese
330
332
  end
331
333
 
332
334
  def related_identifier_hsh(relation_type)
333
- Array.wrap(send(relation_type)).map { |r| r.merge("relationType" => relation_type.camelize) }
335
+ Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
336
+ .map { |r| r.merge("relationType" => relation_type.camelize) }
334
337
  end
335
338
 
336
339
  def related_identifier
data/lib/bolognese/readers/crossref_reader.rb CHANGED
@@ -98,7 +98,7 @@ module Bolognese
98
98
  meta.dig("crossref").keys.last.camelize
99
99
  end
100
100
  type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
101
- doi = bibliographic_metadata.dig("doi_data", "doi")
101
+ doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase
102
102
 
103
103
  { "id" => normalize_doi(doi),
104
104
  "type" => type,
data/lib/bolognese/readers/datacite_reader.rb CHANGED
@@ -29,7 +29,18 @@ module Bolognese
29
29
  end
30
30
 
31
31
  def read_datacite(string: nil, **options)
32
- meta = string.present? ? Maremma.from_xml(string).fetch("resource", {}) : {}
32
+ return { "errors" => "no content" } unless string.present?
33
+
34
+ meta = Maremma.from_xml(string).fetch("resource", {})
35
+ schema_version = meta.fetch("xmlns", nil)
36
+
37
+ # validate only when option is set, as this step is expensive and
38
+ # not needed if XML comes from DataCite MDS
39
+ if options[:validate]
40
+ errors = datacite_errors(xml: string, schema_version: schema_version)
41
+ return { "errors" => errors } if errors.present?
42
+ end
43
+
33
44
  id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox])
34
45
  doi = doi_from_url(id)
35
46
  resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
@@ -104,7 +115,7 @@ module Bolognese
104
115
  "keywords" => keywords,
105
116
  "language" => meta.fetch("language", nil),
106
117
  "content_size" => meta.fetch("size", nil),
107
- "schema_version" => meta.fetch("xmlns", nil)
118
+ "schema_version" => schema_version
108
119
  }
109
120
  end
110
121
 
data/lib/bolognese/readers/ris_reader.rb CHANGED
@@ -36,7 +36,7 @@ module Bolognese
36
36
  ris_type = meta.fetch("TY", nil) || "GEN"
37
37
  type = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
38
38
 
39
- doi = meta.fetch("DO", nil)
39
+ doi = validate_doi(meta.fetch("DO", nil))
40
40
  author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
41
41
  container_title = meta.fetch("T2", nil)
42
42
  is_part_of = if container_title.present?
data/lib/bolognese/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.9.36"
2
+ VERSION = "0.9.37"
3
3
  end
data/lib/bolognese/writers/citeproc_writer.rb CHANGED
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module CiteprocWriter
4
4
  def citeproc
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "type" => citeproc_type,
9
7
  "id" => id,
data/lib/bolognese/writers/codemeta_writer.rb CHANGED
@@ -3,7 +3,7 @@ module Bolognese
3
3
  module CodemetaWriter
4
4
  def codemeta
5
5
  return nil unless valid?
6
-
6
+
7
7
  hsh = {
8
8
  "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
9
9
  "@type" => type,
data/lib/bolognese/writers/crosscite_writer.rb CHANGED
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module CrossciteWriter
4
4
  def crosscite
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "id" => id,
9
7
  "doi" => doi,
data/lib/bolognese/writers/datacite_json_writer.rb CHANGED
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module DataciteJsonWriter
4
4
  def datacite_json
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "id" => id,
9
7
  "doi" => doi,
data/lib/bolognese/writers/rdf_xml_writer.rb CHANGED
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module RdfXmlWriter
4
4
  def rdf_xml
5
- return nil unless valid?
6
-
7
5
  graph.dump(:rdfxml, prefixes: { schema: "http://schema.org/" })
8
6
  end
9
7
  end