bolognese 0.9.36 → 0.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/DOI, +0 -0
  3. data/Gemfile.lock +2 -2
  4. data/lib/bolognese/cli.rb +9 -2
  5. data/lib/bolognese/datacite_utils.rb +7 -12
  6. data/lib/bolognese/doi_utils.rb +2 -1
  7. data/lib/bolognese/metadata.rb +55 -52
  8. data/lib/bolognese/readers/crossref_reader.rb +1 -1
  9. data/lib/bolognese/readers/datacite_reader.rb +13 -2
  10. data/lib/bolognese/readers/ris_reader.rb +1 -1
  11. data/lib/bolognese/version.rb +1 -1
  12. data/lib/bolognese/writers/citeproc_writer.rb +0 -2
  13. data/lib/bolognese/writers/codemeta_writer.rb +1 -1
  14. data/lib/bolognese/writers/crosscite_writer.rb +0 -2
  15. data/lib/bolognese/writers/datacite_json_writer.rb +0 -2
  16. data/lib/bolognese/writers/rdf_xml_writer.rb +0 -2
  17. data/lib/bolognese/writers/ris_writer.rb +0 -2
  18. data/lib/bolognese/writers/schema_org_writer.rb +0 -2
  19. data/lib/bolognese/writers/turtle_writer.rb +0 -2
  20. data/spec/cli_spec.rb +5 -5
  21. data/spec/fixtures/crosscite.json +2 -2
  22. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/Dataset.yml +142 -0
  23. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_doi.yml +142 -0
  24. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +142 -0
  25. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/validates_against_schema.yml +37 -0
  26. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_metadata_as_datacite_xml/with_data_citation.yml +37 -0
  27. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_bibtex/text.yml +6 -8
  28. data/spec/readers/datacite_reader_spec.rb +28 -8
  29. data/spec/readers/schema_org_reader_spec.rb +2 -1
  30. data/spec/writers/bibtex_writer_spec.rb +3 -2
  31. data/spec/writers/citeproc_writer_spec.rb +2 -2
  32. data/spec/writers/crosscite_writer_spec.rb +1 -0
  33. data/spec/writers/datacite_writer_spec.rb +32 -0
  34. data/spec/writers/ris_writer_spec.rb +2 -2
  35. metadata +8 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8418e6a4e1153609a947be86602553372b3b2954
4
- data.tar.gz: 2cde62ec0b2f271b30f0697f31543b7c78664b3a
3
+ metadata.gz: a4c14e423a53deb412218f3c855ee1f593296d98
4
+ data.tar.gz: d05e58c1dbcb061bdc4d468f048a02cd238036a5
5
5
  SHA512:
6
- metadata.gz: 69dcf1889488893a2f2aaeb5f472bb8afd60cfc4caf3882c7972942ef2b23e96b103c8d6d6a616dc04271113aae374275171f001b1a620942f70f4707e319733
7
- data.tar.gz: c505571a6a18693f89711e533a35fc6afdaceff702cd8aacb0c3d889c01c59cb0982be2b075c3a6850bbd463a4089abfa0bdd9f3bb0bde6f9be851907b8b18be
6
+ metadata.gz: 24551c9b762c597fc87407e926a510c873d42e6650f83e76e9081d8b43d92e4e06067aa18a8f30aca41bacc39a98fa7fda1b6d4ea1dc5c785aa7c971785c95fe
7
+ data.tar.gz: a66e615373cf75d9fee848a017db2966722fb2754b5985bf58f8cf45b43d3be4c243f25d4ac7377ee26d722fc0b682f02fb1ae308c584f6db5a3b74badfbc3ed
data/DOI, ADDED
File without changes
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (0.9.36)
4
+ bolognese (0.9.37)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (~> 4.1)
@@ -149,7 +149,7 @@ GEM
149
149
  temple (0.8.0)
150
150
  thor (0.19.4)
151
151
  thread_safe (0.3.6)
152
- tilt (2.0.7)
152
+ tilt (2.0.8)
153
153
  trollop (2.1.2)
154
154
  tzinfo (1.2.3)
155
155
  thread_safe (~> 0.1)
data/lib/bolognese/cli.rb CHANGED
@@ -27,9 +27,16 @@ module Bolognese
27
27
  method_option :to, aliases: "-t", default: "schema_org"
28
28
  method_option :regenerate, :type => :boolean, :force => false
29
29
  def convert(input)
30
- metadata = Metadata.new(input: input, from: options[:from], regenerate: options[:regenerate])
30
+ metadata = Metadata.new(input: input,
31
+ from: options[:from],
32
+ regenerate: options[:regenerate])
31
33
  to = options[:to] || "schema_org"
32
- puts metadata.send(to)
34
+
35
+ if metadata.valid?
36
+ puts metadata.send(to)
37
+ else
38
+ $stderr.puts metadata.errors
39
+ end
33
40
  end
34
41
 
35
42
  default_task :convert
@@ -1,11 +1,5 @@
1
1
  module Bolognese
2
2
  module DataciteUtils
3
- def schema
4
- kernel = schema_version.split("/").last || "kernel-4.0"
5
- filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
6
- Nokogiri::XML::Schema(open(filepath))
7
- end
8
-
9
3
  def datacite_xml
10
4
  @datacite_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
11
5
  xml.resource(root_attributes) do
@@ -14,12 +8,13 @@ module Bolognese
14
8
  end.to_xml
15
9
  end
16
10
 
17
- # def datacite
18
- # datacite_xml
19
- # end
11
+ def datacite_errors(xml: nil, schema_version: nil)
12
+ schema_version ||= "http://datacite.org/schema/kernel-4"
13
+ kernel = schema_version.to_s.split("/").last
14
+ filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
15
+ schema = Nokogiri::XML::Schema(open(filepath))
20
16
 
21
- def datacite_errors
22
- schema.validate(Nokogiri::XML(datacite, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
17
+ schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
23
18
  rescue Nokogiri::XML::SyntaxError => e
24
19
  e.message
25
20
  end
@@ -88,7 +83,7 @@ module Bolognese
88
83
  end
89
84
 
90
85
  def insert_publisher(xml)
91
- xml.publisher(publisher)
86
+ xml.publisher(publisher || container_title)
92
87
  end
93
88
 
94
89
  def insert_publication_year(xml)
@@ -1,7 +1,8 @@
1
1
  module Bolognese
2
2
  module DoiUtils
3
3
  def validate_doi(doi)
4
- Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
4
+ doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|doi.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
5
+ doi = doi.delete("\u200B").downcase if doi.present?
5
6
  end
6
7
 
7
8
  def validate_prefix(doi)
@@ -57,20 +57,24 @@ module Bolognese
57
57
  include Bolognese::Writers::SchemaOrgWriter
58
58
  include Bolognese::Writers::TurtleWriter
59
59
 
60
- attr_reader :id, :from, :raw, :metadata, :doc, :provider, :schema_version, :license, :citation,
61
- :additional_type, :alternate_name, :url, :version, :keywords, :editor,
62
- :page_start, :page_end, :date_modified, :language, :spatial_coverage,
63
- :content_size, :funding, :journal, :bibtex_type, :date_created, :has_part,
64
- :publisher, :contributor, :same_as, :is_previous_version_of, :is_new_version_of,
65
- :should_passthru, :errors, :datacite_errors, :date_accepted, :date_available,
66
- :date_copyrighted, :date_collected, :date_submitted, :date_valid,
67
- :is_cited_by, :cites, :is_supplement_to, :is_supplemented_by,
68
- :is_continued_by, :continues, :has_metadata, :is_metadata_for,
69
- :is_referenced_by, :references, :is_documented_by, :documents,
70
- :is_compiled_by, :compiles, :is_variant_form_of, :is_original_form_of,
71
- :is_reviewed_by, :reviews, :is_derived_from, :is_source_of, :format,
72
- :related_identifier, :reverse, :citeproc_type, :ris_type, :volume, :issue,
73
- :member_id, :data_center_id, :date_registered, :date_updated, :name_detector
60
+ attr_accessor :doi, :author, :title, :publisher, :contributor, :license,
61
+ :date_accepted, :date_available, :date_copyrighted, :date_collected,
62
+ :date_submitted, :date_valid, :date_created, :date_modified,
63
+ :date_registered, :date_updated, :member_id, :data_center_id, :journal,
64
+ :volume, :issue, :pagination, :url, :version, :keywords, :editor,
65
+ :description, :alternate_name, :language, :content_size, :spatial_coverage,
66
+ :schema_version, :additional_type, :has_part, :same_as,
67
+ :is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
68
+ :is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
69
+ :has_metadata, :is_metadata_for, :is_referenced_by, :references,
70
+ :is_documented_by, :documents, :is_compiled_by, :compiles,
71
+ :is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
72
+ :is_derived_from, :is_source_of, :format, :funding, :type, :bibtex_type,
73
+ :citeproc_type, :ris_type
74
+
75
+ attr_reader :id, :from, :raw, :metadata, :doc, :provider, :citation,
76
+ :page_start, :page_end, :should_passthru, :errors,
77
+ :related_identifier, :reverse, :name_detector
74
78
 
75
79
  def initialize(input: nil, from: nil, regenerate: false, **options)
76
80
  id = normalize_id(input, options)
@@ -100,7 +104,6 @@ module Bolognese
100
104
  @raw = string.present? ? string.strip : nil
101
105
 
102
106
  @should_passthru = (@from == "datacite") && !regenerate
103
- @doi = options[:doi].presence
104
107
 
105
108
  @url = hsh.to_h["url"].presence
106
109
  @date_registered = hsh.to_h["date_registered"].presence
@@ -117,40 +120,39 @@ module Bolognese
117
120
  exists? && errors.nil?
118
121
  end
119
122
 
123
+ # validate against DataCite schema, unless there are already errors in the reader
120
124
  def errors
121
- metadata.fetch("errors", nil)
125
+ xml = should_passthru ? raw : datacite_xml
126
+ metadata.fetch("errors", nil) || datacite_errors(xml: xml,
127
+ schema_version: schema_version)
122
128
  end
123
129
 
124
- # def errors
125
- # doc && doc.errors.map { |error| error.to_s }.unwrap
126
- # end
127
-
128
130
  def id
129
131
  @doi.present? ? doi_as_url(@doi) : metadata.fetch("id", nil)
130
132
  end
131
133
 
132
134
  def type
133
- metadata.fetch("type", nil)
135
+ @type ||= metadata.fetch("type", nil)
134
136
  end
135
137
 
136
138
  def additional_type
137
- metadata.fetch("additional_type", nil)
139
+ @additional_type ||= metadata.fetch("additional_type", nil)
138
140
  end
139
141
 
140
142
  def citeproc_type
141
- metadata.fetch("citeproc_type", nil)
143
+ @citeproc_type ||= metadata.fetch("citeproc_type", nil)
142
144
  end
143
145
 
144
146
  def bibtex_type
145
- metadata.fetch("bibtex_type", nil)
147
+ @bibtex_type ||= metadata.fetch("bibtex_type", nil)
146
148
  end
147
149
 
148
150
  def ris_type
149
- metadata.fetch("ris_type", nil)
151
+ @ris_type ||= metadata.fetch("ris_type", nil)
150
152
  end
151
153
 
152
154
  def resource_type_general
153
- metadata.fetch("resource_type_general", nil)
155
+ @resource_type_general ||= metadata.fetch("resource_type_general", nil)
154
156
  end
155
157
 
156
158
  def doi
@@ -162,63 +164,63 @@ module Bolognese
162
164
  end
163
165
 
164
166
  def title
165
- metadata.fetch("title", nil)
167
+ @title ||= metadata.fetch("title", nil)
166
168
  end
167
169
 
168
170
  def alternate_name
169
- metadata.fetch("alternate_name", nil)
171
+ @alternate_name ||= metadata.fetch("alternate_name", nil)
170
172
  end
171
173
 
172
174
  def author
173
- metadata.fetch("author", nil)
175
+ @author ||= metadata.fetch("author", nil)
174
176
  end
175
177
 
176
178
  def editor
177
- metadata.fetch("editor", nil)
179
+ @editor ||= metadata.fetch("editor", nil)
178
180
  end
179
181
 
180
182
  def publisher
181
- metadata.fetch("publisher", nil)
183
+ @publisher ||= metadata.fetch("publisher", nil)
182
184
  end
183
185
 
184
186
  def provider
185
- metadata.fetch("provider", nil)
187
+ @provider ||= metadata.fetch("provider", nil)
186
188
  end
187
189
 
188
190
  def date_created
189
- metadata.fetch("date_created", nil)
191
+ @date_created ||= metadata.fetch("date_created", nil)
190
192
  end
191
193
 
192
194
  def date_accepted
193
- metadata.fetch("date_accepted", nil)
195
+ @date_accepted ||= metadata.fetch("date_accepted", nil)
194
196
  end
195
197
 
196
198
  def date_available
197
- metadata.fetch("date_available", nil)
199
+ @date_available ||= metadata.fetch("date_available", nil)
198
200
  end
199
201
 
200
202
  def date_copyrighted
201
- metadata.fetch("date_copyrighted", nil)
203
+ @date_copyrighted ||= metadata.fetch("date_copyrighted", nil)
202
204
  end
203
205
 
204
206
  def date_collected
205
- metadata.fetch("date_collected", nil)
207
+ @date_collected ||= metadata.fetch("date_collected", nil)
206
208
  end
207
209
 
208
210
  def date_submitted
209
- metadata.fetch("date_submitted", nil)
211
+ @date_submitted ||= metadata.fetch("date_submitted", nil)
210
212
  end
211
213
 
212
214
  def date_valid
213
- metadata.fetch("date_valid", nil)
215
+ @date_valid ||= metadata.fetch("date_valid", nil)
214
216
  end
215
217
 
216
218
  def date_published
217
- metadata.fetch("date_published", nil)
219
+ @date_published ||= metadata.fetch("date_published", nil)
218
220
  end
219
221
 
220
222
  def date_modified
221
- metadata.fetch("date_modified", nil)
223
+ @date_modified ||= metadata.fetch("date_modified", nil)
222
224
  end
223
225
 
224
226
  def date_registered
@@ -230,43 +232,43 @@ module Bolognese
230
232
  end
231
233
 
232
234
  def volume
233
- metadata.fetch("volume", nil)
235
+ @volume ||= metadata.fetch("volume", nil)
234
236
  end
235
237
 
236
238
  def pagination
237
- metadata.fetch("pagination", nil)
239
+ @pagination ||= metadata.fetch("pagination", nil)
238
240
  end
239
241
 
240
242
  def description
241
- metadata.fetch("description", nil)
243
+ @description ||= metadata.fetch("description", nil)
242
244
  end
243
245
 
244
246
  def license
245
- metadata.fetch("license", nil)
247
+ @license ||= metadata.fetch("license", nil)
246
248
  end
247
249
 
248
250
  def version
249
- metadata.fetch("version", nil)
251
+ @version ||= metadata.fetch("version", nil)
250
252
  end
251
253
 
252
254
  def keywords
253
- metadata.fetch("keywords", nil)
255
+ @keywords ||= metadata.fetch("keywords", nil)
254
256
  end
255
257
 
256
258
  def language
257
- metadata.fetch("language", nil)
259
+ @language ||= metadata.fetch("language", nil)
258
260
  end
259
261
 
260
262
  def content_size
261
- metadata.fetch("content_size", nil)
263
+ @content_size ||= metadata.fetch("content_size", nil)
262
264
  end
263
265
 
264
266
  def schema_version
265
- metadata.fetch("schema_version", nil)
267
+ @schema_version ||= metadata.fetch("schema_version", nil)
266
268
  end
267
269
 
268
270
  def funding
269
- metadata.fetch("funding", nil)
271
+ @funding ||= metadata.fetch("funding", nil)
270
272
  end
271
273
 
272
274
  def member_id
@@ -330,7 +332,8 @@ module Bolognese
330
332
  end
331
333
 
332
334
  def related_identifier_hsh(relation_type)
333
- Array.wrap(send(relation_type)).map { |r| r.merge("relationType" => relation_type.camelize) }
335
+ Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
336
+ .map { |r| r.merge("relationType" => relation_type.camelize) }
334
337
  end
335
338
 
336
339
  def related_identifier
@@ -98,7 +98,7 @@ module Bolognese
98
98
  meta.dig("crossref").keys.last.camelize
99
99
  end
100
100
  type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
101
- doi = bibliographic_metadata.dig("doi_data", "doi")
101
+ doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase
102
102
 
103
103
  { "id" => normalize_doi(doi),
104
104
  "type" => type,
@@ -29,7 +29,18 @@ module Bolognese
29
29
  end
30
30
 
31
31
  def read_datacite(string: nil, **options)
32
- meta = string.present? ? Maremma.from_xml(string).fetch("resource", {}) : {}
32
+ return { "errors" => "no content" } unless string.present?
33
+
34
+ meta = Maremma.from_xml(string).fetch("resource", {})
35
+ schema_version = meta.fetch("xmlns", nil)
36
+
37
+ # validate only when option is set, as this step is expensive and
38
+ # not needed if XML comes from DataCite MDS
39
+ if options[:validate]
40
+ errors = datacite_errors(xml: string, schema_version: schema_version)
41
+ return { "errors" => errors } if errors.present?
42
+ end
43
+
33
44
  id = normalize_doi(meta.dig("identifier", "__content__"), sandbox: options[:sandbox])
34
45
  doi = doi_from_url(id)
35
46
  resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
@@ -104,7 +115,7 @@ module Bolognese
104
115
  "keywords" => keywords,
105
116
  "language" => meta.fetch("language", nil),
106
117
  "content_size" => meta.fetch("size", nil),
107
- "schema_version" => meta.fetch("xmlns", nil)
118
+ "schema_version" => schema_version
108
119
  }
109
120
  end
110
121
 
@@ -36,7 +36,7 @@ module Bolognese
36
36
  ris_type = meta.fetch("TY", nil) || "GEN"
37
37
  type = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
38
38
 
39
- doi = meta.fetch("DO", nil)
39
+ doi = validate_doi(meta.fetch("DO", nil))
40
40
  author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
41
41
  container_title = meta.fetch("T2", nil)
42
42
  is_part_of = if container_title.present?
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.9.36"
2
+ VERSION = "0.9.37"
3
3
  end
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module CiteprocWriter
4
4
  def citeproc
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "type" => citeproc_type,
9
7
  "id" => id,
@@ -3,7 +3,7 @@ module Bolognese
3
3
  module CodemetaWriter
4
4
  def codemeta
5
5
  return nil unless valid?
6
-
6
+
7
7
  hsh = {
8
8
  "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
9
9
  "@type" => type,
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module CrossciteWriter
4
4
  def crosscite
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "id" => id,
9
7
  "doi" => doi,
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module DataciteJsonWriter
4
4
  def datacite_json
5
- return nil unless valid?
6
-
7
5
  hsh = {
8
6
  "id" => id,
9
7
  "doi" => doi,
@@ -2,8 +2,6 @@ module Bolognese
2
2
  module Writers
3
3
  module RdfXmlWriter
4
4
  def rdf_xml
5
- return nil unless valid?
6
-
7
5
  graph.dump(:rdfxml, prefixes: { schema: "http://schema.org/" })
8
6
  end
9
7
  end