bolognese 0.7.1 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +5 -3
  3. data/README.md +5 -0
  4. data/bolognese.gemspec +1 -0
  5. data/lib/bolognese.rb +1 -0
  6. data/lib/bolognese/bibtex.rb +4 -2
  7. data/lib/bolognese/cli.rb +3 -3
  8. data/lib/bolognese/codemeta.rb +4 -3
  9. data/lib/bolognese/crossref.rb +4 -4
  10. data/lib/bolognese/datacite.rb +30 -23
  11. data/lib/bolognese/datacite_utils.rb +17 -10
  12. data/lib/bolognese/metadata.rb +9 -7
  13. data/lib/bolognese/schema_org.rb +4 -4
  14. data/lib/bolognese/utils.rb +7 -3
  15. data/lib/bolognese/version.rb +1 -1
  16. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  17. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  18. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  19. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  20. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  21. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  22. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  23. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  24. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  25. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  26. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  27. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  28. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  29. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  30. data/resources/kernel-2.1/metadata.xsd +315 -0
  31. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  32. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  33. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  34. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  35. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  36. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  37. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  38. data/resources/kernel-2.2/metadata.xsd +316 -0
  39. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  40. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  41. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  42. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  43. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  44. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  45. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  46. data/resources/kernel-3/metadata.xsd +380 -0
  47. data/resources/{kernel-4.0 → kernel-4}/include/datacite-contributorType-v4.xsd +0 -0
  48. data/resources/{kernel-4.0 → kernel-4}/include/datacite-dateType-v4.xsd +0 -0
  49. data/resources/{kernel-4.0 → kernel-4}/include/datacite-descriptionType-v4.xsd +0 -0
  50. data/resources/{kernel-4.0 → kernel-4}/include/datacite-funderIdentifierType-v4.xsd +0 -0
  51. data/resources/{kernel-4.0 → kernel-4}/include/datacite-relatedIdentifierType-v4.xsd +0 -0
  52. data/resources/{kernel-4.0 → kernel-4}/include/datacite-relationType-v4.xsd +0 -0
  53. data/resources/{kernel-4.0 → kernel-4}/include/datacite-resourceType-v4.xsd +0 -0
  54. data/resources/{kernel-4.0 → kernel-4}/include/datacite-titleType-v4.xsd +0 -0
  55. data/resources/{kernel-4.0 → kernel-4}/metadata.xsd +0 -0
  56. data/spec/bibtex_spec.rb +1 -2
  57. data/spec/cli_spec.rb +1 -1
  58. data/spec/codemeta_spec.rb +6 -8
  59. data/spec/crossref_spec.rb +7 -10
  60. data/spec/datacite_spec.rb +39 -43
  61. data/spec/fixtures/datacite_dataset.xml +58 -0
  62. data/spec/fixtures/datacite_missing_creator.xml +1 -0
  63. data/spec/schema_org_spec.rb +3 -4
  64. metadata +57 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e529a85cd30e24af04203358d96f53dfe7c17526
4
- data.tar.gz: 588daf5d103c64b25e4c06709dc6815c32f67587
3
+ metadata.gz: bc292f3921b5634ec479a7e392fd8f9c476b5739
4
+ data.tar.gz: 33a44a56720c036829596fad78a38674dc1bcdef
5
5
  SHA512:
6
- metadata.gz: b0d7078349cd7b70cf40e996099c1017b9dbf9487580236aa8c8fa974ef28dc61ad7508ccf827c4ee9ea0a7fdd9c51ba9fa9e37554ecb807ab17a7bca8ee9673
7
- data.tar.gz: 2772cbe1d11e52aa3835279161e826552dc3ddda889109d460d8a79996f94202784b689ce65172a9ee0af08c2e65f51d8dbd99a3e01a152f86231e400e65f9f4
6
+ metadata.gz: 14e2ba1b16d38ffdf5bb839596b7c1b73e62fa2d2a35cca89d3c615801334f26c36dc302f87080a112ea5be6165614a49816a218242c86632422c4b937c7612b
7
+ data.tar.gz: 5415c8c7283392900c4558d3b732099dc47216774207ce47d369221df3c2bf096abfdb0ca79e4a955b67135288c2d2ee80375b684c43a205bf163bb68b142785
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (0.7.1)
4
+ bolognese (0.7.2)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  bibtex-ruby (~> 4.1)
7
7
  builder (~> 3.2, >= 3.2.2)
8
+ colorize (~> 0.8.1)
8
9
  maremma (~> 3.5)
9
10
  namae (~> 0.10.2)
10
11
  nokogiri (~> 1.6, >= 1.6.8)
@@ -23,8 +24,9 @@ GEM
23
24
  bibtex-ruby (4.4.3)
24
25
  latex-decode (~> 0.0)
25
26
  builder (3.2.3)
26
- codeclimate-test-reporter (1.0.5)
27
+ codeclimate-test-reporter (1.0.6)
27
28
  simplecov
29
+ colorize (0.8.1)
28
30
  crack (0.4.3)
29
31
  safe_yaml (~> 1.0.0)
30
32
  diff-lcs (1.3)
@@ -92,7 +94,7 @@ GEM
92
94
  simplecov-html (~> 0.10.0)
93
95
  simplecov-html (0.10.0)
94
96
  thor (0.19.4)
95
- thread_safe (0.3.5)
97
+ thread_safe (0.3.6)
96
98
  tzinfo (1.2.2)
97
99
  thread_safe (~> 0.1)
98
100
  unicode (0.4.4.2)
data/README.md CHANGED
@@ -105,6 +105,11 @@ Commands:
105
105
  bolognese --version, -v # print the version
106
106
  bolognese help [COMMAND] # Describe available commands or one specific command
107
107
  ```
108
+ ## Errors
109
+
110
+ Errors are returned to STDERR.
111
+
112
+ All DataCite XML input is validated against the corresponding schema version (kernel 2.1, 2.2, 3, or 4).
108
113
 
109
114
  ## Examples
110
115
 
data/bolognese.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '~> 4.2', '>= 4.2.5'
22
22
  s.add_dependency 'bibtex-ruby', '~> 4.1'
23
23
  s.add_dependency 'thor', '~> 0.19'
24
+ s.add_dependency 'colorize', '~> 0.8.1'
24
25
  s.add_dependency 'namae', '~> 0.10.2'
25
26
  s.add_dependency 'postrank-uri', '~> 1.0', '>= 1.0.18'
26
27
  s.add_development_dependency 'bundler', '~> 1.0'
data/lib/bolognese.rb CHANGED
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
  require 'maremma'
4
4
  require 'postrank-uri'
5
5
  require 'bibtex'
6
+ require 'colorize'
6
7
 
7
8
  require "bolognese/version"
8
9
  require "bolognese/metadata"
@@ -30,8 +30,6 @@ module Bolognese
30
30
  @raw = string
31
31
  end
32
32
 
33
- alias_method :schema_org, :as_schema_org
34
-
35
33
  def metadata
36
34
  @metadata ||= raw.present? ? BibTeX.parse(raw).first : {}
37
35
  end
@@ -40,6 +38,10 @@ module Bolognese
40
38
  metadata.present?
41
39
  end
42
40
 
41
+ def valid?
42
+ true
43
+ end
44
+
43
45
  def type
44
46
  BIB_TO_SO_TRANSLATIONS[metadata.type.to_s] || "ScholarlyArticle"
45
47
  end
data/lib/bolognese/cli.rb CHANGED
@@ -11,7 +11,7 @@ module Bolognese
11
11
  include Bolognese::Utils
12
12
 
13
13
  def self.exit_on_failure?
14
- false
14
+ true
15
15
  end
16
16
 
17
17
  # from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
@@ -25,7 +25,7 @@ module Bolognese
25
25
  desc "", "convert metadata"
26
26
  method_option :from, aliases: "-f"
27
27
  method_option :to, aliases: "-t", default: "schema_org"
28
- method_option :schema_version
28
+ method_option :regenerate, :type => :boolean, :force => false
29
29
  def convert(input)
30
30
  id = normalize_id(input)
31
31
 
@@ -45,7 +45,7 @@ module Bolognese
45
45
 
46
46
  to = options[:to] || "schema_org"
47
47
 
48
- write(id: id, string: string, from: from, to: to, schema_version: options[:schema_version])
48
+ write(id: id, string: string, from: from, to: to, regenerate: options[:regenerate])
49
49
  end
50
50
 
51
51
  default_task :convert
@@ -12,9 +12,6 @@ module Bolognese
12
12
  end
13
13
  end
14
14
 
15
- alias_method :schema_org, :as_schema_org
16
- alias_method :bibtex, :as_bibtex
17
-
18
15
  def metadata
19
16
  @metadata ||= raw.present? ? Maremma.from_json(raw) : {}
20
17
  end
@@ -23,6 +20,10 @@ module Bolognese
23
20
  metadata.present?
24
21
  end
25
22
 
23
+ def valid?
24
+ true
25
+ end
26
+
26
27
  def doi
27
28
  doi_from_url(id)
28
29
  end
@@ -72,10 +72,6 @@ module Bolognese
72
72
  end
73
73
 
74
74
  alias_method :crossref, :raw
75
- alias_method :as_crossref, :raw
76
- alias_method :schema_org, :as_schema_org
77
- alias_method :codemeta, :as_codemeta
78
- alias_method :bibtex, :as_bibtex
79
75
 
80
76
  def metadata
81
77
  @metadata ||= raw.present? ? Maremma.from_xml(raw).fetch("doi_records", {}).fetch("doi_record", {}) : {}
@@ -85,6 +81,10 @@ module Bolognese
85
81
  metadata.present?
86
82
  end
87
83
 
84
+ def valid?
85
+ true
86
+ end
87
+
88
88
  def doi
89
89
  bibliographic_metadata.dig("doi_data", "doi")
90
90
  end
@@ -18,7 +18,9 @@ module Bolognese
18
18
  "Other" => "CreativeWork"
19
19
  }
20
20
 
21
- def initialize(id: nil, string: nil, schema_version: nil)
21
+ SCHEMA = File.expand_path("../../../resources/kernel-4.0/metadata.xsd", __FILE__)
22
+
23
+ def initialize(id: nil, string: nil, regenerate: false)
22
24
  id = normalize_doi(id) if id.present?
23
25
 
24
26
  if string.present?
@@ -28,25 +30,12 @@ module Bolognese
28
30
  @raw = response.body.fetch("data", nil)
29
31
  end
30
32
 
31
- @schema_version = schema_version
32
- end
33
-
34
- alias_method :schema_org, :as_schema_org
35
- alias_method :codemeta, :as_codemeta
36
- alias_method :bibtex, :as_bibtex
37
-
38
- def schema_version
39
- @schema_version ||= metadata.fetch("xsi:schemaLocation", "").split(" ").first
33
+ @should_passthru = !regenerate
40
34
  end
41
35
 
42
- # show DataCite XML in different version if schema_version option is provided
43
- # currently only supports 4.0
36
+ # generate new DataCite XML version 4.0 if regenerate (!should_passthru) option is provided
44
37
  def datacite
45
- if schema_version != metadata.fetch("xsi:schemaLocation", "").split(" ").first
46
- as_datacite
47
- else
48
- raw
49
- end
38
+ should_passthru ? raw : datacite_xml
50
39
  end
51
40
 
52
41
  def metadata
@@ -57,6 +46,25 @@ module Bolognese
57
46
  metadata.present?
58
47
  end
59
48
 
49
+ def valid?
50
+ errors.blank?
51
+ end
52
+
53
+ def errors
54
+ arr = schema.validate(Nokogiri::XML(raw)).map { |error| error.to_s }
55
+ array_unwrap(arr)
56
+ end
57
+
58
+ def schema_version
59
+ metadata.fetch("xmlns", nil)
60
+ end
61
+
62
+ def schema
63
+ kernel = schema_version.split("/").last
64
+ filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)
65
+ Nokogiri::XML::Schema(open(filepath))
66
+ end
67
+
60
68
  def doi
61
69
  metadata.fetch("identifier", {}).fetch("__content__", nil)
62
70
  end
@@ -90,13 +98,12 @@ module Bolognese
90
98
  parse_attributes(metadata.dig("alternateIdentifiers", "alternateIdentifier"))
91
99
  end
92
100
 
101
+ def descriptions
102
+ Array.wrap(metadata.dig("descriptions", "description"))
103
+ end
104
+
93
105
  def description
94
- des = metadata.dig("descriptions", "description", "__content__")
95
- if des.is_a?(Hash)
96
- des.to_xml
97
- elsif des.is_a?(String)
98
- des.strip
99
- end
106
+ parse_attributes(descriptions)
100
107
  end
101
108
 
102
109
  def license
@@ -38,22 +38,19 @@ module Bolognese
38
38
  end
39
39
 
40
40
  def datacite_xml
41
- Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
41
+ @datacite_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
42
42
  xml.resource(root_attributes) do
43
43
  insert_work(xml)
44
44
  end
45
45
  end.to_xml
46
46
  end
47
47
 
48
- def as_datacite
49
- if validation_errors.blank?
50
- datacite_xml
51
- end
48
+ def datacite
49
+ datacite_xml
52
50
  end
53
51
 
54
- def validation_errors
55
- @validation_errors ||= schema.validate(Nokogiri::XML(datacite_xml))
56
- .map { |error| error.to_s }
52
+ def datacite_errors
53
+ @datacite_errors ||= schema.validate(Nokogiri::XML(datacite)).map { |error| error.to_s }
57
54
  end
58
55
 
59
56
  def insert_work(xml)
@@ -227,10 +224,20 @@ module Bolognese
227
224
  end
228
225
 
229
226
  def insert_descriptions(xml)
230
- return xml unless description.present?
227
+ return xml unless descriptions.present?
231
228
 
232
229
  xml.descriptions do
233
- xml.description(description, 'descriptionType' => "Abstract")
230
+ Array.wrap(description).each do |des|
231
+ insert_description(xml, des)
232
+ end
233
+ end
234
+ end
235
+
236
+ def insert_description(xml, des)
237
+ if des.is_a?(String)
238
+ xml.description(des.strip, 'descriptionType' => "Abstract")
239
+ elsif des.is_a?(Hash)
240
+ xml.description(des["__content__"].strip, 'descriptionType' => des["descriptionType"],)
234
241
  end
235
242
  end
236
243
 
@@ -16,14 +16,17 @@ module Bolognese
16
16
  :additional_type, :alternate_name, :url, :version, :keywords, :editor,
17
17
  :page_start, :page_end, :date_modified, :language, :spatial_coverage,
18
18
  :content_size, :funder, :journal, :bibtex_type, :date_created, :has_part,
19
- :publisher, :contributor, :schema_version, :same_as, :predecessor_of, :successor_of
20
-
21
- alias_method :datacite, :as_datacite
19
+ :publisher, :contributor, :same_as, :predecessor_of,
20
+ :successor_of, :should_passthru, :datacite_errors
22
21
 
23
22
  def publication_year
24
23
  date_published && date_published[0..3]
25
24
  end
26
25
 
26
+ def descriptions
27
+ Array.wrap(description)
28
+ end
29
+
27
30
  def pagination
28
31
  [page_start, page_end].compact.join("-").presence
29
32
  end
@@ -32,7 +35,7 @@ module Bolognese
32
35
  publisher.to_h.fetch("name", nil)
33
36
  end
34
37
 
35
- def as_schema_org
38
+ def schema_org
36
39
  { "@context" => id.present? ? "http://schema.org" : nil,
37
40
  "@type" => type,
38
41
  "@id" => id,
@@ -67,7 +70,7 @@ module Bolognese
67
70
  }.compact.to_json
68
71
  end
69
72
 
70
- def as_codemeta
73
+ def codemeta
71
74
  { "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
72
75
  "@type" => type,
73
76
  "@id" => id,
@@ -85,7 +88,7 @@ module Bolognese
85
88
  }.compact.to_json
86
89
  end
87
90
 
88
- def as_bibtex
91
+ def bibtex
89
92
  bib = {
90
93
  bibtex_type: bibtex_type.to_sym,
91
94
  bibtex_key: id,
@@ -100,7 +103,6 @@ module Bolognese
100
103
  publisher: publisher_string,
101
104
  year: publication_year
102
105
  }.compact
103
-
104
106
  BibTeX::Entry.new(bib).to_s
105
107
  end
106
108
  end
@@ -13,10 +13,6 @@ module Bolognese
13
13
  end
14
14
  end
15
15
 
16
- alias_method :schema_org, :as_schema_org
17
- alias_method :codemeta, :as_codemeta
18
- alias_method :bibtex, :as_bibtex
19
-
20
16
  def metadata
21
17
  @metadata ||= raw.present? ? Maremma.from_json(raw) : {}
22
18
  end
@@ -25,6 +21,10 @@ module Bolognese
25
21
  metadata.present?
26
22
  end
27
23
 
24
+ def valid?
25
+ true
26
+ end
27
+
28
28
  def doi
29
29
  doi_from_url(id)
30
30
  end
@@ -28,7 +28,7 @@ module Bolognese
28
28
  "bibtex"
29
29
  elsif options[:ext] == ".xml" && Maremma.from_xml(string).dig("doi_records", "doi_record", "crossref")
30
30
  "crossref"
31
- elsif options[:ext] == ".xml" && Maremma.from_xml(string).dig("resource", "xmlns") == "http://datacite.org/schema/kernel-4"
31
+ elsif options[:ext] == ".xml" && Maremma.from_xml(string).dig("resource", "xmlns").start_with?("http://datacite.org/schema/kernel")
32
32
  "datacite"
33
33
  elsif options[:filename] == "codemeta.json"
34
34
  "codemeta"
@@ -39,13 +39,17 @@ module Bolognese
39
39
  if from.present?
40
40
  p = case from
41
41
  when "crossref" then Crossref.new(id: id, string: string)
42
- when "datacite" then Datacite.new(id: id, string: string, schema_version: options[:schema_version])
42
+ when "datacite" then Datacite.new(id: id, string: string, regenerate: options[:regenerate])
43
43
  when "codemeta" then Codemeta.new(id: id, string: string)
44
44
  when "bibtex" then Bibtex.new(string: string)
45
45
  else SchemaOrg.new(id: id)
46
46
  end
47
47
 
48
- puts p.send(to)
48
+ if p.valid?
49
+ puts p.send(to)
50
+ else
51
+ $stderr.puts p.errors.colorize(:red)
52
+ end
49
53
  else
50
54
  puts "not implemented"
51
55
  end
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.7.1"
2
+ VERSION = "0.7.2"
3
3
  end
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany -->
3
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://datacite.org/schema/kernel-2.1" elementFormDefault="qualified">
4
+ <xs:simpleType name="contributorType">
5
+ <xs:annotation>
6
+ <xs:documentation>The type of contributor of the resource.</xs:documentation>
7
+ </xs:annotation>
8
+ <xs:restriction base="xs:string">
9
+ <xs:enumeration value="ContactPerson"/>
10
+ <xs:enumeration value="DataCollector"/>
11
+ <xs:enumeration value="DataManager"/>
12
+ <xs:enumeration value="Editor"/>
13
+ <xs:enumeration value="HostingInstitution"/>
14
+ <xs:enumeration value="ProjectLeader"/>
15
+ <xs:enumeration value="ProjectMember"/>
16
+ <xs:enumeration value="RegistrationAgency"/>
17
+ <xs:enumeration value="RegistrationAuthority"/>
18
+ <xs:enumeration value="Researcher"/>
19
+ <xs:enumeration value="WorkPackageLeader"/>
20
+ </xs:restriction>
21
+ </xs:simpleType>
22
+ </xs:schema>
@@ -0,0 +1,3 @@
1
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-2.1" targetNamespace="http://datacite.org/schema/kernel-2.1" elementFormDefault="qualified">
2
+ <xs:include schemaLocation="datacite-contributorType-v1.1.xsd"/>
3
+ </xs:schema>