bolognese 2.0.3 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc718876c65857d9b07af2752b4ce8ed8bda05b5e3f7dae983530a414262b9e6
4
- data.tar.gz: 997f406aa28447acf4507aad0c2ffb0f4e4e42da4028dbe2210d027b438dea09
3
+ metadata.gz: 71ee294125ac977b19c081b25a540d504bc78e932be89beb5d25e4754637166d
4
+ data.tar.gz: e95a2a6495a9ae0cf3014ba20024902da8ab1fd78d28472abfa0472ea3eb9b2b
5
5
  SHA512:
6
- metadata.gz: e5692d0fa4a116b0c3f32ae2be26f916f282860a69daed4564fa7b8e91823d8648eddf958fc1fdd9c9c86ab2fcf81f108b334da57b8b9c6ce7fd79b94c009262
7
- data.tar.gz: d1a74bd310e4ad4a7e8f34c218685cf3d76183d063feffab36c926e4d449e46a0792b2d279d4aeb6540ad74c441cabda0d242053eb96a01f5c6598112a1848e2
6
+ metadata.gz: 02de737b6845e350022dff585da6c2c5ef564852ae41e569d27474aee996afb482fa7711e7a47ebe0d020df9d7a93e88dc889d2529896955306a784d0b4862b5
7
+ data.tar.gz: 78b6149f043f22700f638a8105e9242e98e9da9cb4329a427f35c0e4c51125044eedaf639994b0586296f1cb1e65c15694304b340a5b3dab9a36631490c45fd0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (2.0.3)
4
+ bolognese (2.1.1)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -235,4 +235,4 @@ DEPENDENCIES
235
235
  webmock (~> 3.0, >= 3.0.1)
236
236
 
237
237
  BUNDLED WITH
238
- 2.4.20
238
+ 2.4.22
@@ -74,8 +74,11 @@ module Bolognese
74
74
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
75
75
  end
76
76
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
77
- publisher_name = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
78
- publisher = { "name" => publisher_name } if publisher_name.present?
77
+
78
+ publisher = {
79
+ "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
80
+ "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
81
+ }.compact if meta.fetch("publisher", nil).present?
79
82
 
80
83
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
81
84
  container = if meta.fetch(ct, nil).present?
@@ -600,12 +600,12 @@ module Bolognese
600
600
  end
601
601
 
602
602
  def validate_orcid(orcid)
603
- orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z/.match(orcid)).last
603
+ orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)).last
604
604
  orcid.gsub(/[[:space:]]/, "-") if orcid.present?
605
605
  end
606
606
 
607
607
  def validate_ror(ror)
608
- Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})$/.match(ror)).last
608
+ Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)).last
609
609
  end
610
610
 
611
611
  def validate_orcid_scheme(orcid_scheme)
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "2.0.3"
2
+ VERSION = "2.1.1"
3
3
  end
@@ -4,11 +4,8 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- # Remove the following change for the schema 4.5 release
8
7
  if crosscite_hsh.present?
9
- datacite_json_hsh = crosscite_hsh
10
- datacite_json_hsh['publisher'] = self.publisher['name'] if self.publisher&.respond_to?(:to_hash) && self.publisher.has_key?('name') && !self.publisher['name'].blank?
11
- JSON.pretty_generate datacite_json_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
8
+ JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
12
9
  end
13
10
  end
14
11
  end
@@ -86,7 +86,10 @@ module Bolognese
86
86
  end
87
87
 
88
88
  def insert_publisher_name(xml)
89
- xml.send("publisher-name", publisher["name"])
89
+ attributes = {
90
+ "xml:lang" => publisher["lang"]
91
+ }.compact
92
+ xml.send("publisher-name", attributes, publisher["name"])
90
93
  end
91
94
 
92
95
  def insert_publication_date(xml)
@@ -37,7 +37,7 @@ module Bolognese
37
37
  "schemaVersion" => schema_version,
38
38
  "periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
39
39
  "includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
40
- "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher["name"] } : nil,
40
+ "publisher" => publisher.present? ? { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact : nil,
41
41
  "funder" => to_schema_org_funder(funding_references),
42
42
  "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
43
43
  }.compact.presence
@@ -168,11 +168,12 @@ describe Bolognese::Metadata, vcr: true do
168
168
  expect(subject.creators[4]).to eq("nameType"=>"Organizational", "name"=>"University Of Kivu", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/01qfhxr31", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[])
169
169
  expect(subject.creators[5]).to eq("nameType"=>"Organizational", "name"=>"សាកលវិទ្យាល័យកម្ពុជា", "nameIdentifiers"=> [{"nameIdentifier"=>"http://ror.org/025e3rc84", "nameIdentifierScheme"=>"RORS"}], "affiliation"=>[])
170
170
  expect(subject.creators[6]).to eq("nameType"=>"Organizational", "name"=>"جامعة زاخۆ", "nameIdentifiers"=> [{"nameIdentifier"=>"05sd1pz50", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"RORS"}], "affiliation"=>[])
171
+ expect(subject.creators[9]).to eq("nameType"=>"Organizational", "name"=>"Gump South Pacific Research Station", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/04sk0et52", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[])
171
172
  expect(subject.contributors.first).to eq("nameType"=>"Organizational", "name"=>" Nawroz University ", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/04gp75d48", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[], "contributorType"=>"Producer")
172
173
  expect(subject.contributors.last).to eq("nameType"=>"Organizational", "name"=>"University Of Greenland (Https://Www.Uni.Gl/)", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/00t5j6b61", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}],"affiliation"=>[], "contributorType"=>"Sponsor")
173
174
  end
174
175
 
175
- context "affiliationIdentifier" do
176
+ context "affiliationIdentifier/nameIdentifier" do
176
177
  let(:input) { fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' }
177
178
  subject { Bolognese::Metadata.new(input: input, from: "datacite") }
178
179
 
@@ -207,6 +208,11 @@ describe Bolognese::Metadata, vcr: true do
207
208
  expect(subject.creators[8]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0118", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
208
209
  end
209
210
 
211
+ it "should normalize valid ORCID nameIdentifier with trailing slash" do
212
+ #" 0000-0001-9998-0118 ", # Valid ORCID with leading/trailing spaces
213
+ expect(subject.creators[10]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
214
+ end
215
+
210
216
  it "should parse non ROR schema's without normalizing them" do
211
217
  input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml'
212
218
  subject = Bolognese::Metadata.new(input: input, from: "datacite")
@@ -250,4 +256,4 @@ describe Bolognese::Metadata, vcr: true do
250
256
  expect(response).to eq("Matt Jones and Peter Slaughter and {University of California, Santa Barbara}")
251
257
  end
252
258
  end
253
- end
259
+ end
@@ -43,6 +43,14 @@
43
43
  <creatorName nameType="Personal">Mike B</creatorName>
44
44
  <nameIdentifier nameIdentifierScheme="ORCID"> 0000-0001-9998-0118 </nameIdentifier>
45
45
  </creator>
46
+ <creator>
47
+ <creatorName nameType="Organizational">Gump South Pacific Research Station</creatorName>
48
+ <nameIdentifier nameIdentifierScheme="ROR" schemeURI="https://ror.org/">https://ror.org/04sk0et52/</nameIdentifier>
49
+ </creator>
50
+ <creator>
51
+ <creatorName nameType="Personal">Ashwini Sukale</creatorName>
52
+ <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">https://orcid.org/0000-0001-9998-0117/</nameIdentifier>
53
+ </creator>
46
54
  </creators>
47
55
  <titles>
48
56
  <title xml:lang="en-US">Genomic Standards Consortium (GSC) Island Sampling Day: Moorea Reef to Ridges Genomic Transect</title>
@@ -15,7 +15,7 @@
15
15
  <title xml:lang="en">DOI Test 2 title content</title>
16
16
  <title xml:lang="en">AAPP</title>
17
17
  </titles>
18
- <publisher>OSI SAF</publisher>
18
+ <publisher xml:lang="en">OSI SAF</publisher>
19
19
  <publicationYear>2004</publicationYear>
20
20
  <resourceType resourceTypeGeneral="Collection">Climate Data Record</resourceType>
21
21
  <dates>
@@ -14,6 +14,7 @@
14
14
  }],
15
15
  "publisher": {
16
16
  "@type": "Organization",
17
+ "@id": "https://ror.org/04wxnsj81",
17
18
  "name": "DataCite"
18
19
  },
19
20
  "dateCreated": "2016-12-20",
@@ -169,7 +169,7 @@ describe Bolognese::Metadata, vcr: true do
169
169
  expect(subject.publication_year).to eq("2016")
170
170
  expect(subject.related_identifiers.length).to eq(3)
171
171
  expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References")
172
- expect(subject.publisher).to eq({"name"=>"DataCite"})
172
+ expect(subject.publisher).to eq({"name"=>"DataCite", "publisherIdentifier"=>"https://ror.org/04wxnsj81"})
173
173
  end
174
174
 
175
175
  it "GTEx dataset" do
@@ -18,9 +18,7 @@ describe Bolognese::Metadata, vcr: true do
18
18
  "rightsIdentifierScheme"=>"SPDX",
19
19
  "rightsUri"=>"https://creativecommons.org/licenses/by/3.0/legalcode",
20
20
  "schemeUri"=>"https://spdx.org/licenses/"}])
21
- # REVERT the publisher line to the commented line for the schema 4.5 release
22
- # expect(datacite.fetch("publisher")).to eq({"name"=>"eLife Sciences Publications, Ltd"})
23
- expect(datacite.fetch("publisher")).to eq("eLife Sciences Publications, Ltd")
21
+ expect(datacite.fetch("publisher")).to eq({"name"=>"eLife Sciences Publications, Ltd"})
24
22
  end
25
23
 
26
24
  it "with ORCID ID" do
@@ -31,9 +29,7 @@ describe Bolognese::Metadata, vcr: true do
31
29
  expect(datacite.fetch("types")).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"JournalArticle", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle")
32
30
  expect(datacite.fetch("creators").length).to eq(7)
33
31
  expect(datacite.fetch("creators").first).to eq("nameType"=>"Personal", "name"=>"Thanassi, Wendy", "givenName"=>"Wendy", "familyName"=>"Thanassi", "affiliation" => [{"name"=>"Department of Medicine, Veterans Affairs Palo Alto Health Care System, 3801 Miranda Avenue MC-, Palo Alto, CA 94304-1207, USA"}, {"name"=>"Occupational Health Strategic Health Care Group, Office of Public Health, Veterans Health Administration, Washington, DC 20006, USA"}, {"name"=>"Division of Emergency Medicine, Stanford University School of Medicine, Stanford, CA 94304, USA"}, {"name"=>"War Related Illness and Injury Study Center (WRIISC) and Mental Illness Research Education and Clinical Center (MIRECC), Department of Veterans Affairs, Palo Alto, CA 94304, USA"}])
34
- # REVERT the publisher line to the commented line for the schema 4.5 release
35
- # expect(datacite.fetch("publisher")).to eq({"name"=>"Hindawi Limited"})
36
- expect(datacite.fetch("publisher")).to eq("Hindawi Limited")
32
+ expect(datacite.fetch("publisher")).to eq({"name"=>"Hindawi Limited"})
37
33
  end
38
34
 
39
35
  it "Crossref DOI" do
@@ -45,9 +41,7 @@ describe Bolognese::Metadata, vcr: true do
45
41
  expect(datacite.dig("descriptions", 0, "description")).to start_with("Among various advantages, their small size makes model organisms preferred subjects of investigation.")
46
42
  expect(datacite.fetch("creators").length).to eq(5)
47
43
  expect(datacite.fetch("creators").first).to eq("nameType"=>"Personal", "name"=>"Sankar, Martial", "givenName"=>"Martial", "familyName"=>"Sankar")
48
- # REVERT the publisher line to the commented line for the schema 4.5 release
49
- # expect(datacite.fetch("publisher")).to eq({"name"=>"{eLife} Sciences Organisation, Ltd."})
50
- expect(datacite.fetch("publisher")).to eq("{eLife} Sciences Organisation, Ltd.")
44
+ expect(datacite.fetch("publisher")).to eq({"name"=>"{eLife} Sciences Organisation, Ltd."})
51
45
  end
52
46
 
53
47
  it "BlogPosting Citeproc JSON" do
@@ -58,9 +52,7 @@ describe Bolognese::Metadata, vcr: true do
58
52
  expect(datacite.fetch("titles")).to eq([{"title"=>"Eating your own Dog Food"}])
59
53
  expect(datacite.dig("descriptions", 0, "description")).to start_with("Eating your own dog food")
60
54
  expect(datacite.fetch("creators")).to eq([{"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin"}])
61
- # REVERT the publisher line to the commented line for the schema 4.5 release
62
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
63
- expect(datacite.fetch("publisher")).to eq("DataCite")
55
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
64
56
  end
65
57
 
66
58
  it "rdataone" do
@@ -72,9 +64,7 @@ describe Bolognese::Metadata, vcr: true do
72
64
  expect(datacite.fetch("creators").first).to eq("affiliation"=>[{"name"=>"NCEAS"}], "nameType"=>"Personal", "name" => "Jones, Matt",
73
65
  "nameIdentifiers" => [{"nameIdentifier"=>"https://orcid.org/0000-0003-0077-4738", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "givenName"=>"Matt", "familyName"=>"Jones")
74
66
  expect(datacite.fetch("version")).to eq("2.0.0")
75
- # REVERT the publisher line to the commented line for the schema 4.5 release
76
- # expect(datacite.fetch("publisher")).to eq({"name"=>"https://cran.r-project.org"})
77
- expect(datacite.fetch("publisher")).to eq("https://cran.r-project.org")
67
+ expect(datacite.fetch("publisher")).to eq({"name"=>"https://cran.r-project.org"})
78
68
  end
79
69
 
80
70
  it "maremma" do
@@ -90,9 +80,7 @@ describe Bolognese::Metadata, vcr: true do
90
80
  [{"nameIdentifier"=>"https://orcid.org/0000-0003-0077-4738",
91
81
  "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}],
92
82
  "nameType"=>"Personal"}])
93
- # REVERT the publisher line to the commented line for the schema 4.5 release
94
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
95
- expect(datacite.fetch("publisher")).to eq("DataCite")
83
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
96
84
  end
97
85
 
98
86
  it "with data citation schema.org" do
@@ -103,26 +91,22 @@ describe Bolognese::Metadata, vcr: true do
103
91
  expect(datacite.fetch("titles")).to eq([{"title"=>"Eating your own Dog Food"}])
104
92
  expect(datacite.fetch("relatedIdentifiers").count).to eq(3)
105
93
  expect(datacite.fetch("relatedIdentifiers").first).to eq("relatedIdentifier"=>"10.5438/0000-00ss", "relatedIdentifierType"=>"DOI", "relationType"=>"IsPartOf", "resourceTypeGeneral"=>"Text")
106
- # REVERT the publisher line to the commented line for the schema 4.5 release
107
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
108
- expect(datacite.fetch("publisher")).to eq("DataCite")
94
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
109
95
  end
110
96
 
111
97
  it "DataCite DOI with Schema 4.5 publisher attributes" do
112
98
  input = fixture_path + "datacite-example-full-v4.5.xml"
113
99
  subject = Bolognese::Metadata.new(input: input, from: "datacite")
114
100
  datacite = JSON.parse(subject.datacite_json)
115
- # REVERT the publisher line to the commented lines for the schema 4.5 release
116
- # expect(datacite.fetch("publisher")).to eq(
117
- # {
118
- # "name" => "Example Publisher",
119
- # "publisherIdentifier" => "https://ror.org/04z8jg394",
120
- # "publisherIdentifierScheme" => "ROR",
121
- # "schemeUri" => "https://ror.org/",
122
- # "lang" => "en",
123
- # },
124
- # )
125
- expect(datacite.fetch("publisher")).to eq("Example Publisher")
101
+ expect(datacite.fetch("publisher")).to eq(
102
+ {
103
+ "name" => "Example Publisher",
104
+ "publisherIdentifier" => "https://ror.org/04z8jg394",
105
+ "publisherIdentifierScheme" => "ROR",
106
+ "schemeUri" => "https://ror.org/",
107
+ "lang" => "en",
108
+ },
109
+ )
126
110
  end
127
111
  end
128
112
  end
@@ -179,6 +179,13 @@ describe Bolognese::Metadata, vcr: true do
179
179
  expect(jats.dig("day")).to be_nil
180
180
  expect(jats.dig("pub_id")).to eq("pub_id_type"=>"doi", "__content__"=>"10.34747/g6yb-3412")
181
181
  end
182
+
183
+ it "with publisher with language" do
184
+ input = fixture_path + 'datacite-xml-lang.xml'
185
+ subject = Bolognese::Metadata.new(input: input)
186
+ jats = Maremma.from_xml(subject.jats).fetch("element_citation", {})
187
+ expect(jats.dig("publisher_name")).to eq({"xml:lang"=>"en", "__content__"=>"OSI SAF"})
188
+ end
182
189
  end
183
190
 
184
191
  context "change metadata as datacite xml" do
@@ -365,5 +365,18 @@ describe Bolognese::Metadata, vcr: true do
365
365
  expect(json["@type"]).to eq("CreativeWork")
366
366
  expect(json["datePublished"]).to eq("2019")
367
367
  end
368
+
369
+ it "from Schema 4.5 with publisher attributes" do
370
+ input = fixture_path + 'datacite-example-full-v4.5.xml'
371
+ subject = Bolognese::Metadata.new(input: input)
372
+ json = JSON.parse(subject.schema_org)
373
+ expect(json["publisher"]).to eq(
374
+ {
375
+ "@type" => "Organization",
376
+ "@id" => "https://ror.org/04z8jg394",
377
+ "name" => "Example Publisher"
378
+ }
379
+ )
380
+ end
368
381
  end
369
382
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bolognese
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.3
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-11 00:00:00.000000000 Z
11
+ date: 2024-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma