bolognese 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc718876c65857d9b07af2752b4ce8ed8bda05b5e3f7dae983530a414262b9e6
4
- data.tar.gz: 997f406aa28447acf4507aad0c2ffb0f4e4e42da4028dbe2210d027b438dea09
3
+ metadata.gz: 71ee294125ac977b19c081b25a540d504bc78e932be89beb5d25e4754637166d
4
+ data.tar.gz: e95a2a6495a9ae0cf3014ba20024902da8ab1fd78d28472abfa0472ea3eb9b2b
5
5
  SHA512:
6
- metadata.gz: e5692d0fa4a116b0c3f32ae2be26f916f282860a69daed4564fa7b8e91823d8648eddf958fc1fdd9c9c86ab2fcf81f108b334da57b8b9c6ce7fd79b94c009262
7
- data.tar.gz: d1a74bd310e4ad4a7e8f34c218685cf3d76183d063feffab36c926e4d449e46a0792b2d279d4aeb6540ad74c441cabda0d242053eb96a01f5c6598112a1848e2
6
+ metadata.gz: 02de737b6845e350022dff585da6c2c5ef564852ae41e569d27474aee996afb482fa7711e7a47ebe0d020df9d7a93e88dc889d2529896955306a784d0b4862b5
7
+ data.tar.gz: 78b6149f043f22700f638a8105e9242e98e9da9cb4329a427f35c0e4c51125044eedaf639994b0586296f1cb1e65c15694304b340a5b3dab9a36631490c45fd0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (2.0.3)
4
+ bolognese (2.1.1)
5
5
  activesupport (>= 4.2.5)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (>= 5.1.0)
@@ -235,4 +235,4 @@ DEPENDENCIES
235
235
  webmock (~> 3.0, >= 3.0.1)
236
236
 
237
237
  BUNDLED WITH
238
- 2.4.20
238
+ 2.4.22
@@ -74,8 +74,11 @@ module Bolognese
74
74
  creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
75
75
  end
76
76
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
77
- publisher_name = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
78
- publisher = { "name" => publisher_name } if publisher_name.present?
77
+
78
+ publisher = {
79
+ "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
80
+ "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
81
+ }.compact if meta.fetch("publisher", nil).present?
79
82
 
80
83
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
81
84
  container = if meta.fetch(ct, nil).present?
@@ -600,12 +600,12 @@ module Bolognese
600
600
  end
601
601
 
602
602
  def validate_orcid(orcid)
603
- orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z/.match(orcid)).last
603
+ orcid = Array(/\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)).last
604
604
  orcid.gsub(/[[:space:]]/, "-") if orcid.present?
605
605
  end
606
606
 
607
607
  def validate_ror(ror)
608
- Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})$/.match(ror)).last
608
+ Array(/^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)).last
609
609
  end
610
610
 
611
611
  def validate_orcid_scheme(orcid_scheme)
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "2.0.3"
2
+ VERSION = "2.1.1"
3
3
  end
@@ -4,11 +4,8 @@ module Bolognese
4
4
  module Writers
5
5
  module DataciteJsonWriter
6
6
  def datacite_json
7
- # Remove the following change for the schema 4.5 release
8
7
  if crosscite_hsh.present?
9
- datacite_json_hsh = crosscite_hsh
10
- datacite_json_hsh['publisher'] = self.publisher['name'] if self.publisher&.respond_to?(:to_hash) && self.publisher.has_key?('name') && !self.publisher['name'].blank?
11
- JSON.pretty_generate datacite_json_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
8
+ JSON.pretty_generate crosscite_hsh.transform_keys! { |key| key.camelcase(uppercase_first_letter = :lower) }
12
9
  end
13
10
  end
14
11
  end
@@ -86,7 +86,10 @@ module Bolognese
86
86
  end
87
87
 
88
88
  def insert_publisher_name(xml)
89
- xml.send("publisher-name", publisher["name"])
89
+ attributes = {
90
+ "xml:lang" => publisher["lang"]
91
+ }.compact
92
+ xml.send("publisher-name", attributes, publisher["name"])
90
93
  end
91
94
 
92
95
  def insert_publication_date(xml)
@@ -37,7 +37,7 @@ module Bolognese
37
37
  "schemaVersion" => schema_version,
38
38
  "periodical" => types.present? ? ((types["schemaOrg"] != "Dataset") && container.present? ? to_schema_org(container) : nil) : nil,
39
39
  "includedInDataCatalog" => types.present? ? ((types["schemaOrg"] == "Dataset") && container.present? ? to_schema_org_container(container, type: "Dataset") : nil) : nil,
40
- "publisher" => publisher.present? ? { "@type" => "Organization", "name" => publisher["name"] } : nil,
40
+ "publisher" => publisher.present? ? { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact : nil,
41
41
  "funder" => to_schema_org_funder(funding_references),
42
42
  "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
43
43
  }.compact.presence
@@ -168,11 +168,12 @@ describe Bolognese::Metadata, vcr: true do
168
168
  expect(subject.creators[4]).to eq("nameType"=>"Organizational", "name"=>"University Of Kivu", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/01qfhxr31", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[])
169
169
  expect(subject.creators[5]).to eq("nameType"=>"Organizational", "name"=>"សាកលវិទ្យាល័យកម្ពុជា", "nameIdentifiers"=> [{"nameIdentifier"=>"http://ror.org/025e3rc84", "nameIdentifierScheme"=>"RORS"}], "affiliation"=>[])
170
170
  expect(subject.creators[6]).to eq("nameType"=>"Organizational", "name"=>"جامعة زاخۆ", "nameIdentifiers"=> [{"nameIdentifier"=>"05sd1pz50", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"RORS"}], "affiliation"=>[])
171
+ expect(subject.creators[9]).to eq("nameType"=>"Organizational", "name"=>"Gump South Pacific Research Station", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/04sk0et52", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[])
171
172
  expect(subject.contributors.first).to eq("nameType"=>"Organizational", "name"=>" Nawroz University ", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/04gp75d48", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}], "affiliation"=>[], "contributorType"=>"Producer")
172
173
  expect(subject.contributors.last).to eq("nameType"=>"Organizational", "name"=>"University Of Greenland (Https://Www.Uni.Gl/)", "nameIdentifiers"=> [{"nameIdentifier"=>"https://ror.org/00t5j6b61", "schemeUri"=>"https://ror.org", "nameIdentifierScheme"=>"ROR"}],"affiliation"=>[], "contributorType"=>"Sponsor")
173
174
  end
174
175
 
175
- context "affiliationIdentifier" do
176
+ context "affiliationIdentifier/nameIdentifier" do
176
177
  let(:input) { fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' }
177
178
  subject { Bolognese::Metadata.new(input: input, from: "datacite") }
178
179
 
@@ -207,6 +208,11 @@ describe Bolognese::Metadata, vcr: true do
207
208
  expect(subject.creators[8]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0118", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
208
209
  end
209
210
 
211
+ it "should normalize valid ORCID nameIdentifier with trailing slash" do
212
+ #" 0000-0001-9998-0118 ", # Valid ORCID with leading/trailing spaces
213
+ expect(subject.creators[10]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
214
+ end
215
+
210
216
  it "should parse non ROR schema's without normalizing them" do
211
217
  input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml'
212
218
  subject = Bolognese::Metadata.new(input: input, from: "datacite")
@@ -250,4 +256,4 @@ describe Bolognese::Metadata, vcr: true do
250
256
  expect(response).to eq("Matt Jones and Peter Slaughter and {University of California, Santa Barbara}")
251
257
  end
252
258
  end
253
- end
259
+ end
@@ -43,6 +43,14 @@
43
43
  <creatorName nameType="Personal">Mike B</creatorName>
44
44
  <nameIdentifier nameIdentifierScheme="ORCID"> 0000-0001-9998-0118 </nameIdentifier>
45
45
  </creator>
46
+ <creator>
47
+ <creatorName nameType="Organizational">Gump South Pacific Research Station</creatorName>
48
+ <nameIdentifier nameIdentifierScheme="ROR" schemeURI="https://ror.org/">https://ror.org/04sk0et52/</nameIdentifier>
49
+ </creator>
50
+ <creator>
51
+ <creatorName nameType="Personal">Ashwini Sukale</creatorName>
52
+ <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">https://orcid.org/0000-0001-9998-0117/</nameIdentifier>
53
+ </creator>
46
54
  </creators>
47
55
  <titles>
48
56
  <title xml:lang="en-US">Genomic Standards Consortium (GSC) Island Sampling Day: Moorea Reef to Ridges Genomic Transect</title>
@@ -15,7 +15,7 @@
15
15
  <title xml:lang="en">DOI Test 2 title content</title>
16
16
  <title xml:lang="en">AAPP</title>
17
17
  </titles>
18
- <publisher>OSI SAF</publisher>
18
+ <publisher xml:lang="en">OSI SAF</publisher>
19
19
  <publicationYear>2004</publicationYear>
20
20
  <resourceType resourceTypeGeneral="Collection">Climate Data Record</resourceType>
21
21
  <dates>
@@ -14,6 +14,7 @@
14
14
  }],
15
15
  "publisher": {
16
16
  "@type": "Organization",
17
+ "@id": "https://ror.org/04wxnsj81",
17
18
  "name": "DataCite"
18
19
  },
19
20
  "dateCreated": "2016-12-20",
@@ -169,7 +169,7 @@ describe Bolognese::Metadata, vcr: true do
169
169
  expect(subject.publication_year).to eq("2016")
170
170
  expect(subject.related_identifiers.length).to eq(3)
171
171
  expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References")
172
- expect(subject.publisher).to eq({"name"=>"DataCite"})
172
+ expect(subject.publisher).to eq({"name"=>"DataCite", "publisherIdentifier"=>"https://ror.org/04wxnsj81"})
173
173
  end
174
174
 
175
175
  it "GTEx dataset" do
@@ -18,9 +18,7 @@ describe Bolognese::Metadata, vcr: true do
18
18
  "rightsIdentifierScheme"=>"SPDX",
19
19
  "rightsUri"=>"https://creativecommons.org/licenses/by/3.0/legalcode",
20
20
  "schemeUri"=>"https://spdx.org/licenses/"}])
21
- # REVERT the publisher line to the commented line for the schema 4.5 release
22
- # expect(datacite.fetch("publisher")).to eq({"name"=>"eLife Sciences Publications, Ltd"})
23
- expect(datacite.fetch("publisher")).to eq("eLife Sciences Publications, Ltd")
21
+ expect(datacite.fetch("publisher")).to eq({"name"=>"eLife Sciences Publications, Ltd"})
24
22
  end
25
23
 
26
24
  it "with ORCID ID" do
@@ -31,9 +29,7 @@ describe Bolognese::Metadata, vcr: true do
31
29
  expect(datacite.fetch("types")).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"JournalArticle", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle")
32
30
  expect(datacite.fetch("creators").length).to eq(7)
33
31
  expect(datacite.fetch("creators").first).to eq("nameType"=>"Personal", "name"=>"Thanassi, Wendy", "givenName"=>"Wendy", "familyName"=>"Thanassi", "affiliation" => [{"name"=>"Department of Medicine, Veterans Affairs Palo Alto Health Care System, 3801 Miranda Avenue MC-, Palo Alto, CA 94304-1207, USA"}, {"name"=>"Occupational Health Strategic Health Care Group, Office of Public Health, Veterans Health Administration, Washington, DC 20006, USA"}, {"name"=>"Division of Emergency Medicine, Stanford University School of Medicine, Stanford, CA 94304, USA"}, {"name"=>"War Related Illness and Injury Study Center (WRIISC) and Mental Illness Research Education and Clinical Center (MIRECC), Department of Veterans Affairs, Palo Alto, CA 94304, USA"}])
34
- # REVERT the publisher line to the commented line for the schema 4.5 release
35
- # expect(datacite.fetch("publisher")).to eq({"name"=>"Hindawi Limited"})
36
- expect(datacite.fetch("publisher")).to eq("Hindawi Limited")
32
+ expect(datacite.fetch("publisher")).to eq({"name"=>"Hindawi Limited"})
37
33
  end
38
34
 
39
35
  it "Crossref DOI" do
@@ -45,9 +41,7 @@ describe Bolognese::Metadata, vcr: true do
45
41
  expect(datacite.dig("descriptions", 0, "description")).to start_with("Among various advantages, their small size makes model organisms preferred subjects of investigation.")
46
42
  expect(datacite.fetch("creators").length).to eq(5)
47
43
  expect(datacite.fetch("creators").first).to eq("nameType"=>"Personal", "name"=>"Sankar, Martial", "givenName"=>"Martial", "familyName"=>"Sankar")
48
- # REVERT the publisher line to the commented line for the schema 4.5 release
49
- # expect(datacite.fetch("publisher")).to eq({"name"=>"{eLife} Sciences Organisation, Ltd."})
50
- expect(datacite.fetch("publisher")).to eq("{eLife} Sciences Organisation, Ltd.")
44
+ expect(datacite.fetch("publisher")).to eq({"name"=>"{eLife} Sciences Organisation, Ltd."})
51
45
  end
52
46
 
53
47
  it "BlogPosting Citeproc JSON" do
@@ -58,9 +52,7 @@ describe Bolognese::Metadata, vcr: true do
58
52
  expect(datacite.fetch("titles")).to eq([{"title"=>"Eating your own Dog Food"}])
59
53
  expect(datacite.dig("descriptions", 0, "description")).to start_with("Eating your own dog food")
60
54
  expect(datacite.fetch("creators")).to eq([{"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin"}])
61
- # REVERT the publisher line to the commented line for the schema 4.5 release
62
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
63
- expect(datacite.fetch("publisher")).to eq("DataCite")
55
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
64
56
  end
65
57
 
66
58
  it "rdataone" do
@@ -72,9 +64,7 @@ describe Bolognese::Metadata, vcr: true do
72
64
  expect(datacite.fetch("creators").first).to eq("affiliation"=>[{"name"=>"NCEAS"}], "nameType"=>"Personal", "name" => "Jones, Matt",
73
65
  "nameIdentifiers" => [{"nameIdentifier"=>"https://orcid.org/0000-0003-0077-4738", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "givenName"=>"Matt", "familyName"=>"Jones")
74
66
  expect(datacite.fetch("version")).to eq("2.0.0")
75
- # REVERT the publisher line to the commented line for the schema 4.5 release
76
- # expect(datacite.fetch("publisher")).to eq({"name"=>"https://cran.r-project.org"})
77
- expect(datacite.fetch("publisher")).to eq("https://cran.r-project.org")
67
+ expect(datacite.fetch("publisher")).to eq({"name"=>"https://cran.r-project.org"})
78
68
  end
79
69
 
80
70
  it "maremma" do
@@ -90,9 +80,7 @@ describe Bolognese::Metadata, vcr: true do
90
80
  [{"nameIdentifier"=>"https://orcid.org/0000-0003-0077-4738",
91
81
  "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}],
92
82
  "nameType"=>"Personal"}])
93
- # REVERT the publisher line to the commented line for the schema 4.5 release
94
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
95
- expect(datacite.fetch("publisher")).to eq("DataCite")
83
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
96
84
  end
97
85
 
98
86
  it "with data citation schema.org" do
@@ -103,26 +91,22 @@ describe Bolognese::Metadata, vcr: true do
103
91
  expect(datacite.fetch("titles")).to eq([{"title"=>"Eating your own Dog Food"}])
104
92
  expect(datacite.fetch("relatedIdentifiers").count).to eq(3)
105
93
  expect(datacite.fetch("relatedIdentifiers").first).to eq("relatedIdentifier"=>"10.5438/0000-00ss", "relatedIdentifierType"=>"DOI", "relationType"=>"IsPartOf", "resourceTypeGeneral"=>"Text")
106
- # REVERT the publisher line to the commented line for the schema 4.5 release
107
- # expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
108
- expect(datacite.fetch("publisher")).to eq("DataCite")
94
+ expect(datacite.fetch("publisher")).to eq({"name"=>"DataCite"})
109
95
  end
110
96
 
111
97
  it "DataCite DOI with Schema 4.5 publisher attributes" do
112
98
  input = fixture_path + "datacite-example-full-v4.5.xml"
113
99
  subject = Bolognese::Metadata.new(input: input, from: "datacite")
114
100
  datacite = JSON.parse(subject.datacite_json)
115
- # REVERT the publisher line to the commented lines for the schema 4.5 release
116
- # expect(datacite.fetch("publisher")).to eq(
117
- # {
118
- # "name" => "Example Publisher",
119
- # "publisherIdentifier" => "https://ror.org/04z8jg394",
120
- # "publisherIdentifierScheme" => "ROR",
121
- # "schemeUri" => "https://ror.org/",
122
- # "lang" => "en",
123
- # },
124
- # )
125
- expect(datacite.fetch("publisher")).to eq("Example Publisher")
101
+ expect(datacite.fetch("publisher")).to eq(
102
+ {
103
+ "name" => "Example Publisher",
104
+ "publisherIdentifier" => "https://ror.org/04z8jg394",
105
+ "publisherIdentifierScheme" => "ROR",
106
+ "schemeUri" => "https://ror.org/",
107
+ "lang" => "en",
108
+ },
109
+ )
126
110
  end
127
111
  end
128
112
  end
@@ -179,6 +179,13 @@ describe Bolognese::Metadata, vcr: true do
179
179
  expect(jats.dig("day")).to be_nil
180
180
  expect(jats.dig("pub_id")).to eq("pub_id_type"=>"doi", "__content__"=>"10.34747/g6yb-3412")
181
181
  end
182
+
183
+ it "with publisher with language" do
184
+ input = fixture_path + 'datacite-xml-lang.xml'
185
+ subject = Bolognese::Metadata.new(input: input)
186
+ jats = Maremma.from_xml(subject.jats).fetch("element_citation", {})
187
+ expect(jats.dig("publisher_name")).to eq({"xml:lang"=>"en", "__content__"=>"OSI SAF"})
188
+ end
182
189
  end
183
190
 
184
191
  context "change metadata as datacite xml" do
@@ -365,5 +365,18 @@ describe Bolognese::Metadata, vcr: true do
365
365
  expect(json["@type"]).to eq("CreativeWork")
366
366
  expect(json["datePublished"]).to eq("2019")
367
367
  end
368
+
369
+ it "from Schema 4.5 with publisher attributes" do
370
+ input = fixture_path + 'datacite-example-full-v4.5.xml'
371
+ subject = Bolognese::Metadata.new(input: input)
372
+ json = JSON.parse(subject.schema_org)
373
+ expect(json["publisher"]).to eq(
374
+ {
375
+ "@type" => "Organization",
376
+ "@id" => "https://ror.org/04z8jg394",
377
+ "name" => "Example Publisher"
378
+ }
379
+ )
380
+ end
368
381
  end
369
382
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bolognese
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.3
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-11 00:00:00.000000000 Z
11
+ date: 2024-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma