bolognese 1.3.11 → 1.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f48b979736d6c650c605afb1ef0593cba49c8f3e0f719f571dd8c0a46035f91
4
- data.tar.gz: 87be2f1d2d8e744d4b2a7095a78d767270dd374670df6e4c02790e482bce0962
3
+ metadata.gz: 17907b9f240b77cd0be24a0bc4ad4787a0849e64264cbbaa26e0583183b7e8f3
4
+ data.tar.gz: 6d8f7836ec34a526a2478ef58cadf3e040475772eeb6bc0b0336490aaf525b48
5
5
  SHA512:
6
- metadata.gz: bc6fe47b28d56eef7bd8d301fd0824608b122f502a15154240a5b9554e49d3e2fba077f7547face75f5c10340c1be4464af2afe52c5c8f90a5e345bf32812ac2
7
- data.tar.gz: 2bc6010a67fddd5208fcce970296f57ca399f0b18dea24d7e4314376baebc59b8ec08e23c84e50bf450df9a1fda177279c614a9834957b76c08baa8c85760bd3
6
+ metadata.gz: f4dd37336d18737867915f56d93d5b27e17b81f7770c56466c6b08c8e1c695752fed591dc51fd9e45d9313e47be0e2f8b50fb7a564f95e1e1b39e4866e4a77c9
7
+ data.tar.gz: 1b2f2cd3262ba69e9326c5da28dc8c7b771b12cbf2568c8c9b88068216e6c610638f1977fe0ed29ded47c431a79724ec35370585976f53b2d5188102f5b44a3b
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bolognese (1.3.11)
4
+ bolognese (1.3.13)
5
5
  activesupport (>= 4.2.5, < 6)
6
6
  benchmark_methods (~> 0.7)
7
7
  bibtex-ruby (~> 4.1)
data/lib/bolognese.rb CHANGED
@@ -18,6 +18,7 @@ require 'gender_detector'
18
18
  require 'citeproc/ruby'
19
19
  require 'citeproc'
20
20
  require 'csl/styles'
21
+ require 'edtf'
21
22
 
22
23
  require "bolognese/version"
23
24
  require "bolognese/metadata"
@@ -50,37 +50,31 @@ module Bolognese
50
50
  end
51
51
 
52
52
  related_identifiers = if meta.try(:journal).present? && meta.try(:issn).to_s.presence
53
- [{ "type" => "Periodical",
54
- "relationType" => "IsPartOf",
55
- "relatedIdentifierType" => "ISSN",
56
- "title" => meta.journal.to_s,
57
- "relatedIdentifier" => meta.try(:issn).to_s.presence }.compact]
58
- else
59
- nil
60
- end
53
+ [{ "type" => "Periodical",
54
+ "relationType" => "IsPartOf",
55
+ "relatedIdentifierType" => "ISSN",
56
+ "title" => meta.journal.to_s,
57
+ "relatedIdentifier" => meta.try(:issn).to_s.presence }.compact]
58
+ end
61
59
 
62
60
  container = if meta.try(:journal).present?
63
- first_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[0] : nil
64
- last_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[1] : nil
65
-
66
- { "type" => "Journal",
67
- "title" => meta.journal.to_s,
68
- "identifier" => meta.try(:issn).to_s.presence,
69
- "identifierType" => meta.try(:issn).present? ? "ISSN" : nil,
70
- "volume" => meta.try(:volume).to_s.presence,
71
- "firstPage" => first_page,
72
- "lastPage" => last_page }.compact
73
- else
74
- nil
75
- end
61
+ first_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[0] : nil
62
+ last_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[1] : nil
63
+
64
+ { "type" => "Journal",
65
+ "title" => meta.journal.to_s,
66
+ "identifier" => meta.try(:issn).to_s.presence,
67
+ "identifierType" => meta.try(:issn).present? ? "ISSN" : nil,
68
+ "volume" => meta.try(:volume).to_s.presence,
69
+ "firstPage" => first_page,
70
+ "lastPage" => last_page }.compact
71
+ end
76
72
 
77
73
  state = meta.try(:doi).to_s.present? || read_options.present? ? "findable" : "not_found"
78
- dates = if meta.try(:date).present?
79
- [{ "date" => meta.date.to_s,
80
- "dateType" => "Issued" }]
81
- else
82
- nil
83
- end
74
+ dates = if meta.try(:date).present? && Date.edtf(meta.date.to_s).present?
75
+ [{ "date" => meta.date.to_s,
76
+ "dateType" => "Issued" }]
77
+ end
84
78
  publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
85
79
 
86
80
  { "id" => normalize_doi(doi),
@@ -52,27 +52,23 @@ module Bolognese
52
52
  [{ "nameType" => "Organizational", "name" => ":(unav)" }]
53
53
  end
54
54
  contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
55
- dates = if meta.fetch("issued", nil).present?
56
- [{ "date" => get_date_from_date_parts(meta.fetch("issued", nil)),
57
- "dateType" => "Issued" }]
58
- else
59
- nil
60
- end
55
+ dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
56
+ if Date.edtf(date).present?
57
+ [{ "date" => date,
58
+ "dateType" => "Issued" }]
59
+ end
60
+ end
61
61
  publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
62
62
  rights_list = if meta.fetch("copyright", nil)
63
- [{ "rightsUri" => normalize_url(meta.fetch("copyright")) }.compact]
64
- else
65
- nil
66
- end
63
+ [{ "rightsUri" => normalize_url(meta.fetch("copyright")) }.compact]
64
+ end
67
65
  related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
68
- [{ "type" => "Periodical",
69
- "relationType" => "IsPartOf",
70
- "relatedIdentifierType" => "ISSN",
71
- "title" => meta.fetch("container-title", nil),
72
- "relatedIdentifier" => meta.fetch("ISSN", nil) }.compact]
73
- else
74
- nil
75
- end
66
+ [{ "type" => "Periodical",
67
+ "relationType" => "IsPartOf",
68
+ "relatedIdentifierType" => "ISSN",
69
+ "title" => meta.fetch("container-title", nil),
70
+ "relatedIdentifier" => meta.fetch("ISSN", nil) }.compact]
71
+ end
76
72
  container = if meta.fetch("container-title", nil).present?
77
73
  first_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[0] : nil
78
74
  last_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[1] : nil
@@ -106,38 +106,46 @@ module Bolognese
106
106
  [{ "title" => ":{unav)" }]
107
107
  end
108
108
 
109
- date_created = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "created" }
109
+ date_published = crossref_date_published(bibliographic_metadata)
110
+ if date_published.present?
111
+ date_published = { "date" => date_published, "dateType" => "Issued" }
112
+ else
113
+ date_published = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "created" }
114
+ date_published = { "date" => date_published.fetch("__content__", "")[0..9], "dateType" => "Issued" } if date_published.present?
115
+ end
110
116
  date_updated = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "last-update" }
111
117
  date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
112
- dates = [
113
- { "date" => crossref_date_published(bibliographic_metadata) || date_created.to_h.fetch("__content__", "")[0..9].presence, "dateType" => "Issued" }, date_updated
114
- ].compact
115
- publication_year = crossref_date_published(bibliographic_metadata).present? ? crossref_date_published(bibliographic_metadata)[0..3] : date_created.to_h.fetch("__content__", "")[0..3].presence
118
+
119
+ # check that date is valid iso8601 date
120
+ date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
121
+ date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
122
+
123
+ dates = [date_published, date_updated].compact
124
+ publication_year = date_published.to_h.fetch("date", "")[0..3].presence
125
+
116
126
  state = meta.present? || read_options.present? ? "findable" : "not_found"
117
127
 
118
128
  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
119
129
  container = if journal_metadata.present? || book_metadata.present?
120
- issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
130
+ issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
121
131
 
122
- { "type" => "Journal",
123
- "identifier" => issn,
124
- "identifierType" => issn.present? ? "ISSN" : nil,
125
- "title" => parse_attributes(journal_metadata.to_h["full_title"]),
126
- "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
127
- "issue" => parse_attributes(journal_issue.dig("issue")),
128
- "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
129
- "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
130
- elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
131
- issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
132
+ { "type" => "Journal",
133
+ "identifier" => issn,
134
+ "identifierType" => issn.present? ? "ISSN" : nil,
135
+ "title" => parse_attributes(journal_metadata.to_h["full_title"]),
136
+ "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
137
+ "issue" => parse_attributes(journal_issue.dig("issue")),
138
+ "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
139
+ "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
140
+ elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
141
+ issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
132
142
 
133
- { "type" => "Book Series",
134
- "identifier" => issn,
135
- "identifierType" => issn.present? ? "ISSN" : nil,
136
- "title" => book_series_metadata.dig("series_metadata", "titles", "title"),
137
- "volume" => bibliographic_metadata.fetch("volume", nil) }.compact
138
- else
139
- nil
140
- end
143
+ { "type" => "Book Series",
144
+ "identifier" => issn,
145
+ "identifierType" => issn.present? ? "ISSN" : nil,
146
+ "title" => book_series_metadata.dig("series_metadata", "titles", "title"),
147
+ "volume" => bibliographic_metadata.fetch("volume", nil) }.compact
148
+ end
141
149
 
142
150
  identifiers = [{ "identifierType" => "DOI", "identifier" => normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi")) }, crossref_alternate_identifiers(bibliographic_metadata)].compact
143
151
 
@@ -127,20 +127,40 @@ module Bolognese
127
127
  nil
128
128
  elsif k.is_a?(String)
129
129
  { "subject" => sanitize(k) }
130
- else
130
+ elsif k.is_a?(Hash)
131
131
  { "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
132
132
  end
133
133
  end.compact
134
- dates = Array.wrap(meta.dig("dates", "date")).map do |d|
135
- {
136
- "date" => parse_attributes(d),
137
- "dateType" => parse_attributes(d, content: "dateType"),
138
- "dateInformation" => parse_attributes(d, content: "dateInformation")
139
- }.compact
134
+ dates = Array.wrap(meta.dig("dates", "date")).map do |r|
135
+ if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
136
+ if Date.edtf(date).present?
137
+ { "date" => date,
138
+ "dateType" => parse_attributes(r, content: "dateType"),
139
+ "dateInformation" => parse_attributes(r, content: "dateInformation")
140
+ }.compact
141
+ end
142
+ end
140
143
  end.compact
141
144
  dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
142
- sizes = Array.wrap(meta.dig("sizes", "size"))
143
- formats = Array.wrap(parse_attributes(meta.dig("formats", "format")))
145
+ sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
146
+ if k.blank?
147
+ nil
148
+ elsif k.is_a?(String)
149
+ sanitize(k).presence
150
+ elsif k.is_a?(Hash)
151
+ sanitize(k["__content__"]).presence
152
+ end
153
+ end.compact
154
+ formats = Array.wrap(meta.dig("formats", "format")).map do |k|
155
+ if k.blank?
156
+ nil
157
+ elsif k.is_a?(String)
158
+ sanitize(k).presence
159
+ elsif k.is_a?(Hash)
160
+ sanitize(k["__content__"]).presence
161
+ end
162
+ end.compact
163
+ .map { |s| s.to_s.squish.presence }.compact
144
164
  funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
145
165
  scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
146
166
  funder_identifier = parse_attributes(fr["funderIdentifier"])
@@ -124,9 +124,9 @@ module Bolognese
124
124
  end
125
125
  end
126
126
  dates = []
127
- dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present?
128
- dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
129
- dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
127
+ dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
128
+ dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
129
+ dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
130
130
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
131
131
 
132
132
  state = meta.present? || read_options.present? ? "findable" : "not_found"
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "1.3.11"
2
+ VERSION = "1.3.13"
3
3
  end
@@ -0,0 +1,57 @@
1
+ <?xml version="1.0"?>
2
+ <resource xmlns="http://datacite.org/schema/kernel-2.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-2.2 http://schema.datacite.org/meta/kernel-2.2/metadata.xsd">
3
+ <identifier identifierType="DOI">10.13116/c6b6060e-0648-4f88-b108-95ea00928171</identifier>
4
+ <creators>
5
+ <creator>
6
+ <creatorName>EvK2 CNR Committee</creatorName>
7
+ </creator>
8
+ </creators>
9
+ <titles>
10
+ <title>SHARE (Stations at High Altitude for Research on the Environment) Network</title>
11
+ <title titleType="Subtitle">Urdukas (Baltoro Glacier, Baltistan - Pakistan)</title>
12
+ </titles>
13
+ <publisher>EvK2 CNR Committee</publisher>
14
+ <publicationYear>2011</publicationYear>
15
+ <subjects>
16
+ <subject>Environmental research</subject>
17
+ </subjects>
18
+ <contributors>
19
+ <contributor contributorType="Funder">
20
+ <contributorName>EvK2 CNR Committee</contributorName>
21
+ </contributor>
22
+ </contributors>
23
+ <dates>
24
+ <date dateType="Valid">
25
+ </date>
26
+ </dates>
27
+ <language>en</language>
28
+ <resourceType resourceTypeGeneral="Dataset"/>
29
+ <relatedIdentifiers>
30
+ <relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy">http://geonetwork.evk2cnr.org:8080/geonetwork/srv/en/main.home</relatedIdentifier>
31
+ </relatedIdentifiers>
32
+ <sizes>
33
+ <size>
34
+ </size>
35
+ </sizes>
36
+ <formats>
37
+ <format>text</format>
38
+ </formats>
39
+ <version>1.0</version>
40
+ <rights>
41
+ </rights>
42
+ <descriptions>
43
+ <description descriptionType="Abstract">The Baltoro Glacier, 62 km long, is one of the longest glaciers outside of the Polar Regions. It is located in Baltistan, in the Northern Areas of Pakistan, and runs through part of the Karakoram mountain range. In the middle of the Baltoro glacier there is the campsite of Urdukas located on the trek way to Concordia.
44
+ This automatic weather station was installed, in June 17, 2004 on the Baltoro Glacier, more precisely on a moraine ridge close to the left glacier margin at 3926 m asl.
45
+
46
+ Geographical coordinates:
47
+ Latitude: 35deg 43' 41" N
48
+ Longitude: 76deg 17' 10" EThe Urdukas AWS provides data (with a 60 minute time resolution" of the following parameters:
49
+ - air temperature (degC)
50
+ - atmopherich pressure (hPa)
51
+ - relative humidity (%)
52
+ - total precipitation (mm)
53
+ - global solar radiation (W/m2)
54
+ - wind speed (m/s)
55
+ - wind direction (degree)</description>
56
+ </descriptions>
57
+ </resource>
@@ -151,6 +151,24 @@ describe Bolognese::Metadata, vcr: true do
151
151
  expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-4")
152
152
  end
153
153
 
154
+ it "empty sizes and dates attributes" do
155
+ input = fixture_path + 'datacite-empty-sizes.xml'
156
+ subject = Bolognese::Metadata.new(input: input)
157
+ expect(subject.valid?).to be true
158
+ expect(subject.types["schemaOrg"]).to eq("Dataset")
159
+ expect(subject.types["resourceType"]).to be_nil
160
+ expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
161
+ expect(subject.creators.length).to eq(1)
162
+ expect(subject.creators.first).to eq("affiliation"=>[], "name"=>"EvK2 CNR Committee", "nameIdentifiers"=>[])
163
+ expect(subject.titles).to eq([{"title"=>"SHARE (Stations at High Altitude for Research on the Environment) Network"}, {"title"=>"Urdukas (Baltoro Glacier, Baltistan - Pakistan)", "titleType"=>"Subtitle"}])
164
+ expect(subject.dates).to eq([{"date"=>"2011", "dateType"=>"Issued"}])
165
+ expect(subject.publication_year).to eq("2011")
166
+ expect(subject.sizes).to eq([])
167
+ expect(subject.publisher).to eq("EvK2 CNR Committee")
168
+ expect(subject.agency).to eq("DataCite")
169
+ expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-2.2")
170
+ end
171
+
154
172
  it "multiple licenses" do
155
173
  input = "https://doi.org/10.5281/ZENODO.48440"
156
174
  subject = Bolognese::Metadata.new(input: input)
@@ -799,7 +817,7 @@ describe Bolognese::Metadata, vcr: true do
799
817
  expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
800
818
  expect(subject.creators).to eq([{"name"=>"Anonymous", "nameIdentifiers" => [], "affiliation" => []}])
801
819
  expect(subject.titles).to eq([{"title"=>"Messung der Bildunschaerfe in H.264-codierten Bildern und Videosequenzen"}])
802
- expect(subject.dates).to eq([{"date"=>"07.04.2017", "dateType"=>"Available"}, {"date"=>"2017", "dateType"=>"Issued"}])
820
+ expect(subject.dates).to eq([{"date"=>"2017", "dateType"=>"Issued"}])
803
821
  expect(subject.publication_year).to eq("2017")
804
822
  expect(subject.publisher).to eq("Siemens AG")
805
823
  expect(subject.agency).to eq("DataCite")
@@ -836,7 +854,7 @@ describe Bolognese::Metadata, vcr: true do
836
854
  "affiliation"=>[{"name"=>"Royal Netherlands Meteorological Institute (KNMI)"}]}])
837
855
  expect(subject.titles).to eq([{"title"=>"Multi-Sensor Reanalysis (MSR) of total ozone, version 2"}])
838
856
  expect(subject.version_info).to eq("2")
839
- expect(subject.dates).to eq([{"date"=>"2014-04-25", "dateType"=>"Available"}, {"date"=>"1970-04-01 / (:tba)", "dateType"=>"Collected"}, {"date"=>"2015", "dateType"=>"Issued"}])
857
+ expect(subject.dates).to eq([{"date"=>"2014-04-25", "dateType"=>"Available"}, {"date"=>"2015", "dateType"=>"Issued"}])
840
858
  expect(subject.publication_year).to eq("2015")
841
859
  expect(subject.publisher).to eq("Royal Netherlands Meteorological Institute (KNMI)")
842
860
  expect(subject.agency).to eq("DataCite")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bolognese
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.11
4
+ version: 1.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-04 00:00:00.000000000 Z
11
+ date: 2019-08-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: maremma
@@ -717,6 +717,7 @@ files:
717
717
  - spec/fixtures/crossref.bib
718
718
  - spec/fixtures/crossref.ris
719
719
  - spec/fixtures/crossref.xml
720
+ - spec/fixtures/datacite-empty-sizes.xml
720
721
  - spec/fixtures/datacite-example-affiliation.xml
721
722
  - spec/fixtures/datacite-example-complicated-v3.0.xml
722
723
  - spec/fixtures/datacite-example-complicated-v4.0.xml