bolognese 1.3.11 → 1.3.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/bolognese.rb +1 -0
- data/lib/bolognese/readers/bibtex_reader.rb +21 -27
- data/lib/bolognese/readers/citeproc_reader.rb +14 -18
- data/lib/bolognese/readers/crossref_reader.rb +32 -24
- data/lib/bolognese/readers/datacite_reader.rb +29 -9
- data/lib/bolognese/readers/schema_org_reader.rb +3 -3
- data/lib/bolognese/version.rb +1 -1
- data/spec/fixtures/datacite-empty-sizes.xml +57 -0
- data/spec/readers/datacite_reader_spec.rb +20 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17907b9f240b77cd0be24a0bc4ad4787a0849e64264cbbaa26e0583183b7e8f3
|
4
|
+
data.tar.gz: 6d8f7836ec34a526a2478ef58cadf3e040475772eeb6bc0b0336490aaf525b48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4dd37336d18737867915f56d93d5b27e17b81f7770c56466c6b08c8e1c695752fed591dc51fd9e45d9313e47be0e2f8b50fb7a564f95e1e1b39e4866e4a77c9
|
7
|
+
data.tar.gz: 1b2f2cd3262ba69e9326c5da28dc8c7b771b12cbf2568c8c9b88068216e6c610638f1977fe0ed29ded47c431a79724ec35370585976f53b2d5188102f5b44a3b
|
data/Gemfile.lock
CHANGED
data/lib/bolognese.rb
CHANGED
@@ -50,37 +50,31 @@ module Bolognese
|
|
50
50
|
end
|
51
51
|
|
52
52
|
related_identifiers = if meta.try(:journal).present? && meta.try(:issn).to_s.presence
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
nil
|
60
|
-
end
|
53
|
+
[{ "type" => "Periodical",
|
54
|
+
"relationType" => "IsPartOf",
|
55
|
+
"relatedIdentifierType" => "ISSN",
|
56
|
+
"title" => meta.journal.to_s,
|
57
|
+
"relatedIdentifier" => meta.try(:issn).to_s.presence }.compact]
|
58
|
+
end
|
61
59
|
|
62
60
|
container = if meta.try(:journal).present?
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
nil
|
75
|
-
end
|
61
|
+
first_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[0] : nil
|
62
|
+
last_page = meta.try(:pages).present? ? meta.try(:pages).split("-").map(&:strip)[1] : nil
|
63
|
+
|
64
|
+
{ "type" => "Journal",
|
65
|
+
"title" => meta.journal.to_s,
|
66
|
+
"identifier" => meta.try(:issn).to_s.presence,
|
67
|
+
"identifierType" => meta.try(:issn).present? ? "ISSN" : nil,
|
68
|
+
"volume" => meta.try(:volume).to_s.presence,
|
69
|
+
"firstPage" => first_page,
|
70
|
+
"lastPage" => last_page }.compact
|
71
|
+
end
|
76
72
|
|
77
73
|
state = meta.try(:doi).to_s.present? || read_options.present? ? "findable" : "not_found"
|
78
|
-
dates = if meta.try(:date).present?
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
nil
|
83
|
-
end
|
74
|
+
dates = if meta.try(:date).present? && Date.edtf(meta.date.to_s).present?
|
75
|
+
[{ "date" => meta.date.to_s,
|
76
|
+
"dateType" => "Issued" }]
|
77
|
+
end
|
84
78
|
publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
|
85
79
|
|
86
80
|
{ "id" => normalize_doi(doi),
|
@@ -52,27 +52,23 @@ module Bolognese
|
|
52
52
|
[{ "nameType" => "Organizational", "name" => ":(unav)" }]
|
53
53
|
end
|
54
54
|
contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
|
55
|
-
dates = if meta.fetch("issued", nil)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
55
|
+
dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
|
56
|
+
if Date.edtf(date).present?
|
57
|
+
[{ "date" => date,
|
58
|
+
"dateType" => "Issued" }]
|
59
|
+
end
|
60
|
+
end
|
61
61
|
publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]
|
62
62
|
rights_list = if meta.fetch("copyright", nil)
|
63
|
-
|
64
|
-
|
65
|
-
nil
|
66
|
-
end
|
63
|
+
[{ "rightsUri" => normalize_url(meta.fetch("copyright")) }.compact]
|
64
|
+
end
|
67
65
|
related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
nil
|
75
|
-
end
|
66
|
+
[{ "type" => "Periodical",
|
67
|
+
"relationType" => "IsPartOf",
|
68
|
+
"relatedIdentifierType" => "ISSN",
|
69
|
+
"title" => meta.fetch("container-title", nil),
|
70
|
+
"relatedIdentifier" => meta.fetch("ISSN", nil) }.compact]
|
71
|
+
end
|
76
72
|
container = if meta.fetch("container-title", nil).present?
|
77
73
|
first_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[0] : nil
|
78
74
|
last_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[1] : nil
|
@@ -106,38 +106,46 @@ module Bolognese
|
|
106
106
|
[{ "title" => ":{unav)" }]
|
107
107
|
end
|
108
108
|
|
109
|
-
|
109
|
+
date_published = crossref_date_published(bibliographic_metadata)
|
110
|
+
if date_published.present?
|
111
|
+
date_published = { "date" => date_published, "dateType" => "Issued" }
|
112
|
+
else
|
113
|
+
date_published = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "created" }
|
114
|
+
date_published = { "date" => date_published.fetch("__content__", "")[0..9], "dateType" => "Issued" } if date_published.present?
|
115
|
+
end
|
110
116
|
date_updated = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "last-update" }
|
111
117
|
date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
|
112
|
-
|
113
|
-
|
114
|
-
].
|
115
|
-
|
118
|
+
|
119
|
+
# check that date is valid iso8601 date
|
120
|
+
date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
|
121
|
+
date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
|
122
|
+
|
123
|
+
dates = [date_published, date_updated].compact
|
124
|
+
publication_year = date_published.to_h.fetch("date", "")[0..3].presence
|
125
|
+
|
116
126
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
117
127
|
|
118
128
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
119
129
|
container = if journal_metadata.present? || book_metadata.present?
|
120
|
-
|
130
|
+
issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
|
121
131
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
+
{ "type" => "Journal",
|
133
|
+
"identifier" => issn,
|
134
|
+
"identifierType" => issn.present? ? "ISSN" : nil,
|
135
|
+
"title" => parse_attributes(journal_metadata.to_h["full_title"]),
|
136
|
+
"volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
|
137
|
+
"issue" => parse_attributes(journal_issue.dig("issue")),
|
138
|
+
"firstPage" => bibliographic_metadata.dig("pages", "first_page"),
|
139
|
+
"lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
|
140
|
+
elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
|
141
|
+
issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
|
132
142
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
nil
|
140
|
-
end
|
143
|
+
{ "type" => "Book Series",
|
144
|
+
"identifier" => issn,
|
145
|
+
"identifierType" => issn.present? ? "ISSN" : nil,
|
146
|
+
"title" => book_series_metadata.dig("series_metadata", "titles", "title"),
|
147
|
+
"volume" => bibliographic_metadata.fetch("volume", nil) }.compact
|
148
|
+
end
|
141
149
|
|
142
150
|
identifiers = [{ "identifierType" => "DOI", "identifier" => normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi")) }, crossref_alternate_identifiers(bibliographic_metadata)].compact
|
143
151
|
|
@@ -127,20 +127,40 @@ module Bolognese
|
|
127
127
|
nil
|
128
128
|
elsif k.is_a?(String)
|
129
129
|
{ "subject" => sanitize(k) }
|
130
|
-
|
130
|
+
elsif k.is_a?(Hash)
|
131
131
|
{ "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact
|
132
132
|
end
|
133
133
|
end.compact
|
134
|
-
dates = Array.wrap(meta.dig("dates", "date")).map do |
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
134
|
+
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
135
|
+
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
136
|
+
if Date.edtf(date).present?
|
137
|
+
{ "date" => date,
|
138
|
+
"dateType" => parse_attributes(r, content: "dateType"),
|
139
|
+
"dateInformation" => parse_attributes(r, content: "dateInformation")
|
140
|
+
}.compact
|
141
|
+
end
|
142
|
+
end
|
140
143
|
end.compact
|
141
144
|
dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
|
142
|
-
sizes = Array.wrap(meta.dig("sizes", "size"))
|
143
|
-
|
145
|
+
sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
|
146
|
+
if k.blank?
|
147
|
+
nil
|
148
|
+
elsif k.is_a?(String)
|
149
|
+
sanitize(k).presence
|
150
|
+
elsif k.is_a?(Hash)
|
151
|
+
sanitize(k["__content__"]).presence
|
152
|
+
end
|
153
|
+
end.compact
|
154
|
+
formats = Array.wrap(meta.dig("formats", "format")).map do |k|
|
155
|
+
if k.blank?
|
156
|
+
nil
|
157
|
+
elsif k.is_a?(String)
|
158
|
+
sanitize(k).presence
|
159
|
+
elsif k.is_a?(Hash)
|
160
|
+
sanitize(k["__content__"]).presence
|
161
|
+
end
|
162
|
+
end.compact
|
163
|
+
.map { |s| s.to_s.squish.presence }.compact
|
144
164
|
funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
|
145
165
|
scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
|
146
166
|
funder_identifier = parse_attributes(fr["funderIdentifier"])
|
@@ -124,9 +124,9 @@ module Bolognese
|
|
124
124
|
end
|
125
125
|
end
|
126
126
|
dates = []
|
127
|
-
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present?
|
128
|
-
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
|
129
|
-
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
127
|
+
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
|
128
|
+
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
|
129
|
+
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
|
130
130
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
131
131
|
|
132
132
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
data/lib/bolognese/version.rb
CHANGED
@@ -0,0 +1,57 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<resource xmlns="http://datacite.org/schema/kernel-2.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-2.2 http://schema.datacite.org/meta/kernel-2.2/metadata.xsd">
|
3
|
+
<identifier identifierType="DOI">10.13116/c6b6060e-0648-4f88-b108-95ea00928171</identifier>
|
4
|
+
<creators>
|
5
|
+
<creator>
|
6
|
+
<creatorName>EvK2 CNR Committee</creatorName>
|
7
|
+
</creator>
|
8
|
+
</creators>
|
9
|
+
<titles>
|
10
|
+
<title>SHARE (Stations at High Altitude for Research on the Environment) Network</title>
|
11
|
+
<title titleType="Subtitle">Urdukas (Baltoro Glacier, Baltistan - Pakistan)</title>
|
12
|
+
</titles>
|
13
|
+
<publisher>EvK2 CNR Committee</publisher>
|
14
|
+
<publicationYear>2011</publicationYear>
|
15
|
+
<subjects>
|
16
|
+
<subject>Environmental research</subject>
|
17
|
+
</subjects>
|
18
|
+
<contributors>
|
19
|
+
<contributor contributorType="Funder">
|
20
|
+
<contributorName>EvK2 CNR Committee</contributorName>
|
21
|
+
</contributor>
|
22
|
+
</contributors>
|
23
|
+
<dates>
|
24
|
+
<date dateType="Valid">
|
25
|
+
</date>
|
26
|
+
</dates>
|
27
|
+
<language>en</language>
|
28
|
+
<resourceType resourceTypeGeneral="Dataset"/>
|
29
|
+
<relatedIdentifiers>
|
30
|
+
<relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy">http://geonetwork.evk2cnr.org:8080/geonetwork/srv/en/main.home</relatedIdentifier>
|
31
|
+
</relatedIdentifiers>
|
32
|
+
<sizes>
|
33
|
+
<size>
|
34
|
+
</size>
|
35
|
+
</sizes>
|
36
|
+
<formats>
|
37
|
+
<format>text</format>
|
38
|
+
</formats>
|
39
|
+
<version>1.0</version>
|
40
|
+
<rights>
|
41
|
+
</rights>
|
42
|
+
<descriptions>
|
43
|
+
<description descriptionType="Abstract">The Baltoro Glacier, 62 km long, is one of the longest glaciers outside of the Polar Regions. It is located in Baltistan, in the Northern Areas of Pakistan, and runs through part of the Karakoram mountain range. In the middle of the Baltoro glacier there is the campsite of Urdukas located on the trek way to Concordia.
|
44
|
+
This automatic weather station was installed, in June 17, 2004 on the Baltoro Glacier, more precisely on a moraine ridge close to the left glacier margin at 3926 m asl.
|
45
|
+
|
46
|
+
Geographical coordinates:
|
47
|
+
Latitude: 35deg 43' 41" N
|
48
|
+
Longitude: 76deg 17' 10" EThe Urdukas AWS provides data (with a 60 minute time resolution" of the following parameters:
|
49
|
+
- air temperature (degC)
|
50
|
+
- atmopherich pressure (hPa)
|
51
|
+
- relative humidity (%)
|
52
|
+
- total precipitation (mm)
|
53
|
+
- global solar radiation (W/m2)
|
54
|
+
- wind speed (m/s)
|
55
|
+
- wind direction (degree)</description>
|
56
|
+
</descriptions>
|
57
|
+
</resource>
|
@@ -151,6 +151,24 @@ describe Bolognese::Metadata, vcr: true do
|
|
151
151
|
expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-4")
|
152
152
|
end
|
153
153
|
|
154
|
+
it "empty sizes and dates attributes" do
|
155
|
+
input = fixture_path + 'datacite-empty-sizes.xml'
|
156
|
+
subject = Bolognese::Metadata.new(input: input)
|
157
|
+
expect(subject.valid?).to be true
|
158
|
+
expect(subject.types["schemaOrg"]).to eq("Dataset")
|
159
|
+
expect(subject.types["resourceType"]).to be_nil
|
160
|
+
expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
|
161
|
+
expect(subject.creators.length).to eq(1)
|
162
|
+
expect(subject.creators.first).to eq("affiliation"=>[], "name"=>"EvK2 CNR Committee", "nameIdentifiers"=>[])
|
163
|
+
expect(subject.titles).to eq([{"title"=>"SHARE (Stations at High Altitude for Research on the Environment) Network"}, {"title"=>"Urdukas (Baltoro Glacier, Baltistan - Pakistan)", "titleType"=>"Subtitle"}])
|
164
|
+
expect(subject.dates).to eq([{"date"=>"2011", "dateType"=>"Issued"}])
|
165
|
+
expect(subject.publication_year).to eq("2011")
|
166
|
+
expect(subject.sizes).to eq([])
|
167
|
+
expect(subject.publisher).to eq("EvK2 CNR Committee")
|
168
|
+
expect(subject.agency).to eq("DataCite")
|
169
|
+
expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-2.2")
|
170
|
+
end
|
171
|
+
|
154
172
|
it "multiple licenses" do
|
155
173
|
input = "https://doi.org/10.5281/ZENODO.48440"
|
156
174
|
subject = Bolognese::Metadata.new(input: input)
|
@@ -799,7 +817,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
799
817
|
expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
|
800
818
|
expect(subject.creators).to eq([{"name"=>"Anonymous", "nameIdentifiers" => [], "affiliation" => []}])
|
801
819
|
expect(subject.titles).to eq([{"title"=>"Messung der Bildunschaerfe in H.264-codierten Bildern und Videosequenzen"}])
|
802
|
-
expect(subject.dates).to eq([{"date"=>"
|
820
|
+
expect(subject.dates).to eq([{"date"=>"2017", "dateType"=>"Issued"}])
|
803
821
|
expect(subject.publication_year).to eq("2017")
|
804
822
|
expect(subject.publisher).to eq("Siemens AG")
|
805
823
|
expect(subject.agency).to eq("DataCite")
|
@@ -836,7 +854,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
836
854
|
"affiliation"=>[{"name"=>"Royal Netherlands Meteorological Institute (KNMI)"}]}])
|
837
855
|
expect(subject.titles).to eq([{"title"=>"Multi-Sensor Reanalysis (MSR) of total ozone, version 2"}])
|
838
856
|
expect(subject.version_info).to eq("2")
|
839
|
-
expect(subject.dates).to eq([{"date"=>"2014-04-25", "dateType"=>"Available"}, {"date"=>"
|
857
|
+
expect(subject.dates).to eq([{"date"=>"2014-04-25", "dateType"=>"Available"}, {"date"=>"2015", "dateType"=>"Issued"}])
|
840
858
|
expect(subject.publication_year).to eq("2015")
|
841
859
|
expect(subject.publisher).to eq("Royal Netherlands Meteorological Institute (KNMI)")
|
842
860
|
expect(subject.agency).to eq("DataCite")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bolognese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: maremma
|
@@ -717,6 +717,7 @@ files:
|
|
717
717
|
- spec/fixtures/crossref.bib
|
718
718
|
- spec/fixtures/crossref.ris
|
719
719
|
- spec/fixtures/crossref.xml
|
720
|
+
- spec/fixtures/datacite-empty-sizes.xml
|
720
721
|
- spec/fixtures/datacite-example-affiliation.xml
|
721
722
|
- spec/fixtures/datacite-example-complicated-v3.0.xml
|
722
723
|
- spec/fixtures/datacite-example-complicated-v4.0.xml
|