bolognese 1.0.19 → 1.0.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/bolognese/metadata.rb +23 -20
- data/lib/bolognese/readers/bibtex_reader.rb +4 -2
- data/lib/bolognese/readers/citeproc_reader.rb +4 -2
- data/lib/bolognese/readers/codemeta_reader.rb +4 -2
- data/lib/bolognese/readers/crossref_reader.rb +4 -2
- data/lib/bolognese/readers/datacite_json_reader.rb +4 -2
- data/lib/bolognese/readers/datacite_reader.rb +4 -2
- data/lib/bolognese/readers/ris_reader.rb +4 -2
- data/lib/bolognese/readers/schema_org_reader.rb +4 -2
- data/lib/bolognese/version.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +2 -0
- data/spec/readers/schema_org_reader_spec.rb +37 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cdd3b249d4bb2a0126342f0fe581953de3bad55a6da02eda97129752b7caf5f0
|
4
|
+
data.tar.gz: b54781d9c4c6fdce604e2f4c12cb4a009a67f7c9e72b0034fa3a18829bd10d7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e587b91a50c7617c450b373fcae22f97f5455c0566bbe33b1fab806b2a426f821e49692dc2a865f8a286b4d5c63f22caf92263c9fb2b576bf1bc5d480e183f5
|
7
|
+
data.tar.gz: 2d4d9c4629aa9787c04b2d5933fe269353de0f95e4f6038fed2ceb7663c113dcbdb010642f1229cfb762b3332a842895b937df7bc9b1edc127c0778a41d4d38c
|
data/Gemfile.lock
CHANGED
data/lib/bolognese/metadata.rb
CHANGED
@@ -76,28 +76,31 @@ module Bolognese
|
|
76
76
|
@content_url = hsh.to_h["content_url"].presence
|
77
77
|
|
78
78
|
# set attributes directly
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
79
|
+
read_options = options.slice(
|
80
|
+
:sandbox,
|
81
|
+
:creator,
|
82
|
+
:contributor,
|
83
|
+
:titles,
|
84
|
+
:types,
|
85
|
+
:alternate_identifiers,
|
86
|
+
:periodical,
|
87
|
+
:publisher,
|
88
|
+
:funding_references,
|
89
|
+
:dates,
|
90
|
+
:publication_year,
|
91
|
+
:descriptions,
|
92
|
+
:rights_list,
|
93
|
+
:version,
|
94
|
+
:subjects,
|
95
|
+
:language,
|
96
|
+
:geo_locations,
|
97
|
+
:related_identifiers,
|
98
|
+
:formats,
|
99
|
+
:sizes
|
100
|
+
).compact
|
98
101
|
|
99
102
|
# generate name for method to call dynamically
|
100
|
-
@meta = @from.present? ? send("read_" + @from, string: string
|
103
|
+
@meta = @from.present? ? send("read_" + @from, { string: string }.merge(read_options)) : {}
|
101
104
|
@identifier = normalize_doi(options[:doi] || input, options) || @meta.fetch("id", nil) || @meta.fetch("identifier", nil)
|
102
105
|
end
|
103
106
|
|
data/lib/bolognese/readers/bibtex_reader.rb
CHANGED
@@ -27,6 +27,8 @@ module Bolognese
|
|
27
27
|
}
|
28
28
|
|
29
29
|
def read_bibtex(string: nil, **options)
|
30
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
31
|
+
|
30
32
|
meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new
|
31
33
|
|
32
34
|
schema_org = BIB_TO_SO_TRANSLATIONS[meta.try(:type).to_s] || "ScholarlyArticle"
|
@@ -66,7 +68,7 @@ module Bolognese
|
|
66
68
|
end
|
67
69
|
|
68
70
|
page_first, page_last = meta.try(:pages).to_s.split("-")
|
69
|
-
state = doi.present? ? "findable" : "not_found"
|
71
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
70
72
|
dates = if meta.try(:date).present?
|
71
73
|
[{ "date" => meta.date.to_s,
|
72
74
|
"dateType" => "Issued" }]
|
@@ -92,7 +94,7 @@ module Bolognese
|
|
92
94
|
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
|
93
95
|
"rights_list" => meta.try(:copyright).present? ? [{ "rightsUri" => meta.try(:copyright).to_s.presence }.compact] : [],
|
94
96
|
"state" => state
|
95
|
-
}
|
97
|
+
}.merge(read_options)
|
96
98
|
end
|
97
99
|
end
|
98
100
|
end
|
data/lib/bolognese/readers/citeproc_reader.rb
CHANGED
@@ -31,6 +31,8 @@ module Bolognese
|
|
31
31
|
return { "errors" => errors } if errors.present?
|
32
32
|
end
|
33
33
|
|
34
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
35
|
+
|
34
36
|
meta = string.present? ? Maremma.from_json(string) : {}
|
35
37
|
|
36
38
|
citeproc_type = meta.fetch("type", nil)
|
@@ -75,7 +77,7 @@ module Bolognese
|
|
75
77
|
nil
|
76
78
|
end
|
77
79
|
id = normalize_id(meta.fetch("id", nil))
|
78
|
-
state = id.present? ? "findable" : "not_found"
|
80
|
+
state = id.present? || read_options.present? ? "findable" : "not_found"
|
79
81
|
subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
|
80
82
|
{ "subject" => s }
|
81
83
|
end
|
@@ -99,7 +101,7 @@ module Bolognese
|
|
99
101
|
"version" => meta.fetch("version", nil),
|
100
102
|
"subjects" => subjects,
|
101
103
|
"state" => state
|
102
|
-
}
|
104
|
+
}.merge(read_options)
|
103
105
|
end
|
104
106
|
end
|
105
107
|
end
|
data/lib/bolognese/readers/codemeta_reader.rb
CHANGED
@@ -18,6 +18,8 @@ module Bolognese
|
|
18
18
|
return { "errors" => errors } if errors.present?
|
19
19
|
end
|
20
20
|
|
21
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
22
|
+
|
21
23
|
meta = string.present? ? Maremma.from_json(string) : {}
|
22
24
|
identifier = meta.fetch("identifier", nil)
|
23
25
|
id = normalize_id(meta.fetch("@id", nil) || identifier)
|
@@ -29,7 +31,7 @@ module Bolognese
|
|
29
31
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
30
32
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
31
33
|
publisher = meta.fetch("publisher", nil)
|
32
|
-
state = meta.present? ? "findable" : "not_found"
|
34
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
33
35
|
schema_org = meta.fetch("@type", nil)
|
34
36
|
types = {
|
35
37
|
"resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
|
@@ -60,7 +62,7 @@ module Bolognese
|
|
60
62
|
"version" => meta.fetch("version", nil),
|
61
63
|
"subjects" => subjects,
|
62
64
|
"state" => state
|
63
|
-
}
|
65
|
+
}.merge(read_options)
|
64
66
|
end
|
65
67
|
|
66
68
|
# def related_identifiers(relation_type)
|
data/lib/bolognese/readers/crossref_reader.rb
CHANGED
@@ -20,6 +20,8 @@ module Bolognese
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def read_crossref(string: nil, **options)
|
23
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
24
|
+
|
23
25
|
if string.present?
|
24
26
|
m = Maremma.from_xml(string).dig("doi_records", "doi_record") || {}
|
25
27
|
meta = m.dig("crossref", "error").nil? ? m : {}
|
@@ -91,7 +93,7 @@ module Bolognese
|
|
91
93
|
"dateType" => "Updated" }
|
92
94
|
]
|
93
95
|
publication_year = crossref_date_published(bibliographic_metadata).present? ? crossref_date_published(bibliographic_metadata)[0..3] : nil
|
94
|
-
state = meta.present? ? "findable" : "not_found"
|
96
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
95
97
|
|
96
98
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
97
99
|
periodical = if journal_metadata.present?
|
@@ -129,7 +131,7 @@ module Bolognese
|
|
129
131
|
"sizes" => nil,
|
130
132
|
"schema_version" => nil,
|
131
133
|
"state" => state
|
132
|
-
}
|
134
|
+
}.merge(read_options)
|
133
135
|
end
|
134
136
|
|
135
137
|
def crossref_alternate_identifiers(bibliographic_metadata)
|
data/lib/bolognese/readers/datacite_json_reader.rb
CHANGED
@@ -7,9 +7,11 @@ module Bolognese
|
|
7
7
|
errors = jsonlint(string)
|
8
8
|
return { "errors" => errors } if errors.present?
|
9
9
|
|
10
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
11
|
+
|
10
12
|
meta = string.present? ? Maremma.from_json(string) : {}
|
11
13
|
|
12
|
-
state = meta.fetch("doi", nil).present? ? "findable" : "not_found"
|
14
|
+
state = meta.fetch("doi", nil).present? || read_options.present? ? "findable" : "not_found"
|
13
15
|
|
14
16
|
dates = Array.wrap(meta.fetch("dates", nil)).map do |d|
|
15
17
|
{ "date" => d["date"],
|
@@ -51,7 +53,7 @@ module Bolognese
|
|
51
53
|
"geo_locations" => meta.fetch("geoLocations", nil),
|
52
54
|
"schema_version" => meta.fetch("schemaVersion", nil),
|
53
55
|
"state" => state
|
54
|
-
}
|
56
|
+
}.merge(read_options)
|
55
57
|
end
|
56
58
|
end
|
57
59
|
end
|
data/lib/bolognese/readers/datacite_reader.rb
CHANGED
@@ -49,6 +49,8 @@ module Bolognese
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def read_datacite(string: nil, **options)
|
52
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
53
|
+
|
52
54
|
doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
|
53
55
|
ns = doc.collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
54
56
|
schema_version = Array.wrap(ns).last || "http://datacite.org/schema/kernel-4"
|
@@ -180,7 +182,7 @@ module Bolognese
|
|
180
182
|
end
|
181
183
|
end.compact
|
182
184
|
periodical = set_periodical(meta)
|
183
|
-
state = doi.present? ? "findable" : "not_found"
|
185
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
184
186
|
|
185
187
|
{ "id" => id,
|
186
188
|
"types" => types,
|
@@ -207,7 +209,7 @@ module Bolognese
|
|
207
209
|
"sizes" => sizes,
|
208
210
|
"schema_version" => schema_version,
|
209
211
|
"state" => state
|
210
|
-
}
|
212
|
+
}.merge(read_options)
|
211
213
|
end
|
212
214
|
|
213
215
|
def set_periodical(meta)
|
data/lib/bolognese/readers/ris_reader.rb
CHANGED
@@ -34,6 +34,8 @@ module Bolognese
|
|
34
34
|
}
|
35
35
|
|
36
36
|
def read_ris(string: nil, **options)
|
37
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
38
|
+
|
37
39
|
meta = ris_meta(string: string)
|
38
40
|
|
39
41
|
ris_type = meta.fetch("TY", nil) || "GEN"
|
@@ -69,7 +71,7 @@ module Bolognese
|
|
69
71
|
else
|
70
72
|
nil
|
71
73
|
end
|
72
|
-
state = doi.present? ? "findable" : "not_found"
|
74
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
73
75
|
subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
|
74
76
|
{ "subject" => s }
|
75
77
|
end
|
@@ -93,7 +95,7 @@ module Bolognese
|
|
93
95
|
"subjects" => subjects,
|
94
96
|
"language" => meta.fetch("LA", nil),
|
95
97
|
"state" => state
|
96
|
-
}
|
98
|
+
}.merge(read_options)
|
97
99
|
end
|
98
100
|
|
99
101
|
def ris_meta(string: nil)
|
data/lib/bolognese/readers/schema_org_reader.rb
CHANGED
@@ -33,6 +33,8 @@ module Bolognese
|
|
33
33
|
return { "errors" => errors } if errors.present?
|
34
34
|
end
|
35
35
|
|
36
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
37
|
+
|
36
38
|
meta = string.present? ? Maremma.from_json(string) : {}
|
37
39
|
|
38
40
|
identifier = Array.wrap(meta.fetch("identifier", nil))
|
@@ -108,7 +110,7 @@ module Bolognese
|
|
108
110
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
109
111
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
110
112
|
|
111
|
-
state = meta.present? ? "findable" : "not_found"
|
113
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
112
114
|
geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
|
113
115
|
if gl.dig("geo", "box")
|
114
116
|
s, w, n, e = gl.dig("geo", "box").split(" ", 4)
|
@@ -159,7 +161,7 @@ module Bolognese
|
|
159
161
|
"schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
|
160
162
|
"funding_references" => funding_references,
|
161
163
|
"geo_locations" => geo_locations
|
162
|
-
}
|
164
|
+
}.merge(read_options)
|
163
165
|
end
|
164
166
|
|
165
167
|
def schema_org_related_identifier(meta, relation_type: nil)
|
data/lib/bolognese/version.rb
CHANGED
data/spec/readers/datacite_reader_spec.rb
CHANGED
@@ -185,6 +185,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
185
185
|
"funderName"=>"European Commission"}],
|
186
186
|
types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
|
187
187
|
|
188
|
+
expect(subject.valid?).to be true
|
188
189
|
expect(subject.doi).to eq("10.5281/zenodo.1239")
|
189
190
|
expect(subject.identifier).to eq("https://doi.org/10.5281/zenodo.1239")
|
190
191
|
expect(subject.types["schemaOrg"]).to eq("Dataset")
|
@@ -203,6 +204,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
203
204
|
"funderName"=>"European Commission"}])
|
204
205
|
expect(subject.agency).to eq("DataCite")
|
205
206
|
expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-4")
|
207
|
+
expect(subject.state).to eq("findable")
|
206
208
|
end
|
207
209
|
|
208
210
|
it "missing resource_type_general" do
|
data/spec/readers/schema_org_reader_spec.rb
CHANGED
@@ -229,5 +229,42 @@ describe Bolognese::Metadata, vcr: true do
|
|
229
229
|
expect(subject.publisher).to eq("TOPMed")
|
230
230
|
expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
|
231
231
|
end
|
232
|
+
|
233
|
+
it "from attributes" do
|
234
|
+
subject = Bolognese::Metadata.new(input: nil,
|
235
|
+
from: "schema_org",
|
236
|
+
doi: "10.5281/zenodo.1239",
|
237
|
+
creator: [{"type"=>"Person", "name"=>"Jahn, Najko", "givenName"=>"Najko", "familyName"=>"Jahn"}],
|
238
|
+
titles: [{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }],
|
239
|
+
descriptions: [{ "description" => "The dataset contains a sample of metadata describing papers" }],
|
240
|
+
publisher: "Zenodo",
|
241
|
+
publication_year: "2013",
|
242
|
+
dates: [{"date"=>"2013-04-03", "dateType"=>"Issued"}],
|
243
|
+
funding_references: [{"awardNumber"=>"246686",
|
244
|
+
"awardTitle"=>"Open Access Infrastructure for Research in Europe",
|
245
|
+
"awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
|
246
|
+
"funderIdentifier"=>"https://doi.org/10.13039/501100000780",
|
247
|
+
"funderIdentifierType"=>"Crossref Funder ID",
|
248
|
+
"funderName"=>"European Commission"}],
|
249
|
+
types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
|
250
|
+
|
251
|
+
expect(subject.valid?).to be true
|
252
|
+
expect(subject.doi).to eq("10.5281/zenodo.1239")
|
253
|
+
expect(subject.identifier).to eq("https://doi.org/10.5281/zenodo.1239")
|
254
|
+
expect(subject.types["schemaOrg"]).to eq("Dataset")
|
255
|
+
expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
|
256
|
+
expect(subject.creator).to eq([{"familyName"=>"Jahn", "givenName"=>"Najko", "name"=>"Jahn, Najko", "type"=>"Person"}])
|
257
|
+
expect(subject.titles).to eq([{"title"=>"Publication Fp7 Funding Acknowledgment - Plos Openaire"}])
|
258
|
+
expect(subject.descriptions.first["description"]).to start_with("The dataset contains a sample of metadata describing papers")
|
259
|
+
expect(subject.dates).to eq([{"date"=>"2013-04-03", "dateType"=>"Issued"}])
|
260
|
+
expect(subject.publication_year).to eq("2013")
|
261
|
+
expect(subject.publisher).to eq("Zenodo")
|
262
|
+
expect(subject.funding_references).to eq([{"awardNumber"=>"246686",
|
263
|
+
"awardTitle"=>"Open Access Infrastructure for Research in Europe",
|
264
|
+
"awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
|
265
|
+
"funderIdentifier"=>"https://doi.org/10.13039/501100000780",
|
266
|
+
"funderIdentifierType"=>"Crossref Funder ID",
|
267
|
+
"funderName"=>"European Commission"}])
|
268
|
+
end
|
232
269
|
end
|
233
270
|
end
|