bolognese 1.0.19 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/bolognese/metadata.rb +23 -20
- data/lib/bolognese/readers/bibtex_reader.rb +4 -2
- data/lib/bolognese/readers/citeproc_reader.rb +4 -2
- data/lib/bolognese/readers/codemeta_reader.rb +4 -2
- data/lib/bolognese/readers/crossref_reader.rb +4 -2
- data/lib/bolognese/readers/datacite_json_reader.rb +4 -2
- data/lib/bolognese/readers/datacite_reader.rb +4 -2
- data/lib/bolognese/readers/ris_reader.rb +4 -2
- data/lib/bolognese/readers/schema_org_reader.rb +4 -2
- data/lib/bolognese/version.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +2 -0
- data/spec/readers/schema_org_reader_spec.rb +37 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cdd3b249d4bb2a0126342f0fe581953de3bad55a6da02eda97129752b7caf5f0
|
4
|
+
data.tar.gz: b54781d9c4c6fdce604e2f4c12cb4a009a67f7c9e72b0034fa3a18829bd10d7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e587b91a50c7617c450b373fcae22f97f5455c0566bbe33b1fab806b2a426f821e49692dc2a865f8a286b4d5c63f22caf92263c9fb2b576bf1bc5d480e183f5
|
7
|
+
data.tar.gz: 2d4d9c4629aa9787c04b2d5933fe269353de0f95e4f6038fed2ceb7663c113dcbdb010642f1229cfb762b3332a842895b937df7bc9b1edc127c0778a41d4d38c
|
data/Gemfile.lock
CHANGED
data/lib/bolognese/metadata.rb
CHANGED
@@ -76,28 +76,31 @@ module Bolognese
|
|
76
76
|
@content_url = hsh.to_h["content_url"].presence
|
77
77
|
|
78
78
|
# set attributes directly
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
79
|
+
read_options = options.slice(
|
80
|
+
:sandbox,
|
81
|
+
:creator,
|
82
|
+
:contributor,
|
83
|
+
:titles,
|
84
|
+
:types,
|
85
|
+
:alternate_identifiers,
|
86
|
+
:periodical,
|
87
|
+
:publisher,
|
88
|
+
:funding_references,
|
89
|
+
:dates,
|
90
|
+
:publication_year,
|
91
|
+
:descriptions,
|
92
|
+
:rights_list,
|
93
|
+
:version,
|
94
|
+
:subjects,
|
95
|
+
:language,
|
96
|
+
:geo_locations,
|
97
|
+
:related_identifiers,
|
98
|
+
:formats,
|
99
|
+
:sizes
|
100
|
+
).compact
|
98
101
|
|
99
102
|
# generate name for method to call dynamically
|
100
|
-
@meta = @from.present? ? send("read_" + @from, string: string
|
103
|
+
@meta = @from.present? ? send("read_" + @from, { string: string }.merge(read_options)) : {}
|
101
104
|
@identifier = normalize_doi(options[:doi] || input, options) || @meta.fetch("id", nil) || @meta.fetch("identifier", nil)
|
102
105
|
end
|
103
106
|
|
@@ -27,6 +27,8 @@ module Bolognese
|
|
27
27
|
}
|
28
28
|
|
29
29
|
def read_bibtex(string: nil, **options)
|
30
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
31
|
+
|
30
32
|
meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new
|
31
33
|
|
32
34
|
schema_org = BIB_TO_SO_TRANSLATIONS[meta.try(:type).to_s] || "ScholarlyArticle"
|
@@ -66,7 +68,7 @@ module Bolognese
|
|
66
68
|
end
|
67
69
|
|
68
70
|
page_first, page_last = meta.try(:pages).to_s.split("-")
|
69
|
-
state = doi.present? ? "findable" : "not_found"
|
71
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
70
72
|
dates = if meta.try(:date).present?
|
71
73
|
[{ "date" => meta.date.to_s,
|
72
74
|
"dateType" => "Issued" }]
|
@@ -92,7 +94,7 @@ module Bolognese
|
|
92
94
|
"descriptions" => meta.try(:abstract).present? ? [{ "description" => meta.try(:abstract) && sanitize(meta.abstract.to_s).presence, "descriptionType" => "Abstract" }] : [],
|
93
95
|
"rights_list" => meta.try(:copyright).present? ? [{ "rightsUri" => meta.try(:copyright).to_s.presence }.compact] : [],
|
94
96
|
"state" => state
|
95
|
-
}
|
97
|
+
}.merge(read_options)
|
96
98
|
end
|
97
99
|
end
|
98
100
|
end
|
@@ -31,6 +31,8 @@ module Bolognese
|
|
31
31
|
return { "errors" => errors } if errors.present?
|
32
32
|
end
|
33
33
|
|
34
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
35
|
+
|
34
36
|
meta = string.present? ? Maremma.from_json(string) : {}
|
35
37
|
|
36
38
|
citeproc_type = meta.fetch("type", nil)
|
@@ -75,7 +77,7 @@ module Bolognese
|
|
75
77
|
nil
|
76
78
|
end
|
77
79
|
id = normalize_id(meta.fetch("id", nil))
|
78
|
-
state = id.present? ? "findable" : "not_found"
|
80
|
+
state = id.present? || read_options.present? ? "findable" : "not_found"
|
79
81
|
subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
|
80
82
|
{ "subject" => s }
|
81
83
|
end
|
@@ -99,7 +101,7 @@ module Bolognese
|
|
99
101
|
"version" => meta.fetch("version", nil),
|
100
102
|
"subjects" => subjects,
|
101
103
|
"state" => state
|
102
|
-
}
|
104
|
+
}.merge(read_options)
|
103
105
|
end
|
104
106
|
end
|
105
107
|
end
|
@@ -18,6 +18,8 @@ module Bolognese
|
|
18
18
|
return { "errors" => errors } if errors.present?
|
19
19
|
end
|
20
20
|
|
21
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
22
|
+
|
21
23
|
meta = string.present? ? Maremma.from_json(string) : {}
|
22
24
|
identifier = meta.fetch("identifier", nil)
|
23
25
|
id = normalize_id(meta.fetch("@id", nil) || identifier)
|
@@ -29,7 +31,7 @@ module Bolognese
|
|
29
31
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
30
32
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
31
33
|
publisher = meta.fetch("publisher", nil)
|
32
|
-
state = meta.present? ? "findable" : "not_found"
|
34
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
33
35
|
schema_org = meta.fetch("@type", nil)
|
34
36
|
types = {
|
35
37
|
"resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
|
@@ -60,7 +62,7 @@ module Bolognese
|
|
60
62
|
"version" => meta.fetch("version", nil),
|
61
63
|
"subjects" => subjects,
|
62
64
|
"state" => state
|
63
|
-
}
|
65
|
+
}.merge(read_options)
|
64
66
|
end
|
65
67
|
|
66
68
|
# def related_identifiers(relation_type)
|
@@ -20,6 +20,8 @@ module Bolognese
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def read_crossref(string: nil, **options)
|
23
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
24
|
+
|
23
25
|
if string.present?
|
24
26
|
m = Maremma.from_xml(string).dig("doi_records", "doi_record") || {}
|
25
27
|
meta = m.dig("crossref", "error").nil? ? m : {}
|
@@ -91,7 +93,7 @@ module Bolognese
|
|
91
93
|
"dateType" => "Updated" }
|
92
94
|
]
|
93
95
|
publication_year = crossref_date_published(bibliographic_metadata).present? ? crossref_date_published(bibliographic_metadata)[0..3] : nil
|
94
|
-
state = meta.present? ? "findable" : "not_found"
|
96
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
95
97
|
|
96
98
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
97
99
|
periodical = if journal_metadata.present?
|
@@ -129,7 +131,7 @@ module Bolognese
|
|
129
131
|
"sizes" => nil,
|
130
132
|
"schema_version" => nil,
|
131
133
|
"state" => state
|
132
|
-
}
|
134
|
+
}.merge(read_options)
|
133
135
|
end
|
134
136
|
|
135
137
|
def crossref_alternate_identifiers(bibliographic_metadata)
|
@@ -7,9 +7,11 @@ module Bolognese
|
|
7
7
|
errors = jsonlint(string)
|
8
8
|
return { "errors" => errors } if errors.present?
|
9
9
|
|
10
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
11
|
+
|
10
12
|
meta = string.present? ? Maremma.from_json(string) : {}
|
11
13
|
|
12
|
-
state = meta.fetch("doi", nil).present? ? "findable" : "not_found"
|
14
|
+
state = meta.fetch("doi", nil).present? || read_options.present? ? "findable" : "not_found"
|
13
15
|
|
14
16
|
dates = Array.wrap(meta.fetch("dates", nil)).map do |d|
|
15
17
|
{ "date" => d["date"],
|
@@ -51,7 +53,7 @@ module Bolognese
|
|
51
53
|
"geo_locations" => meta.fetch("geoLocations", nil),
|
52
54
|
"schema_version" => meta.fetch("schemaVersion", nil),
|
53
55
|
"state" => state
|
54
|
-
}
|
56
|
+
}.merge(read_options)
|
55
57
|
end
|
56
58
|
end
|
57
59
|
end
|
@@ -49,6 +49,8 @@ module Bolognese
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def read_datacite(string: nil, **options)
|
52
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
53
|
+
|
52
54
|
doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
|
53
55
|
ns = doc.collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
|
54
56
|
schema_version = Array.wrap(ns).last || "http://datacite.org/schema/kernel-4"
|
@@ -180,7 +182,7 @@ module Bolognese
|
|
180
182
|
end
|
181
183
|
end.compact
|
182
184
|
periodical = set_periodical(meta)
|
183
|
-
state = doi.present? ? "findable" : "not_found"
|
185
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
184
186
|
|
185
187
|
{ "id" => id,
|
186
188
|
"types" => types,
|
@@ -207,7 +209,7 @@ module Bolognese
|
|
207
209
|
"sizes" => sizes,
|
208
210
|
"schema_version" => schema_version,
|
209
211
|
"state" => state
|
210
|
-
}
|
212
|
+
}.merge(read_options)
|
211
213
|
end
|
212
214
|
|
213
215
|
def set_periodical(meta)
|
@@ -34,6 +34,8 @@ module Bolognese
|
|
34
34
|
}
|
35
35
|
|
36
36
|
def read_ris(string: nil, **options)
|
37
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
38
|
+
|
37
39
|
meta = ris_meta(string: string)
|
38
40
|
|
39
41
|
ris_type = meta.fetch("TY", nil) || "GEN"
|
@@ -69,7 +71,7 @@ module Bolognese
|
|
69
71
|
else
|
70
72
|
nil
|
71
73
|
end
|
72
|
-
state = doi.present? ? "findable" : "not_found"
|
74
|
+
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
73
75
|
subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
|
74
76
|
{ "subject" => s }
|
75
77
|
end
|
@@ -93,7 +95,7 @@ module Bolognese
|
|
93
95
|
"subjects" => subjects,
|
94
96
|
"language" => meta.fetch("LA", nil),
|
95
97
|
"state" => state
|
96
|
-
}
|
98
|
+
}.merge(read_options)
|
97
99
|
end
|
98
100
|
|
99
101
|
def ris_meta(string: nil)
|
@@ -33,6 +33,8 @@ module Bolognese
|
|
33
33
|
return { "errors" => errors } if errors.present?
|
34
34
|
end
|
35
35
|
|
36
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:string, :sandbox))
|
37
|
+
|
36
38
|
meta = string.present? ? Maremma.from_json(string) : {}
|
37
39
|
|
38
40
|
identifier = Array.wrap(meta.fetch("identifier", nil))
|
@@ -108,7 +110,7 @@ module Bolognese
|
|
108
110
|
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
|
109
111
|
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
|
110
112
|
|
111
|
-
state = meta.present? ? "findable" : "not_found"
|
113
|
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
112
114
|
geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
|
113
115
|
if gl.dig("geo", "box")
|
114
116
|
s, w, n, e = gl.dig("geo", "box").split(" ", 4)
|
@@ -159,7 +161,7 @@ module Bolognese
|
|
159
161
|
"schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
|
160
162
|
"funding_references" => funding_references,
|
161
163
|
"geo_locations" => geo_locations
|
162
|
-
}
|
164
|
+
}.merge(read_options)
|
163
165
|
end
|
164
166
|
|
165
167
|
def schema_org_related_identifier(meta, relation_type: nil)
|
data/lib/bolognese/version.rb
CHANGED
@@ -185,6 +185,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
185
185
|
"funderName"=>"European Commission"}],
|
186
186
|
types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
|
187
187
|
|
188
|
+
expect(subject.valid?).to be true
|
188
189
|
expect(subject.doi).to eq("10.5281/zenodo.1239")
|
189
190
|
expect(subject.identifier).to eq("https://doi.org/10.5281/zenodo.1239")
|
190
191
|
expect(subject.types["schemaOrg"]).to eq("Dataset")
|
@@ -203,6 +204,7 @@ describe Bolognese::Metadata, vcr: true do
|
|
203
204
|
"funderName"=>"European Commission"}])
|
204
205
|
expect(subject.agency).to eq("DataCite")
|
205
206
|
expect(subject.schema_version).to eq("http://datacite.org/schema/kernel-4")
|
207
|
+
expect(subject.state).to eq("findable")
|
206
208
|
end
|
207
209
|
|
208
210
|
it "missing resource_type_general" do
|
@@ -229,5 +229,42 @@ describe Bolognese::Metadata, vcr: true do
|
|
229
229
|
expect(subject.publisher).to eq("TOPMed")
|
230
230
|
expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
|
231
231
|
end
|
232
|
+
|
233
|
+
it "from attributes" do
|
234
|
+
subject = Bolognese::Metadata.new(input: nil,
|
235
|
+
from: "schema_org",
|
236
|
+
doi: "10.5281/zenodo.1239",
|
237
|
+
creator: [{"type"=>"Person", "name"=>"Jahn, Najko", "givenName"=>"Najko", "familyName"=>"Jahn"}],
|
238
|
+
titles: [{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }],
|
239
|
+
descriptions: [{ "description" => "The dataset contains a sample of metadata describing papers" }],
|
240
|
+
publisher: "Zenodo",
|
241
|
+
publication_year: "2013",
|
242
|
+
dates: [{"date"=>"2013-04-03", "dateType"=>"Issued"}],
|
243
|
+
funding_references: [{"awardNumber"=>"246686",
|
244
|
+
"awardTitle"=>"Open Access Infrastructure for Research in Europe",
|
245
|
+
"awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
|
246
|
+
"funderIdentifier"=>"https://doi.org/10.13039/501100000780",
|
247
|
+
"funderIdentifierType"=>"Crossref Funder ID",
|
248
|
+
"funderName"=>"European Commission"}],
|
249
|
+
types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
|
250
|
+
|
251
|
+
expect(subject.valid?).to be true
|
252
|
+
expect(subject.doi).to eq("10.5281/zenodo.1239")
|
253
|
+
expect(subject.identifier).to eq("https://doi.org/10.5281/zenodo.1239")
|
254
|
+
expect(subject.types["schemaOrg"]).to eq("Dataset")
|
255
|
+
expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
|
256
|
+
expect(subject.creator).to eq([{"familyName"=>"Jahn", "givenName"=>"Najko", "name"=>"Jahn, Najko", "type"=>"Person"}])
|
257
|
+
expect(subject.titles).to eq([{"title"=>"Publication Fp7 Funding Acknowledgment - Plos Openaire"}])
|
258
|
+
expect(subject.descriptions.first["description"]).to start_with("The dataset contains a sample of metadata describing papers")
|
259
|
+
expect(subject.dates).to eq([{"date"=>"2013-04-03", "dateType"=>"Issued"}])
|
260
|
+
expect(subject.publication_year).to eq("2013")
|
261
|
+
expect(subject.publisher).to eq("Zenodo")
|
262
|
+
expect(subject.funding_references).to eq([{"awardNumber"=>"246686",
|
263
|
+
"awardTitle"=>"Open Access Infrastructure for Research in Europe",
|
264
|
+
"awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
|
265
|
+
"funderIdentifier"=>"https://doi.org/10.13039/501100000780",
|
266
|
+
"funderIdentifierType"=>"Crossref Funder ID",
|
267
|
+
"funderName"=>"European Commission"}])
|
268
|
+
end
|
232
269
|
end
|
233
270
|
end
|