bolognese 0.4.3 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -1
- data/Gemfile.lock +1 -1
- data/README.md +326 -11
- data/lib/bolognese/author_utils.rb +7 -5
- data/lib/bolognese/cli.rb +20 -19
- data/lib/bolognese/crossref.rb +11 -49
- data/lib/bolognese/datacite.rb +16 -33
- data/lib/bolognese/datacite_utils.rb +28 -25
- data/lib/bolognese/doi_utils.rb +1 -1
- data/lib/bolognese/metadata.rb +55 -13
- data/lib/bolognese/schema_org.rb +12 -60
- data/lib/bolognese/utils.rb +24 -12
- data/lib/bolognese/version.rb +1 -1
- data/spec/cli_spec.rb +13 -0
- data/spec/crossref_spec.rb +6 -1
- data/spec/datacite_spec.rb +6 -1
- data/spec/fixtures/schema_org.json +44 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/default.yml +760 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/default.yml +214 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/schema_org/default.yml +653 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/Schema_org_JSON.yml +719 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_id/doi.yml +930 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_id/url.yml +930 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Schema_org_JSON.yml +173 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_SchemaOrg/get_metadata/BlogPosting.yml +42 -21
- data/spec/fixtures/vcr_cassettes/Bolognese_SchemaOrg/get_metadata/BlogPosting_schema_org_JSON.yml +653 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_SchemaOrg/get_metadata_as_datacite_xml/with_data_citation.yml +653 -0
- data/spec/metadata_spec.rb +9 -12
- data/spec/schema_org_spec.rb +41 -3
- data/spec/utils_spec.rb +3 -3
- metadata +12 -2
data/lib/bolognese/cli.rb
CHANGED
|
@@ -2,8 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
require "thor"
|
|
4
4
|
|
|
5
|
+
require_relative 'doi_utils'
|
|
6
|
+
require_relative 'utils'
|
|
7
|
+
|
|
5
8
|
module Bolognese
|
|
6
9
|
class CLI < Thor
|
|
10
|
+
include Bolognese::DoiUtils
|
|
11
|
+
include Bolognese::Utils
|
|
12
|
+
|
|
7
13
|
def self.exit_on_failure?
|
|
8
14
|
true
|
|
9
15
|
end
|
|
@@ -16,26 +22,21 @@ module Bolognese
|
|
|
16
22
|
puts Bolognese::VERSION
|
|
17
23
|
end
|
|
18
24
|
|
|
19
|
-
desc "read
|
|
25
|
+
desc "read id", "read metadata for ID"
|
|
20
26
|
method_option :as, default: "schema_org"
|
|
21
|
-
def read(
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
puts Datacite.new(id: pid).as_schema_org
|
|
35
|
-
when provider == "schema_org" && options[:as] == "datacite"
|
|
36
|
-
puts SchemaOrg.new(id: pid).as_datacite
|
|
37
|
-
when provider == "schema_org"
|
|
38
|
-
puts SchemaOrg.new(id: pid).as_schema_org
|
|
27
|
+
def read(id)
|
|
28
|
+
id = normalize_id(id)
|
|
29
|
+
provider = find_provider(id)
|
|
30
|
+
output = options[:as] || "schema_org"
|
|
31
|
+
|
|
32
|
+
if provider.present?
|
|
33
|
+
p = case provider
|
|
34
|
+
when "crossref" then Crossref.new(id: id)
|
|
35
|
+
when "datacite" then Datacite.new(id: id)
|
|
36
|
+
else SchemaOrg.new(id: id)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
puts p.send(output)
|
|
39
40
|
else
|
|
40
41
|
puts "not implemented"
|
|
41
42
|
end
|
data/lib/bolognese/crossref.rb
CHANGED
|
@@ -31,8 +31,6 @@ module Bolognese
|
|
|
31
31
|
"PostedContent" => nil
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
attr_reader = :id, :raw, :metadata, :schema_org
|
|
35
|
-
|
|
36
34
|
def initialize(id: nil, string: nil)
|
|
37
35
|
id = normalize_doi(id) if id.present?
|
|
38
36
|
|
|
@@ -44,6 +42,10 @@ module Bolognese
|
|
|
44
42
|
end
|
|
45
43
|
end
|
|
46
44
|
|
|
45
|
+
alias_method :crossref, :raw
|
|
46
|
+
alias_method :as_crossref, :raw
|
|
47
|
+
alias_method :schema_org, :as_schema_org
|
|
48
|
+
|
|
47
49
|
def metadata
|
|
48
50
|
@metadata ||= raw.present? ? Maremma.from_xml(raw).fetch("doi_records", {}).fetch("doi_record", {}) : {}
|
|
49
51
|
end
|
|
@@ -120,23 +122,17 @@ module Bolognese
|
|
|
120
122
|
end
|
|
121
123
|
end
|
|
122
124
|
|
|
123
|
-
def keywords
|
|
124
|
-
|
|
125
|
-
end
|
|
126
|
-
|
|
127
125
|
def author
|
|
128
|
-
|
|
129
|
-
Array(person).select { |a| a["contributor_role"] == "author" }.map do |a|
|
|
130
|
-
{ "@type" => "Person",
|
|
131
|
-
"@id" => parse_attribute(a["ORCID"]),
|
|
132
|
-
"givenName" => a["given_name"],
|
|
133
|
-
"familyName" => a["surname"] }.compact
|
|
134
|
-
end
|
|
126
|
+
people("author")
|
|
135
127
|
end
|
|
136
128
|
|
|
137
129
|
def editor
|
|
130
|
+
people("editor")
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def people(contributor_role)
|
|
138
134
|
person = bibliographic_metadata.dig("contributors", "person_name")
|
|
139
|
-
Array(person).select { |a| a["contributor_role"] ==
|
|
135
|
+
Array(person).select { |a| a["contributor_role"] == contributor_role }.map do |a|
|
|
140
136
|
{ "@type" => "Person",
|
|
141
137
|
"@id" => parse_attribute(a["ORCID"]),
|
|
142
138
|
"givenName" => a["given_name"],
|
|
@@ -144,14 +140,6 @@ module Bolognese
|
|
|
144
140
|
end.presence
|
|
145
141
|
end
|
|
146
142
|
|
|
147
|
-
def version
|
|
148
|
-
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
def date_created
|
|
152
|
-
|
|
153
|
-
end
|
|
154
|
-
|
|
155
143
|
def date_published
|
|
156
144
|
pub_date = bibliographic_metadata.fetch("publication_date", nil) ||
|
|
157
145
|
bibliographic_metadata.fetch("acceptance_date", nil)
|
|
@@ -188,15 +176,11 @@ module Bolognese
|
|
|
188
176
|
is_part_of.fetch("name", nil)
|
|
189
177
|
end
|
|
190
178
|
|
|
191
|
-
def has_part
|
|
192
|
-
|
|
193
|
-
end
|
|
194
|
-
|
|
195
179
|
def citation
|
|
196
180
|
citations = bibliographic_metadata.dig("citation_list", "citation")
|
|
197
181
|
Array.wrap(citations).map do |c|
|
|
198
182
|
{ "@type" => "CreativeWork",
|
|
199
|
-
"@id" =>
|
|
183
|
+
"@id" => normalize_id(c["doi"]),
|
|
200
184
|
"position" => c["key"],
|
|
201
185
|
"name" => c["article_title"],
|
|
202
186
|
"datePublished" => c["cYear"] }.compact
|
|
@@ -207,27 +191,5 @@ module Bolognese
|
|
|
207
191
|
{ "@type" => "Organization",
|
|
208
192
|
"name" => "Crossref" }
|
|
209
193
|
end
|
|
210
|
-
|
|
211
|
-
def as_schema_org
|
|
212
|
-
{ "@context" => "http://schema.org",
|
|
213
|
-
"@type" => type,
|
|
214
|
-
"@id" => id,
|
|
215
|
-
"additionalType" => additional_type,
|
|
216
|
-
"name" => name,
|
|
217
|
-
"alternateName" => alternate_name,
|
|
218
|
-
"author" => author,
|
|
219
|
-
"editor" => editor,
|
|
220
|
-
"description" => description,
|
|
221
|
-
"license" => license,
|
|
222
|
-
"datePublished" => date_published,
|
|
223
|
-
"dateModified" => date_modified,
|
|
224
|
-
"pageStart" => page_start,
|
|
225
|
-
"pageEnd" => page_end,
|
|
226
|
-
"isPartOf" => is_part_of,
|
|
227
|
-
"hasPart" => has_part,
|
|
228
|
-
"citation" => citation,
|
|
229
|
-
"provider" => provider
|
|
230
|
-
}.compact
|
|
231
|
-
end
|
|
232
194
|
end
|
|
233
195
|
end
|
data/lib/bolognese/datacite.rb
CHANGED
|
@@ -18,8 +18,6 @@ module Bolognese
|
|
|
18
18
|
"Other" => "CreativeWork"
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
attr_reader = :id, :raw, :metadata, :schema_org
|
|
22
|
-
|
|
23
21
|
def initialize(id: nil, string: nil)
|
|
24
22
|
id = normalize_doi(id) if id.present?
|
|
25
23
|
|
|
@@ -31,6 +29,9 @@ module Bolognese
|
|
|
31
29
|
end
|
|
32
30
|
end
|
|
33
31
|
|
|
32
|
+
alias_method :datacite, :raw
|
|
33
|
+
alias_method :schema_org, :as_schema_org
|
|
34
|
+
|
|
34
35
|
def metadata
|
|
35
36
|
@metadata ||= raw.present? ? Maremma.from_xml(raw).fetch("resource", {}) : {}
|
|
36
37
|
end
|
|
@@ -79,17 +80,19 @@ module Bolognese
|
|
|
79
80
|
end
|
|
80
81
|
|
|
81
82
|
def keywords
|
|
82
|
-
Array(metadata.dig("subjects", "subject")).join(", ")
|
|
83
|
+
Array.wrap(metadata.dig("subjects", "subject")).join(", ").presence
|
|
83
84
|
end
|
|
84
85
|
|
|
85
86
|
def author
|
|
86
87
|
authors = metadata.dig("creators", "creator")
|
|
87
88
|
authors = [authors] if authors.is_a?(Hash)
|
|
88
|
-
get_authors(authors)
|
|
89
|
+
get_authors(authors).presence
|
|
89
90
|
end
|
|
90
91
|
|
|
91
92
|
def editor
|
|
92
|
-
|
|
93
|
+
editors = metadata.dig("contributors", "contributor")
|
|
94
|
+
editors = [editors] if editors.is_a?(Hash)
|
|
95
|
+
get_authors(editors).presence
|
|
93
96
|
end
|
|
94
97
|
|
|
95
98
|
def version
|
|
@@ -100,19 +103,21 @@ module Bolognese
|
|
|
100
103
|
Array.wrap(metadata.dig("dates", "date"))
|
|
101
104
|
end
|
|
102
105
|
|
|
106
|
+
def date(date_type)
|
|
107
|
+
dd = dates.find { |d| d["dateType"] == date_type } || {}
|
|
108
|
+
dd.fetch("text", nil)
|
|
109
|
+
end
|
|
110
|
+
|
|
103
111
|
def date_created
|
|
104
|
-
|
|
105
|
-
created.fetch("text", nil)
|
|
112
|
+
date("Created")
|
|
106
113
|
end
|
|
107
114
|
|
|
108
115
|
def date_published
|
|
109
|
-
|
|
110
|
-
published.fetch("text", nil) || metadata.fetch("publicationYear")
|
|
116
|
+
date("Issued") || metadata.fetch("publicationYear")
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
def date_modified
|
|
114
|
-
|
|
115
|
-
modified.fetch("text", nil)
|
|
120
|
+
date("Updated")
|
|
116
121
|
end
|
|
117
122
|
|
|
118
123
|
def related_identifiers(relation_type)
|
|
@@ -150,27 +155,5 @@ module Bolognese
|
|
|
150
155
|
{ "@type" => "Organization",
|
|
151
156
|
"name" => "DataCite" }
|
|
152
157
|
end
|
|
153
|
-
|
|
154
|
-
def as_schema_org
|
|
155
|
-
{ "@context" => "http://schema.org",
|
|
156
|
-
"@type" => type,
|
|
157
|
-
"@id" => id,
|
|
158
|
-
"name" => name,
|
|
159
|
-
"alternateName" => alternate_name,
|
|
160
|
-
"author" => author,
|
|
161
|
-
"description" => description,
|
|
162
|
-
"license" => license,
|
|
163
|
-
"version" => version,
|
|
164
|
-
"keywords" => keywords,
|
|
165
|
-
"dateCreated" => date_created,
|
|
166
|
-
"datePublished" => date_published,
|
|
167
|
-
"dateModified" => date_modified,
|
|
168
|
-
"isPartOf" => is_part_of,
|
|
169
|
-
"hasPart" => has_part,
|
|
170
|
-
"citation" => citation,
|
|
171
|
-
"publisher" => publisher,
|
|
172
|
-
"provider" => provider
|
|
173
|
-
}.compact
|
|
174
|
-
end
|
|
175
158
|
end
|
|
176
159
|
end
|
|
@@ -2,16 +2,24 @@ module Bolognese
|
|
|
2
2
|
module DataciteUtils
|
|
3
3
|
|
|
4
4
|
SO_TO_DC_TRANSLATIONS = {
|
|
5
|
-
"
|
|
5
|
+
"Article" => "Text",
|
|
6
|
+
"AudioObject" => "Sound",
|
|
7
|
+
"Blog" => "Text",
|
|
8
|
+
"BlogPosting" => "Text",
|
|
6
9
|
"Collection" => "Collection",
|
|
10
|
+
"CreativeWork" => "Other",
|
|
11
|
+
"DataCatalog" => "Dataset",
|
|
7
12
|
"Dataset" => "Dataset",
|
|
8
13
|
"Event" => "Event",
|
|
9
14
|
"ImageObject" => "Image",
|
|
15
|
+
"Movie" => "Audiovisual",
|
|
16
|
+
"PublicationIssue" => "Text",
|
|
17
|
+
"ScholarlyArticle" => "Text",
|
|
10
18
|
"Service" => "Service",
|
|
11
19
|
"SoftwareSourceCode" => "Software",
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"
|
|
20
|
+
"VideoObject" => "Audiovisual",
|
|
21
|
+
"WebPage" => "Text",
|
|
22
|
+
"WebSite" => "Text"
|
|
15
23
|
}
|
|
16
24
|
|
|
17
25
|
LICENSE_NAMES = {
|
|
@@ -114,13 +122,19 @@ module Bolognese
|
|
|
114
122
|
end
|
|
115
123
|
|
|
116
124
|
def insert_publication_year(xml)
|
|
117
|
-
xml.publicationYear(date_published[0..3])
|
|
125
|
+
xml.publicationYear(date_published && date_published[0..3])
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def resource_type
|
|
129
|
+
{ "resource_type_general" => SO_TO_DC_TRANSLATIONS[type] || "Other",
|
|
130
|
+
"text" => additional_type || type }
|
|
118
131
|
end
|
|
119
132
|
|
|
120
133
|
def insert_resource_type(xml)
|
|
121
134
|
return xml unless type.present?
|
|
122
135
|
|
|
123
|
-
xml.resourceType(
|
|
136
|
+
xml.resourceType(resource_type["text"],
|
|
137
|
+
'resourceTypeGeneral' => resource_type["resource_type_general"])
|
|
124
138
|
end
|
|
125
139
|
|
|
126
140
|
def insert_alternate_identifiers(xml)
|
|
@@ -160,28 +174,17 @@ module Bolognese
|
|
|
160
174
|
end
|
|
161
175
|
|
|
162
176
|
def rel_identifiers
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
"relation_type" => "IsPartOf" }
|
|
168
|
-
end.select { |i| i["related_identifier_type"].present? }
|
|
169
|
-
|
|
170
|
-
hp = Array.wrap(has_part).map do |i|
|
|
171
|
-
{
|
|
172
|
-
"text" => i["@id"],
|
|
173
|
-
"related_identifier_type" => validate_url(i["@id"]),
|
|
174
|
-
"relation_type" => "HasPart" }
|
|
175
|
-
end.select { |i| i["related_identifier_type"].present? }
|
|
177
|
+
rel_identifier(rel_ids: is_part_of, relation_type: "IsPartOf") +
|
|
178
|
+
rel_identifier(rel_ids: has_part, relation_type: "HasPart") +
|
|
179
|
+
rel_identifier(rel_ids: citation, relation_type: "References")
|
|
180
|
+
end
|
|
176
181
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
182
|
+
def rel_identifier(rel_ids: nil, relation_type: nil)
|
|
183
|
+
Array.wrap(rel_ids).map do |i|
|
|
184
|
+
{ "text" => i["@id"],
|
|
180
185
|
"related_identifier_type" => validate_url(i["@id"]),
|
|
181
|
-
"relation_type" =>
|
|
186
|
+
"relation_type" => relation_type }
|
|
182
187
|
end.select { |i| i["related_identifier_type"].present? }
|
|
183
|
-
|
|
184
|
-
ipo + hp + c
|
|
185
188
|
end
|
|
186
189
|
|
|
187
190
|
def insert_related_identifiers(xml)
|
data/lib/bolognese/doi_utils.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Bolognese
|
|
|
9
9
|
return nil unless doi.present?
|
|
10
10
|
|
|
11
11
|
# remove non-printing whitespace and downcase
|
|
12
|
-
doi = doi.
|
|
12
|
+
doi = doi.delete("\u200B").downcase
|
|
13
13
|
|
|
14
14
|
# turn DOI into URL, escape unsafe characters
|
|
15
15
|
"https://doi.org/" + Addressable::URI.encode(doi)
|
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -14,23 +14,65 @@ module Bolognese
|
|
|
14
14
|
|
|
15
15
|
attr_reader :id, :raw, :provider
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
alias_method :datacite, :as_datacite
|
|
18
|
+
|
|
19
|
+
def url
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def version
|
|
24
|
+
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def keywords
|
|
28
|
+
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def date_created
|
|
32
|
+
|
|
20
33
|
end
|
|
21
34
|
|
|
22
|
-
def
|
|
23
|
-
|
|
35
|
+
def page_start
|
|
36
|
+
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def page_end
|
|
40
|
+
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def has_part
|
|
44
|
+
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def publisher
|
|
48
|
+
|
|
24
49
|
end
|
|
25
50
|
|
|
26
|
-
def
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
|
|
51
|
+
def as_schema_org
|
|
52
|
+
{ "@context" => "http://schema.org",
|
|
53
|
+
"@type" => type,
|
|
54
|
+
"@id" => id,
|
|
55
|
+
"url" => url,
|
|
56
|
+
"additionalType" => additional_type,
|
|
57
|
+
"name" => name,
|
|
58
|
+
"alternateName" => alternate_name,
|
|
59
|
+
"author" => author,
|
|
60
|
+
"editor" => editor,
|
|
61
|
+
"description" => description,
|
|
62
|
+
"license" => license,
|
|
63
|
+
"version" => version,
|
|
64
|
+
"keywords" => keywords,
|
|
65
|
+
"dateCreated" => date_created,
|
|
66
|
+
"datePublished" => date_published,
|
|
67
|
+
"dateModified" => date_modified,
|
|
68
|
+
"pageStart" => page_start,
|
|
69
|
+
"pageEnd" => page_end,
|
|
70
|
+
"isPartOf" => is_part_of,
|
|
71
|
+
"hasPart" => has_part,
|
|
72
|
+
"citation" => citation,
|
|
73
|
+
"publisher" => publisher,
|
|
74
|
+
"provider" => provider
|
|
75
|
+
}.compact.to_json
|
|
34
76
|
end
|
|
35
77
|
end
|
|
36
78
|
end
|
data/lib/bolognese/schema_org.rb
CHANGED
|
@@ -1,46 +1,22 @@
|
|
|
1
1
|
module Bolognese
|
|
2
2
|
class SchemaOrg < Metadata
|
|
3
3
|
|
|
4
|
-
DC_TO_SO_TRANSLATIONS = {
|
|
5
|
-
"Audiovisual" => "VideoObject",
|
|
6
|
-
"Collection" => "Collection",
|
|
7
|
-
"Dataset" => "Dataset",
|
|
8
|
-
"Event" => "Event",
|
|
9
|
-
"Image" => "ImageObject",
|
|
10
|
-
"InteractiveResource" => nil,
|
|
11
|
-
"Model" => nil,
|
|
12
|
-
"PhysicalObject" => nil,
|
|
13
|
-
"Service" => "Service",
|
|
14
|
-
"Software" => "SoftwareSourceCode",
|
|
15
|
-
"Sound" => "AudioObject",
|
|
16
|
-
"Text" => "ScholarlyArticle",
|
|
17
|
-
"Workflow" => nil,
|
|
18
|
-
"Other" => "CreativeWork"
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
attr_reader = :id, :raw, :metadata, :schema_org
|
|
22
|
-
|
|
23
4
|
def initialize(id: nil, string: nil)
|
|
24
|
-
id =
|
|
5
|
+
id = normalize_id(id) if id.present?
|
|
25
6
|
|
|
26
7
|
if string.present?
|
|
27
8
|
@raw = string
|
|
28
9
|
elsif id.present?
|
|
29
10
|
response = Maremma.get(id)
|
|
30
|
-
|
|
11
|
+
doc = Nokogiri::XML(response.body.fetch("data", nil))
|
|
12
|
+
@raw = doc.at_xpath('//script[@type="application/ld+json"]')
|
|
31
13
|
end
|
|
32
14
|
end
|
|
33
15
|
|
|
16
|
+
alias_method :schema_org, :as_schema_org
|
|
17
|
+
|
|
34
18
|
def metadata
|
|
35
|
-
@metadata ||=
|
|
36
|
-
if raw.present?
|
|
37
|
-
doc = Nokogiri::XML(raw)
|
|
38
|
-
tag = doc.at_xpath('//script[@type="application/ld+json"]')
|
|
39
|
-
Maremma.from_json(tag)
|
|
40
|
-
else
|
|
41
|
-
{}
|
|
42
|
-
end
|
|
43
|
-
end
|
|
19
|
+
@metadata ||= raw.present? ? Maremma.from_json(raw) : {}
|
|
44
20
|
end
|
|
45
21
|
|
|
46
22
|
def exists?
|
|
@@ -52,11 +28,11 @@ module Bolognese
|
|
|
52
28
|
end
|
|
53
29
|
|
|
54
30
|
def id
|
|
55
|
-
|
|
31
|
+
normalize_id(metadata.fetch("@id", nil))
|
|
56
32
|
end
|
|
57
33
|
|
|
58
34
|
def url
|
|
59
|
-
|
|
35
|
+
normalize_id(metadata.fetch("url", nil))
|
|
60
36
|
end
|
|
61
37
|
|
|
62
38
|
def type
|
|
@@ -76,11 +52,11 @@ module Bolognese
|
|
|
76
52
|
end
|
|
77
53
|
|
|
78
54
|
def author
|
|
79
|
-
Array(metadata.fetch("author", nil)).map { |a| a.except("name") }
|
|
55
|
+
Array(metadata.fetch("author", nil)).map { |a| a.except("name") }.presence
|
|
80
56
|
end
|
|
81
57
|
|
|
82
58
|
def editor
|
|
83
|
-
Array(metadata.fetch("editor", nil)).map { |a| a.except("name") }
|
|
59
|
+
Array(metadata.fetch("editor", nil)).map { |a| a.except("name") }.presence
|
|
84
60
|
end
|
|
85
61
|
|
|
86
62
|
def description
|
|
@@ -120,11 +96,11 @@ module Bolognese
|
|
|
120
96
|
end
|
|
121
97
|
|
|
122
98
|
def has_part
|
|
123
|
-
related_identifiers("hasPart")
|
|
99
|
+
related_identifiers("hasPart").presence
|
|
124
100
|
end
|
|
125
101
|
|
|
126
102
|
def citation
|
|
127
|
-
related_identifiers("citation")
|
|
103
|
+
related_identifiers("citation").presence
|
|
128
104
|
end
|
|
129
105
|
|
|
130
106
|
def publisher
|
|
@@ -142,29 +118,5 @@ module Bolognese
|
|
|
142
118
|
def provider
|
|
143
119
|
metadata.fetch("provider", nil)
|
|
144
120
|
end
|
|
145
|
-
|
|
146
|
-
def as_schema_org
|
|
147
|
-
{ "@context" => "http://schema.org",
|
|
148
|
-
"@type" => type,
|
|
149
|
-
"@id" => id,
|
|
150
|
-
"url" => url,
|
|
151
|
-
"name" => name,
|
|
152
|
-
"alternateName" => alternate_name,
|
|
153
|
-
"author" => author,
|
|
154
|
-
"editor" => editor,
|
|
155
|
-
"description" => description,
|
|
156
|
-
"license" => license,
|
|
157
|
-
"version" => version,
|
|
158
|
-
"keywords" => keywords,
|
|
159
|
-
"dateCreated" => date_created,
|
|
160
|
-
"datePublished" => date_published,
|
|
161
|
-
"dateModified" => date_modified,
|
|
162
|
-
"isPartOf" => is_part_of,
|
|
163
|
-
"hasPart" => has_part,
|
|
164
|
-
"citation" => citation,
|
|
165
|
-
"publisher" => publisher,
|
|
166
|
-
"provider" => provider
|
|
167
|
-
}.compact
|
|
168
|
-
end
|
|
169
121
|
end
|
|
170
122
|
end
|