briard 2.4.2 → 2.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +18 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -9
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -39
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/schema_org_writer_spec.rb +329 -294
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +242 -166
- data/.github/workflows/release.yml +0 -47
@@ -3,11 +3,11 @@
|
|
3
3
|
module Briard
|
4
4
|
module Readers
|
5
5
|
module DataciteJsonReader
|
6
|
-
def read_datacite_json(string: nil, **
|
6
|
+
def read_datacite_json(string: nil, **_options)
|
7
7
|
errors = jsonlint(string)
|
8
|
-
return {
|
8
|
+
return { 'errors' => errors } if errors.present?
|
9
9
|
|
10
|
-
string.present? ? Maremma.from_json(string).transform_keys!
|
10
|
+
string.present? ? Maremma.from_json(string).transform_keys!(&:underscore) : {}
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
@@ -4,12 +4,12 @@ module Briard
|
|
4
4
|
module Readers
|
5
5
|
module DataciteReader
|
6
6
|
def get_datacite(id: nil, **options)
|
7
|
-
return {
|
7
|
+
return { 'string' => nil, 'state' => 'not_found' } unless id.present?
|
8
8
|
|
9
9
|
api_url = doi_api_url(id, options)
|
10
10
|
response = Maremma.get(api_url)
|
11
|
-
attributes = response.body.dig(
|
12
|
-
return {
|
11
|
+
attributes = response.body.dig('data', 'attributes')
|
12
|
+
return { 'string' => nil, 'state' => 'not_found' } unless attributes.present?
|
13
13
|
|
14
14
|
string = attributes.fetch('xml', nil)
|
15
15
|
string = Base64.decode64(string) if string.present?
|
@@ -18,92 +18,104 @@ module Briard
|
|
18
18
|
doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
|
19
19
|
|
20
20
|
# remove leading and trailing whitespace in text nodes
|
21
|
-
doc.xpath(
|
22
|
-
if node.content
|
21
|
+
doc.xpath('//text()').each do |node|
|
22
|
+
if /\S/.match?(node.content)
|
23
23
|
node.content = node.content.strip
|
24
24
|
else
|
25
25
|
node.remove
|
26
26
|
end
|
27
27
|
end
|
28
|
-
string = doc.to_xml(:
|
28
|
+
string = doc.to_xml(indent: 2)
|
29
29
|
end
|
30
30
|
|
31
|
-
client = Array.wrap(response.body.fetch(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
m.
|
37
|
-
end.
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
31
|
+
client = Array.wrap(response.body.fetch('included', nil)).find do |m|
|
32
|
+
m['type'] == 'clients'
|
33
|
+
end
|
34
|
+
client_id = client.to_h.fetch('id', nil)
|
35
|
+
provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
|
36
|
+
m['provider'].present?
|
37
|
+
end.to_h.dig('provider', 'data', 'id')
|
38
|
+
|
39
|
+
content_url = attributes.fetch('contentUrl',
|
40
|
+
nil) || Array.wrap(response.body.fetch('included',
|
41
|
+
nil)).select do |m|
|
42
|
+
m['type'] == 'media'
|
43
|
+
end.map do |m|
|
44
|
+
m.dig('attributes', 'url')
|
45
|
+
end.compact
|
46
|
+
|
47
|
+
{ 'string' => string,
|
48
|
+
'url' => attributes.fetch('url', nil),
|
49
|
+
'state' => attributes.fetch('state', nil),
|
50
|
+
'date_registered' => attributes.fetch('registered', nil),
|
51
|
+
'date_updated' => attributes.fetch('updated', nil),
|
52
|
+
'provider_id' => provider_id,
|
53
|
+
'client_id' => client_id,
|
54
|
+
'content_url' => content_url }
|
47
55
|
end
|
48
56
|
|
49
57
|
def read_datacite(string: nil, **options)
|
50
|
-
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
58
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
59
|
+
:sandbox, :validate, :ra))
|
51
60
|
|
52
61
|
doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
|
53
62
|
if read_options.present?
|
54
|
-
schema_version =
|
63
|
+
schema_version = 'http://datacite.org/schema/kernel-4'
|
55
64
|
else
|
56
|
-
ns = doc.collect_namespaces.find { |
|
57
|
-
schema_version = Array.wrap(ns).last ||
|
65
|
+
ns = doc.collect_namespaces.find { |_k, v| v.start_with?('http://datacite.org/schema/kernel') }
|
66
|
+
schema_version = Array.wrap(ns).last || 'http://datacite.org/schema/kernel-4'
|
58
67
|
end
|
59
68
|
doc.remove_namespaces!
|
60
|
-
string = doc.to_xml(:
|
69
|
+
string = doc.to_xml(indent: 2)
|
61
70
|
|
62
|
-
meta = Maremma.from_xml(string).to_h.fetch(
|
71
|
+
meta = Maremma.from_xml(string).to_h.fetch('resource', {})
|
63
72
|
|
64
73
|
# validate only when option is set, as this step is expensive and
|
65
74
|
# not needed if XML comes from DataCite MDS
|
66
75
|
if options[:validate]
|
67
76
|
errors = datacite_errors(xml: string, schema_version: schema_version)
|
68
|
-
return {
|
77
|
+
return { 'errors' => errors } if errors.present?
|
69
78
|
end
|
70
79
|
|
71
|
-
if options[:doi]
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
+
id = if options[:doi]
|
81
|
+
normalize_doi(options[:doi], sandbox: options[:sandbox])
|
82
|
+
else
|
83
|
+
normalize_doi(meta.dig('identifier', '__content__') || options[:id],
|
84
|
+
sandbox: options[:sandbox])
|
85
|
+
end
|
86
|
+
|
87
|
+
identifiers = Array.wrap(meta.dig('alternateIdentifiers', 'alternateIdentifier')).map do |r|
|
88
|
+
if r['__content__'].present?
|
89
|
+
{ 'identifierType' => get_identifier_type(r['alternateIdentifierType']),
|
90
|
+
'identifier' => r['__content__'] }
|
80
91
|
end
|
81
92
|
end.compact
|
82
93
|
|
83
|
-
resource_type_general = meta.dig(
|
84
|
-
resource_type = meta.dig(
|
85
|
-
schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] ||
|
94
|
+
resource_type_general = meta.dig('resourceType', 'resourceTypeGeneral')
|
95
|
+
resource_type = meta.dig('resourceType', '__content__')
|
96
|
+
schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'CreativeWork'
|
86
97
|
types = {
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
98
|
+
'resourceTypeGeneral' => resource_type_general,
|
99
|
+
'resourceType' => resource_type,
|
100
|
+
'schemaOrg' => schema_org,
|
101
|
+
'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || 'article',
|
102
|
+
'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || 'misc',
|
103
|
+
'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'GEN'
|
93
104
|
}.compact
|
94
105
|
|
95
106
|
titles = get_titles(meta)
|
96
107
|
|
97
|
-
descriptions = Array.wrap(meta.dig(
|
108
|
+
descriptions = Array.wrap(meta.dig('descriptions', 'description')).map do |r|
|
98
109
|
if r.blank?
|
99
110
|
nil
|
100
111
|
elsif r.is_a?(String)
|
101
|
-
{
|
112
|
+
{ 'description' => sanitize(r), 'descriptionType' => 'Abstract' }
|
102
113
|
elsif r.is_a?(Hash)
|
103
|
-
{
|
114
|
+
{ 'description' => sanitize(r['__content__']),
|
115
|
+
'descriptionType' => r['descriptionType'], 'lang' => r['lang'] }.compact
|
104
116
|
end
|
105
117
|
end.compact
|
106
|
-
rights_list = Array.wrap(meta.dig(
|
118
|
+
rights_list = Array.wrap(meta.dig('rightsList', 'rights')).map do |r|
|
107
119
|
if r.blank?
|
108
120
|
nil
|
109
121
|
elsif r.is_a?(String)
|
@@ -113,130 +125,140 @@ module Briard
|
|
113
125
|
end
|
114
126
|
end.compact
|
115
127
|
|
116
|
-
subjects = Array.wrap(meta.dig(
|
117
|
-
|
128
|
+
subjects = Array.wrap(meta.dig('subjects', 'subject')).reduce([]) do |sum, subject|
|
129
|
+
case subject
|
130
|
+
when String
|
118
131
|
sum += name_to_fos(subject)
|
119
|
-
|
132
|
+
when Hash
|
120
133
|
sum += hsh_to_fos(subject)
|
121
134
|
end
|
122
135
|
|
123
136
|
sum
|
124
|
-
end.uniq { |s| s[
|
125
|
-
|
126
|
-
dates = Array.wrap(meta.dig(
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
end
|
137
|
+
end.uniq { |s| s['subject'] }
|
138
|
+
|
139
|
+
dates = Array.wrap(meta.dig('dates', 'date')).map do |r|
|
140
|
+
next unless r.is_a?(Hash) && date = sanitize(r['__content__']).presence
|
141
|
+
|
142
|
+
next unless Date.edtf(date).present? || Briard::Utils::UNKNOWN_INFORMATION.key?(date)
|
143
|
+
|
144
|
+
{ 'date' => date,
|
145
|
+
'dateType' => parse_attributes(r, content: 'dateType'),
|
146
|
+
'dateInformation' => parse_attributes(r, content: 'dateInformation') }.compact
|
135
147
|
end.compact
|
136
|
-
|
137
|
-
|
148
|
+
if meta.fetch('publicationYear', nil).present? && get_date(dates, 'Issued').blank?
|
149
|
+
dates << { 'date' => meta.fetch('publicationYear', nil), 'dateType' => 'Issued' }
|
150
|
+
end
|
151
|
+
sizes = Array.wrap(meta.dig('sizes', 'size')).map do |k|
|
138
152
|
if k.blank?
|
139
153
|
nil
|
140
154
|
elsif k.is_a?(String)
|
141
155
|
sanitize(k).presence
|
142
156
|
elsif k.is_a?(Hash)
|
143
|
-
sanitize(k[
|
157
|
+
sanitize(k['__content__']).presence
|
144
158
|
end
|
145
159
|
end.compact
|
146
|
-
formats = Array.wrap(meta.dig(
|
160
|
+
formats = Array.wrap(meta.dig('formats', 'format')).map do |k|
|
147
161
|
if k.blank?
|
148
162
|
nil
|
149
163
|
elsif k.is_a?(String)
|
150
164
|
sanitize(k).presence
|
151
165
|
elsif k.is_a?(Hash)
|
152
|
-
sanitize(k[
|
166
|
+
sanitize(k['__content__']).presence
|
153
167
|
end
|
154
168
|
end.compact
|
155
|
-
|
156
|
-
funding_references = Array.wrap(meta.dig(
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
169
|
+
.map { |s| s.to_s.squish.presence }.compact
|
170
|
+
funding_references = Array.wrap(meta.dig('fundingReferences',
|
171
|
+
'fundingReference')).compact.map do |fr|
|
172
|
+
scheme_uri = parse_attributes(fr['funderIdentifier'], content: 'schemeURI')
|
173
|
+
funder_identifier = parse_attributes(fr['funderIdentifier'])
|
174
|
+
funder_identifier_type = parse_attributes(fr['funderIdentifier'],
|
175
|
+
content: 'funderIdentifierType')
|
176
|
+
if funder_identifier_type != 'Other'
|
177
|
+
funder_identifier = if !funder_identifier.to_s.start_with?('https://',
|
178
|
+
'http://') && scheme_uri.present?
|
179
|
+
normalize_id(scheme_uri + funder_identifier)
|
180
|
+
else
|
181
|
+
normalize_id(funder_identifier)
|
182
|
+
end
|
162
183
|
end
|
163
184
|
|
164
185
|
{
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
186
|
+
'funderName' => fr['funderName'],
|
187
|
+
'funderIdentifier' => funder_identifier,
|
188
|
+
'funderIdentifierType' => funder_identifier_type,
|
189
|
+
'awardNumber' => parse_attributes(fr['awardNumber']),
|
190
|
+
'awardUri' => parse_attributes(fr['awardNumber'], content: 'awardURI'),
|
191
|
+
'awardTitle' => fr['awardTitle']
|
192
|
+
}.compact
|
171
193
|
end
|
172
|
-
related_identifiers = Array.wrap(meta.dig(
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
194
|
+
related_identifiers = Array.wrap(meta.dig('relatedIdentifiers',
|
195
|
+
'relatedIdentifier')).map do |ri|
|
196
|
+
rid = if ri['relatedIdentifierType'] == 'DOI'
|
197
|
+
validate_doi(ri['__content__'].to_s.downcase)
|
198
|
+
else
|
199
|
+
ri['__content__']
|
200
|
+
end
|
178
201
|
|
179
202
|
{
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
203
|
+
'relatedIdentifier' => rid,
|
204
|
+
'relatedIdentifierType' => ri['relatedIdentifierType'],
|
205
|
+
'relationType' => ri['relationType'],
|
206
|
+
'resourceTypeGeneral' => ri['resourceTypeGeneral'],
|
207
|
+
'relatedMetadataScheme' => ri['relatedMetadataScheme'],
|
208
|
+
'schemeUri' => ri['schemeURI'],
|
209
|
+
'schemeType' => ri['schemeType']
|
187
210
|
}.compact
|
188
211
|
end
|
189
212
|
|
190
|
-
related_items = Array.wrap(meta.dig(
|
191
|
-
|
192
|
-
rii = ri["relatedItemIdentifier"]
|
213
|
+
related_items = Array.wrap(meta.dig('relatedItems', 'relatedItem')).map do |ri|
|
214
|
+
rii = ri['relatedItemIdentifier']
|
193
215
|
relatedItemIdentifier = nil
|
194
216
|
if rii
|
195
|
-
if rii[
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
217
|
+
rid = if rii['relatedItemIdentifierType'] == 'DOI'
|
218
|
+
validate_doi(rii['__content__'].to_s.downcase)
|
219
|
+
else
|
220
|
+
rii['__content__']
|
221
|
+
end
|
200
222
|
|
201
223
|
relatedItemIdentifier = {
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
224
|
+
'relatedItemIdentifier' => rid,
|
225
|
+
'relatedItemIdentifierType' => rii['relatedItemIdentifierType'],
|
226
|
+
'relatedMetadataScheme' => rii['relatedMetadataScheme'],
|
227
|
+
'schemeURI' => rii['schemeURI'],
|
228
|
+
'schemeType' => rii['schemeType']
|
207
229
|
}.compact
|
208
230
|
end
|
209
231
|
|
210
|
-
number = ri[
|
232
|
+
number = ri['number']
|
211
233
|
if number.is_a?(String)
|
212
234
|
number = number
|
213
235
|
numberType = nil
|
214
236
|
else
|
215
|
-
number = ri.dig(
|
216
|
-
numberType = ri.dig(
|
237
|
+
number = ri.dig('number', '__content__')
|
238
|
+
numberType = ri.dig('number', 'numberType')
|
217
239
|
end
|
218
240
|
|
219
241
|
a = {
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
242
|
+
'relationType' => ri['relationType'],
|
243
|
+
'relatedItemType' => ri['relatedItemType'],
|
244
|
+
'relatedItemIdentifier' => relatedItemIdentifier,
|
245
|
+
'creators' => get_authors(Array.wrap(ri.dig('creators', 'creator'))),
|
246
|
+
'titles' => get_titles(ri),
|
247
|
+
'publicationYear' => ri['publicationYear'],
|
248
|
+
'volume' => ri['volume'],
|
249
|
+
'issue' => ri['issue'],
|
250
|
+
'number' => number,
|
251
|
+
'numberType' => numberType,
|
252
|
+
'firstPage' => ri['firstPage'],
|
253
|
+
'lastPage' => ri['lastPage'],
|
254
|
+
'publisher' => ri['publisher'],
|
255
|
+
'edition' => ri['edition'],
|
256
|
+
'contributors' => get_authors(Array.wrap(ri.dig('contributors', 'contributor')))
|
235
257
|
}.compact
|
236
258
|
end
|
237
259
|
|
238
|
-
geo_locations = Array.wrap(meta.dig(
|
239
|
-
if !gl.is_a?(Hash) || gl[
|
260
|
+
geo_locations = Array.wrap(meta.dig('geoLocations', 'geoLocation')).map do |gl|
|
261
|
+
if !gl.is_a?(Hash) || gl['geoLocationPoint'].is_a?(String) || gl['geoLocationBox'].is_a?(String) || gl['geoLocationPolygon'].is_a?(String)
|
240
262
|
nil
|
241
263
|
else
|
242
264
|
|
@@ -244,78 +266,94 @@ module Briard
|
|
244
266
|
# we want to return an array if it's already an array (i.e. multiple geoLocationPolygons)
|
245
267
|
# vs if it's singular just return the object
|
246
268
|
# This is for backwards compatability to allow both scenarios.
|
247
|
-
if gl
|
248
|
-
geoLocationPolygon = gl
|
249
|
-
Array.wrap(glp
|
269
|
+
if gl['geoLocationPolygon'].is_a?(Array)
|
270
|
+
geoLocationPolygon = gl['geoLocationPolygon'].map do |glp|
|
271
|
+
Array.wrap(glp['polygonPoint']).map do |glpp|
|
272
|
+
{ 'polygonPoint' => glpp }
|
273
|
+
end.compact.presence
|
250
274
|
end.compact.presence
|
251
275
|
else
|
252
|
-
geoLocationPolygon = Array.wrap(gl.dig(
|
276
|
+
geoLocationPolygon = Array.wrap(gl.dig('geoLocationPolygon',
|
277
|
+
'polygonPoint')).map do |glp|
|
278
|
+
{ 'polygonPoint' => glp }
|
279
|
+
end.compact.presence
|
253
280
|
end
|
254
281
|
|
255
282
|
{
|
256
|
-
|
257
|
-
|
258
|
-
|
283
|
+
'geoLocationPoint' => {
|
284
|
+
'pointLatitude' => gl.dig('geoLocationPoint', 'pointLatitude'),
|
285
|
+
'pointLongitude' => gl.dig('geoLocationPoint', 'pointLongitude')
|
259
286
|
}.compact.presence,
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
287
|
+
'geoLocationBox' => {
|
288
|
+
'westBoundLongitude' => gl.dig('geoLocationBox', 'westBoundLongitude'),
|
289
|
+
'eastBoundLongitude' => gl.dig('geoLocationBox', 'eastBoundLongitude'),
|
290
|
+
'southBoundLatitude' => gl.dig('geoLocationBox', 'southBoundLatitude'),
|
291
|
+
'northBoundLatitude' => gl.dig('geoLocationBox', 'northBoundLatitude')
|
265
292
|
}.compact.presence,
|
266
|
-
|
267
|
-
|
293
|
+
'geoLocationPolygon' => geoLocationPolygon,
|
294
|
+
'geoLocationPlace' => parse_attributes(gl['geoLocationPlace'],
|
295
|
+
first: true).to_s.strip.presence
|
268
296
|
}.compact
|
269
297
|
end
|
270
298
|
end.compact
|
271
299
|
|
272
|
-
state = id.present? || read_options.present? ?
|
273
|
-
|
274
|
-
{
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
300
|
+
state = id.present? || read_options.present? ? 'findable' : 'not_found'
|
301
|
+
|
302
|
+
{ 'id' => id,
|
303
|
+
'types' => types,
|
304
|
+
'doi' => doi_from_url(id),
|
305
|
+
'identifiers' => identifiers,
|
306
|
+
'url' => options.fetch(:url, nil).to_s.strip.presence,
|
307
|
+
'titles' => titles,
|
308
|
+
'creators' => get_authors(Array.wrap(meta.dig('creators', 'creator'))),
|
309
|
+
'contributors' => get_authors(Array.wrap(meta.dig('contributors', 'contributor'))),
|
310
|
+
'container' => set_container(meta),
|
311
|
+
'publisher' => parse_attributes(meta.fetch('publisher', nil),
|
312
|
+
first: true).to_s.strip.presence,
|
313
|
+
'agency' => 'datacite',
|
314
|
+
'funding_references' => funding_references,
|
315
|
+
'dates' => dates,
|
316
|
+
'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
|
317
|
+
first: true).to_s.strip.presence,
|
318
|
+
'descriptions' => descriptions,
|
319
|
+
'rights_list' => Array.wrap(rights_list),
|
320
|
+
'version_info' => meta.fetch('version', nil).to_s.presence,
|
321
|
+
'subjects' => subjects,
|
322
|
+
'language' => parse_attributes(meta.fetch('language', nil),
|
323
|
+
first: true).to_s.strip.presence,
|
324
|
+
'geo_locations' => geo_locations,
|
325
|
+
'related_identifiers' => related_identifiers,
|
326
|
+
'related_items' => related_items,
|
327
|
+
'formats' => formats,
|
328
|
+
'sizes' => sizes,
|
329
|
+
'schema_version' => schema_version,
|
330
|
+
'state' => state }.merge(read_options)
|
301
331
|
end
|
302
332
|
|
303
333
|
def set_container(meta)
|
304
|
-
series_information = Array.wrap(meta.dig(
|
334
|
+
series_information = Array.wrap(meta.dig('descriptions', 'description')).find do |r|
|
335
|
+
r['descriptionType'] == 'SeriesInformation'
|
336
|
+
end.to_h.fetch('__content__', nil)
|
305
337
|
si = get_series_information(series_information)
|
306
338
|
|
307
|
-
is_part_of = Array.wrap(meta.dig(
|
339
|
+
is_part_of = Array.wrap(meta.dig('relatedIdentifiers', 'relatedIdentifier')).find do |ri|
|
340
|
+
ri['relationType'] == 'IsPartOf'
|
341
|
+
end.to_h
|
308
342
|
|
309
|
-
if si[
|
343
|
+
if si['title'].present? || is_part_of.present?
|
310
344
|
{
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
345
|
+
'type' => if meta.dig('resourceType', 'resourceTypeGeneral') == 'Dataset'
|
346
|
+
'DataRepository'
|
347
|
+
else
|
348
|
+
'Series'
|
349
|
+
end,
|
350
|
+
'identifier' => is_part_of['__content__'],
|
351
|
+
'identifierType' => is_part_of['relatedIdentifierType'],
|
352
|
+
'title' => si['title'],
|
353
|
+
'volume' => si['volume'],
|
354
|
+
'issue' => si['issue'],
|
355
|
+
'firstPage' => si['firstPage'],
|
356
|
+
'lastPage' => si['lastPage']
|
319
357
|
}.compact
|
320
358
|
else
|
321
359
|
{}
|
@@ -323,19 +361,17 @@ module Briard
|
|
323
361
|
end
|
324
362
|
|
325
363
|
def get_titles(meta)
|
326
|
-
|
364
|
+
Array.wrap(meta.dig('titles', 'title')).map do |r|
|
327
365
|
if r.blank?
|
328
366
|
nil
|
329
367
|
elsif r.is_a?(String)
|
330
|
-
{
|
368
|
+
{ 'title' => sanitize(r) }
|
331
369
|
else
|
332
|
-
{
|
370
|
+
{ 'title' => sanitize(r['__content__']), 'titleType' => r['titleType'],
|
371
|
+
'lang' => r['lang'] }.compact
|
333
372
|
end
|
334
373
|
end.compact
|
335
|
-
|
336
|
-
titles
|
337
374
|
end
|
338
|
-
|
339
375
|
end
|
340
376
|
end
|
341
377
|
end
|