briard 2.4.2 → 2.6.0

Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +18 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +40 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -39
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +242 -166
  81. data/.github/workflows/release.yml +0 -47
data/lib/briard/readers/datacite_json_reader.rb
@@ -3,11 +3,11 @@
 module Briard
   module Readers
     module DataciteJsonReader
-      def read_datacite_json(string: nil, **options)
+      def read_datacite_json(string: nil, **_options)
         errors = jsonlint(string)
-        return { "errors" => errors } if errors.present?
+        return { 'errors' => errors } if errors.present?
 
-        string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
+        string.present? ? Maremma.from_json(string).transform_keys!(&:underscore) : {}
      end
    end
  end
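
Note on the transform_keys! change: passing Symbol#to_proc is equivalent to the explicit block, so behavior is unchanged. A minimal sketch (the sample hash is hypothetical; String#underscore comes from ActiveSupport, which briard already depends on):

    require 'active_support/core_ext/string/inflections' # provides String#underscore

    # hypothetical DataCite JSON key
    { 'publicationYear' => 2022 }.transform_keys! { |key| key.underscore }
    # => {"publication_year"=>2022}
    { 'publicationYear' => 2022 }.transform_keys!(&:underscore)
    # => {"publication_year"=>2022}, same result
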
data/lib/briard/readers/datacite_reader.rb
@@ -4,12 +4,12 @@ module Briard
   module Readers
     module DataciteReader
       def get_datacite(id: nil, **options)
-        return { "string" => nil, "state" => "not_found" } unless id.present?
+        return { 'string' => nil, 'state' => 'not_found' } unless id.present?
 
         api_url = doi_api_url(id, options)
         response = Maremma.get(api_url)
-        attributes = response.body.dig("data", "attributes")
-        return { "string" => nil, "state" => "not_found" } unless attributes.present?
+        attributes = response.body.dig('data', 'attributes')
+        return { 'string' => nil, 'state' => 'not_found' } unless attributes.present?
 
         string = attributes.fetch('xml', nil)
         string = Base64.decode64(string) if string.present?
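
For context on the guard clauses above: Hash#dig returns nil as soon as any key in the path is missing, so the single unless attributes.present? check covers both an error payload with no 'data' and a 'data' object with no 'attributes'. A sketch with hypothetical response bodies:

    ok  = { 'data' => { 'attributes' => { 'url' => 'https://example.org' } } }
    err = { 'errors' => [{ 'status' => '404' }] }

    ok.dig('data', 'attributes')  # => {"url"=>"https://example.org"}
    err.dig('data', 'attributes') # => nil, so get_datacite returns 'state' => 'not_found'
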
@@ -18,92 +18,104 @@ module Briard
           doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
 
           # remove leading and trailing whitespace in text nodes
-          doc.xpath("//text()").each do |node|
-            if node.content =~ /\S/
+          doc.xpath('//text()').each do |node|
+            if /\S/.match?(node.content)
               node.content = node.content.strip
             else
               node.remove
             end
           end
-          string = doc.to_xml(:indent => 2)
+          string = doc.to_xml(indent: 2)
         end
 
-        client = Array.wrap(response.body.fetch("included", nil)).find { |m| m["type"] == "clients" }
-        client_id = client.to_h.fetch("id", nil)
-        provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find { |m| m["provider"].present? }.to_h.dig("provider", "data", "id")
-
-        content_url = attributes.fetch("contentUrl", nil) || Array.wrap(response.body.fetch("included", nil)).select { |m| m["type"] == "media" }.map do |m|
-          m.dig("attributes", "url")
-        end.compact
-
-        { "string" => string,
-          "url" => attributes.fetch("url", nil),
-          "state" => attributes.fetch("state", nil),
-          "date_registered" => attributes.fetch("registered", nil),
-          "date_updated" => attributes.fetch("updated", nil),
-          "provider_id" => provider_id,
-          "client_id" => client_id,
-          "content_url" => content_url }
+        client = Array.wrap(response.body.fetch('included', nil)).find do |m|
+          m['type'] == 'clients'
+        end
+        client_id = client.to_h.fetch('id', nil)
+        provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
+          m['provider'].present?
+        end.to_h.dig('provider', 'data', 'id')
+
+        content_url = attributes.fetch('contentUrl',
+                                       nil) || Array.wrap(response.body.fetch('included',
+                                                                              nil)).select do |m|
+          m['type'] == 'media'
+        end.map do |m|
+          m.dig('attributes', 'url')
+        end.compact
+
+        { 'string' => string,
+          'url' => attributes.fetch('url', nil),
+          'state' => attributes.fetch('state', nil),
+          'date_registered' => attributes.fetch('registered', nil),
+          'date_updated' => attributes.fetch('updated', nil),
+          'provider_id' => provider_id,
+          'client_id' => client_id,
+          'content_url' => content_url }
       end
 
       def read_datacite(string: nil, **options)
-        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
+        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
+                                                                                   :sandbox, :validate, :ra))
 
         doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
         if read_options.present?
-          schema_version = "http://datacite.org/schema/kernel-4"
+          schema_version = 'http://datacite.org/schema/kernel-4'
         else
-          ns = doc.collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
-          schema_version = Array.wrap(ns).last || "http://datacite.org/schema/kernel-4"
+          ns = doc.collect_namespaces.find { |_k, v| v.start_with?('http://datacite.org/schema/kernel') }
+          schema_version = Array.wrap(ns).last || 'http://datacite.org/schema/kernel-4'
         end
         doc.remove_namespaces!
-        string = doc.to_xml(:indent => 2)
+        string = doc.to_xml(indent: 2)
 
-        meta = Maremma.from_xml(string).to_h.fetch("resource", {})
+        meta = Maremma.from_xml(string).to_h.fetch('resource', {})
 
         # validate only when option is set, as this step is expensive and
         # not needed if XML comes from DataCite MDS
         if options[:validate]
           errors = datacite_errors(xml: string, schema_version: schema_version)
-          return { "errors" => errors } if errors.present?
+          return { 'errors' => errors } if errors.present?
         end
 
-        if options[:doi]
-          id = normalize_doi(options[:doi], sandbox: options[:sandbox])
-        else
-          id = normalize_doi(meta.dig("identifier", "__content__") || options[:id], sandbox: options[:sandbox])
-        end
-
-        identifiers = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
-          if r["__content__"].present?
-            { "identifierType" => get_identifier_type(r["alternateIdentifierType"]), "identifier" => r["__content__"] }
+        id = if options[:doi]
+               normalize_doi(options[:doi], sandbox: options[:sandbox])
+             else
+               normalize_doi(meta.dig('identifier', '__content__') || options[:id],
+                             sandbox: options[:sandbox])
+             end
+
+        identifiers = Array.wrap(meta.dig('alternateIdentifiers', 'alternateIdentifier')).map do |r|
+          if r['__content__'].present?
+            { 'identifierType' => get_identifier_type(r['alternateIdentifierType']),
+              'identifier' => r['__content__'] }
           end
         end.compact
 
-        resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
-        resource_type = meta.dig("resourceType", "__content__")
-        schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] || "CreativeWork"
+        resource_type_general = meta.dig('resourceType', 'resourceTypeGeneral')
+        resource_type = meta.dig('resourceType', '__content__')
+        schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_SO_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'CreativeWork'
         types = {
-          "resourceTypeGeneral" => resource_type_general,
-          "resourceType" => resource_type,
-          "schemaOrg" => schema_org,
-          "citeproc" => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article",
-          "bibtex" => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
-          "ris" => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
+          'resourceTypeGeneral' => resource_type_general,
+          'resourceType' => resource_type,
+          'schemaOrg' => schema_org,
+          'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || 'article',
+          'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || 'misc',
+          'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type.to_s.underscore.camelcase] || Briard::Utils::DC_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || 'GEN'
         }.compact
 
         titles = get_titles(meta)
 
-        descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
+        descriptions = Array.wrap(meta.dig('descriptions', 'description')).map do |r|
           if r.blank?
             nil
           elsif r.is_a?(String)
-            { "description" => sanitize(r), "descriptionType" => "Abstract" }
+            { 'description' => sanitize(r), 'descriptionType' => 'Abstract' }
           elsif r.is_a?(Hash)
-            { "description" => sanitize(r["__content__"]), "descriptionType" => r["descriptionType"], "lang" => r["lang"] }.compact
+            { 'description' => sanitize(r['__content__']),
+              'descriptionType' => r['descriptionType'], 'lang' => r['lang'] }.compact
           end
         end.compact
-        rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |r|
+        rights_list = Array.wrap(meta.dig('rightsList', 'rights')).map do |r|
           if r.blank?
             nil
           elsif r.is_a?(String)
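
The =~ to Regexp#match? rewrite in the text-node loop above is a common Rubocop (Performance/RegexpMatch) autocorrection: in a pure boolean test, match? is equivalent but skips allocating MatchData and setting the global match variables, so it is cheaper. For example:

    '  stripped  ' =~ /\S/      # => 2 (match index), and sets $~ as a side effect
    /\S/.match?('  stripped  ') # => true, no MatchData created
    /\S/.match?("  \n  ")       # => false, so the whitespace-only node is removed
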
@@ -113,130 +125,140 @@ module Briard
           end
         end.compact
 
-        subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
-          if subject.is_a?(String)
+        subjects = Array.wrap(meta.dig('subjects', 'subject')).reduce([]) do |sum, subject|
+          case subject
+          when String
             sum += name_to_fos(subject)
-          elsif subject.is_a?(Hash)
+          when Hash
             sum += hsh_to_fos(subject)
           end
 
           sum
-        end.uniq { |s| s["subject"] }
-
-        dates = Array.wrap(meta.dig("dates", "date")).map do |r|
-          if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
-            if Date.edtf(date).present? || Briard::Utils::UNKNOWN_INFORMATION.key?(date)
-              { "date" => date,
-                "dateType" => parse_attributes(r, content: "dateType"),
-                "dateInformation" => parse_attributes(r, content: "dateInformation")
-              }.compact
-            end
-          end
+        end.uniq { |s| s['subject'] }
+
+        dates = Array.wrap(meta.dig('dates', 'date')).map do |r|
+          next unless r.is_a?(Hash) && date = sanitize(r['__content__']).presence
+
+          next unless Date.edtf(date).present? || Briard::Utils::UNKNOWN_INFORMATION.key?(date)
+
+          { 'date' => date,
+            'dateType' => parse_attributes(r, content: 'dateType'),
+            'dateInformation' => parse_attributes(r, content: 'dateInformation') }.compact
         end.compact
-        dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
-        sizes = Array.wrap(meta.dig("sizes", "size")).map do |k|
+        if meta.fetch('publicationYear', nil).present? && get_date(dates, 'Issued').blank?
+          dates << { 'date' => meta.fetch('publicationYear', nil), 'dateType' => 'Issued' }
+        end
+        sizes = Array.wrap(meta.dig('sizes', 'size')).map do |k|
           if k.blank?
             nil
           elsif k.is_a?(String)
             sanitize(k).presence
           elsif k.is_a?(Hash)
-            sanitize(k["__content__"]).presence
+            sanitize(k['__content__']).presence
           end
         end.compact
-        formats = Array.wrap(meta.dig("formats", "format")).map do |k|
+        formats = Array.wrap(meta.dig('formats', 'format')).map do |k|
           if k.blank?
             nil
           elsif k.is_a?(String)
             sanitize(k).presence
           elsif k.is_a?(Hash)
-            sanitize(k["__content__"]).presence
+            sanitize(k['__content__']).presence
           end
         end.compact
-          .map { |s| s.to_s.squish.presence }.compact
-        funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
-          scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
-          funder_identifier = parse_attributes(fr["funderIdentifier"])
-          funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")
-          if funder_identifier_type != "Other"
-            funder_identifier = !funder_identifier.to_s.start_with?("https://","http://") && scheme_uri.present? ? normalize_id(scheme_uri + funder_identifier) : normalize_id(funder_identifier)
+           .map { |s| s.to_s.squish.presence }.compact
+        funding_references = Array.wrap(meta.dig('fundingReferences',
+                                                 'fundingReference')).compact.map do |fr|
+          scheme_uri = parse_attributes(fr['funderIdentifier'], content: 'schemeURI')
+          funder_identifier = parse_attributes(fr['funderIdentifier'])
+          funder_identifier_type = parse_attributes(fr['funderIdentifier'],
+                                                    content: 'funderIdentifierType')
+          if funder_identifier_type != 'Other'
+            funder_identifier = if !funder_identifier.to_s.start_with?('https://',
+                                                                       'http://') && scheme_uri.present?
+                                  normalize_id(scheme_uri + funder_identifier)
+                                else
+                                  normalize_id(funder_identifier)
+                                end
          end
 
           {
-            "funderName" => fr["funderName"],
-            "funderIdentifier" => funder_identifier,
-            "funderIdentifierType" => funder_identifier_type,
-            "awardNumber" => parse_attributes(fr["awardNumber"]),
-            "awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
-            "awardTitle" => fr["awardTitle"] }.compact
+            'funderName' => fr['funderName'],
+            'funderIdentifier' => funder_identifier,
+            'funderIdentifierType' => funder_identifier_type,
+            'awardNumber' => parse_attributes(fr['awardNumber']),
+            'awardUri' => parse_attributes(fr['awardNumber'], content: 'awardURI'),
+            'awardTitle' => fr['awardTitle']
+          }.compact
         end
-        related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
-          if ri["relatedIdentifierType"] == "DOI"
-            rid = validate_doi(ri["__content__"].to_s.downcase)
-          else
-            rid = ri["__content__"]
-          end
+        related_identifiers = Array.wrap(meta.dig('relatedIdentifiers',
+                                                  'relatedIdentifier')).map do |ri|
+          rid = if ri['relatedIdentifierType'] == 'DOI'
+                  validate_doi(ri['__content__'].to_s.downcase)
+                else
+                  ri['__content__']
+                end
 
           {
-            "relatedIdentifier" => rid,
-            "relatedIdentifierType" => ri["relatedIdentifierType"],
-            "relationType" => ri["relationType"],
-            "resourceTypeGeneral" => ri["resourceTypeGeneral"],
-            "relatedMetadataScheme" => ri["relatedMetadataScheme"],
-            "schemeUri" => ri["schemeURI"],
-            "schemeType" => ri["schemeType"]
+            'relatedIdentifier' => rid,
+            'relatedIdentifierType' => ri['relatedIdentifierType'],
+            'relationType' => ri['relationType'],
+            'resourceTypeGeneral' => ri['resourceTypeGeneral'],
+            'relatedMetadataScheme' => ri['relatedMetadataScheme'],
+            'schemeUri' => ri['schemeURI'],
+            'schemeType' => ri['schemeType']
           }.compact
         end
 
-        related_items = Array.wrap(meta.dig("relatedItems", "relatedItem")).map do |ri|
-
-          rii = ri["relatedItemIdentifier"]
+        related_items = Array.wrap(meta.dig('relatedItems', 'relatedItem')).map do |ri|
+          rii = ri['relatedItemIdentifier']
           relatedItemIdentifier = nil
           if rii
-            if rii["relatedItemIdentifierType"] == "DOI"
-              rid = validate_doi(rii["__content__"].to_s.downcase)
-            else
-              rid = rii["__content__"]
-            end
+            rid = if rii['relatedItemIdentifierType'] == 'DOI'
+                    validate_doi(rii['__content__'].to_s.downcase)
+                  else
+                    rii['__content__']
+                  end
 
             relatedItemIdentifier = {
-              "relatedItemIdentifier" => rid,
-              "relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
-              "relatedMetadataScheme" => rii["relatedMetadataScheme"],
-              "schemeURI" => rii["schemeURI"],
-              "schemeType" => rii["schemeType"]
+              'relatedItemIdentifier' => rid,
+              'relatedItemIdentifierType' => rii['relatedItemIdentifierType'],
+              'relatedMetadataScheme' => rii['relatedMetadataScheme'],
+              'schemeURI' => rii['schemeURI'],
+              'schemeType' => rii['schemeType']
             }.compact
           end
 
-          number = ri["number"]
+          number = ri['number']
           if number.is_a?(String)
             number = number
             numberType = nil
           else
-            number = ri.dig("number", "__content__")
-            numberType = ri.dig("number", "numberType")
+            number = ri.dig('number', '__content__')
+            numberType = ri.dig('number', 'numberType')
           end
 
           a = {
-            "relationType" => ri["relationType"],
-            "relatedItemType" => ri["relatedItemType"],
-            "relatedItemIdentifier" => relatedItemIdentifier,
-            "creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
-            "titles" => get_titles(ri),
-            "publicationYear" => ri["publicationYear"],
-            "volume" => ri["volume"],
-            "issue" => ri["issue"],
-            "number" => number,
-            "numberType" => numberType,
-            "firstPage" => ri["firstPage"],
-            "lastPage" => ri["lastPage"],
-            "publisher" => ri["publisher"],
-            "edition" => ri["edition"],
-            "contributors" => get_authors(Array.wrap(ri.dig("contributors", "contributor"))),
+            'relationType' => ri['relationType'],
+            'relatedItemType' => ri['relatedItemType'],
+            'relatedItemIdentifier' => relatedItemIdentifier,
+            'creators' => get_authors(Array.wrap(ri.dig('creators', 'creator'))),
+            'titles' => get_titles(ri),
+            'publicationYear' => ri['publicationYear'],
+            'volume' => ri['volume'],
+            'issue' => ri['issue'],
+            'number' => number,
+            'numberType' => numberType,
+            'firstPage' => ri['firstPage'],
+            'lastPage' => ri['lastPage'],
+            'publisher' => ri['publisher'],
+            'edition' => ri['edition'],
+            'contributors' => get_authors(Array.wrap(ri.dig('contributors', 'contributor')))
          }.compact
        end
 
-        geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
-          if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)
+        geo_locations = Array.wrap(meta.dig('geoLocations', 'geoLocation')).map do |gl|
+          if !gl.is_a?(Hash) || gl['geoLocationPoint'].is_a?(String) || gl['geoLocationBox'].is_a?(String) || gl['geoLocationPolygon'].is_a?(String)
             nil
           else
 
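
The dates block above now uses guard clauses instead of nested ifs: next without a value makes map yield nil for that element, and the trailing .compact discards the nils, which preserves the old semantics. A condensed sketch with hypothetical input (sanitize and the EDTF check are stubbed out):

    raw = [{ '__content__' => '2021-05-12' }, 'not a hash', { '__content__' => '  ' }]
    dates = raw.map do |r|
      next unless r.is_a?(Hash) && !(date = r['__content__'].strip).empty?

      { 'date' => date }
    end.compact
    # => [{"date"=>"2021-05-12"}]
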
@@ -244,78 +266,94 @@ module Briard
             # we want to return an array if it's already an array (i.e. multiple geoLocationPolygons)
             # vs if it's singular just return the object
            # This is for backwards compatability to allow both scenarios.
-            if gl.dig("geoLocationPolygon").kind_of?(Array)
-              geoLocationPolygon = gl.dig("geoLocationPolygon").map do |glp|
-                Array.wrap(glp.dig("polygonPoint")).map { |glpp| { "polygonPoint" => glpp } }.compact.presence
+            if gl['geoLocationPolygon'].is_a?(Array)
+              geoLocationPolygon = gl['geoLocationPolygon'].map do |glp|
+                Array.wrap(glp['polygonPoint']).map do |glpp|
+                  { 'polygonPoint' => glpp }
+                end.compact.presence
               end.compact.presence
             else
-              geoLocationPolygon = Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |glp| { "polygonPoint" => glp } }.compact.presence
+              geoLocationPolygon = Array.wrap(gl.dig('geoLocationPolygon',
+                                                     'polygonPoint')).map do |glp|
+                { 'polygonPoint' => glp }
+              end.compact.presence
             end
 
             {
-              "geoLocationPoint" => {
-                "pointLatitude" => gl.dig("geoLocationPoint", "pointLatitude"),
-                "pointLongitude" => gl.dig("geoLocationPoint", "pointLongitude")
+              'geoLocationPoint' => {
+                'pointLatitude' => gl.dig('geoLocationPoint', 'pointLatitude'),
+                'pointLongitude' => gl.dig('geoLocationPoint', 'pointLongitude')
               }.compact.presence,
-              "geoLocationBox" => {
-                "westBoundLongitude" => gl.dig("geoLocationBox", "westBoundLongitude"),
-                "eastBoundLongitude" => gl.dig("geoLocationBox", "eastBoundLongitude"),
-                "southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
-                "northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
+              'geoLocationBox' => {
+                'westBoundLongitude' => gl.dig('geoLocationBox', 'westBoundLongitude'),
+                'eastBoundLongitude' => gl.dig('geoLocationBox', 'eastBoundLongitude'),
+                'southBoundLatitude' => gl.dig('geoLocationBox', 'southBoundLatitude'),
+                'northBoundLatitude' => gl.dig('geoLocationBox', 'northBoundLatitude')
               }.compact.presence,
-              "geoLocationPolygon" => geoLocationPolygon,
-              "geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
+              'geoLocationPolygon' => geoLocationPolygon,
+              'geoLocationPlace' => parse_attributes(gl['geoLocationPlace'],
+                                                     first: true).to_s.strip.presence
             }.compact
           end
         end.compact
 
-        state = id.present? || read_options.present? ? "findable" : "not_found"
-
-        { "id" => id,
-          "types" => types,
-          "doi" => doi_from_url(id),
-          "identifiers" => identifiers,
-          "url" => options.fetch(:url, nil).to_s.strip.presence,
-          "titles" => titles,
-          "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
-          "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
-          "container" => set_container(meta),
-          "publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
-          "agency" => "datacite",
-          "funding_references" => funding_references,
-          "dates" => dates,
-          "publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
-          "descriptions" => descriptions,
-          "rights_list" => Array.wrap(rights_list),
-          "version_info" => meta.fetch("version", nil).to_s.presence,
-          "subjects" => subjects,
-          "language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
-          "geo_locations" => geo_locations,
-          "related_identifiers" => related_identifiers,
-          "related_items" => related_items,
-          "formats" => formats,
-          "sizes" => sizes,
-          "schema_version" => schema_version,
-          "state" => state
-        }.merge(read_options)
+        state = id.present? || read_options.present? ? 'findable' : 'not_found'
+
+        { 'id' => id,
+          'types' => types,
+          'doi' => doi_from_url(id),
+          'identifiers' => identifiers,
+          'url' => options.fetch(:url, nil).to_s.strip.presence,
+          'titles' => titles,
+          'creators' => get_authors(Array.wrap(meta.dig('creators', 'creator'))),
+          'contributors' => get_authors(Array.wrap(meta.dig('contributors', 'contributor'))),
+          'container' => set_container(meta),
+          'publisher' => parse_attributes(meta.fetch('publisher', nil),
+                                          first: true).to_s.strip.presence,
+          'agency' => 'datacite',
+          'funding_references' => funding_references,
+          'dates' => dates,
+          'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
+                                                 first: true).to_s.strip.presence,
+          'descriptions' => descriptions,
+          'rights_list' => Array.wrap(rights_list),
+          'version_info' => meta.fetch('version', nil).to_s.presence,
+          'subjects' => subjects,
+          'language' => parse_attributes(meta.fetch('language', nil),
+                                         first: true).to_s.strip.presence,
+          'geo_locations' => geo_locations,
+          'related_identifiers' => related_identifiers,
+          'related_items' => related_items,
+          'formats' => formats,
+          'sizes' => sizes,
+          'schema_version' => schema_version,
+          'state' => state }.merge(read_options)
       end
 
       def set_container(meta)
-        series_information = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
+        series_information = Array.wrap(meta.dig('descriptions', 'description')).find do |r|
+          r['descriptionType'] == 'SeriesInformation'
+        end.to_h.fetch('__content__', nil)
         si = get_series_information(series_information)
 
-        is_part_of = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).find { |ri| ri["relationType"] == "IsPartOf" }.to_h
+        is_part_of = Array.wrap(meta.dig('relatedIdentifiers', 'relatedIdentifier')).find do |ri|
+          ri['relationType'] == 'IsPartOf'
+        end.to_h
 
-        if si["title"].present? || is_part_of.present?
+        if si['title'].present? || is_part_of.present?
           {
-            "type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataRepository" : "Series",
-            "identifier" => is_part_of["__content__"],
-            "identifierType" => is_part_of["relatedIdentifierType"],
-            "title" => si["title"],
-            "volume" => si["volume"],
-            "issue" => si["issue"],
-            "firstPage" => si["firstPage"],
-            "lastPage" => si["lastPage"]
+            'type' => if meta.dig('resourceType', 'resourceTypeGeneral') == 'Dataset'
+                        'DataRepository'
+                      else
+                        'Series'
+                      end,
+            'identifier' => is_part_of['__content__'],
+            'identifierType' => is_part_of['relatedIdentifierType'],
+            'title' => si['title'],
+            'volume' => si['volume'],
+            'issue' => si['issue'],
+            'firstPage' => si['firstPage'],
+            'lastPage' => si['lastPage']
           }.compact
         else
           {}
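
Two details worth noting in this hunk: kind_of? and is_a? are aliases in Ruby, so that substitution is purely cosmetic, and because if is an expression, the 'type' key can bind the branch value directly rather than using an inline ternary. A sketch with a hypothetical resource type:

    resource_type_general = 'Dataset' # hypothetical
    type = if resource_type_general == 'Dataset'
             'DataRepository'
           else
             'Series'
           end
    # type => "DataRepository"
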
@@ -323,19 +361,17 @@ module Briard
       end
 
       def get_titles(meta)
-        titles = Array.wrap(meta.dig("titles", "title")).map do |r|
+        Array.wrap(meta.dig('titles', 'title')).map do |r|
           if r.blank?
             nil
           elsif r.is_a?(String)
-            { "title" => sanitize(r) }
+            { 'title' => sanitize(r) }
           else
-            { "title" => sanitize(r["__content__"]), "titleType" => r["titleType"], "lang" => r["lang"] }.compact
+            { 'title' => sanitize(r['__content__']), 'titleType' => r['titleType'],
+              'lang' => r['lang'] }.compact
           end
         end.compact
-
-        titles
       end
-
     end
   end
 end
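
The get_titles cleanup relies on implicit returns: a Ruby method returns its last evaluated expression, so assigning to a titles local and returning it on the next line was redundant. Schematically, with hypothetical helpers and a simplified map body:

    # before: explicit temporary
    def titles_old(list)
      titles = list.map { |r| { 'title' => r } }
      titles
    end

    # after: the map chain itself is the return value
    def titles_new(list)
      list.map { |r| { 'title' => r } }
    end

    titles_new(['An Example Title']) # => [{"title"=>"An Example Title"}]
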