briard 2.4.2 → 2.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +144 -620
  6. data/.rubocop_todo.yml +76 -0
  7. data/CHANGELOG.md +18 -0
  8. data/Gemfile +2 -0
  9. data/Gemfile.lock +43 -9
  10. data/Rakefile +1 -1
  11. data/{bolognese.gemspec → briard.gemspec} +46 -39
  12. data/lib/briard/array.rb +2 -2
  13. data/lib/briard/author_utils.rb +79 -71
  14. data/lib/briard/cli.rb +12 -13
  15. data/lib/briard/crossref_utils.rb +73 -61
  16. data/lib/briard/datacite_utils.rb +132 -106
  17. data/lib/briard/doi_utils.rb +10 -10
  18. data/lib/briard/metadata.rb +96 -106
  19. data/lib/briard/metadata_utils.rb +87 -78
  20. data/lib/briard/readers/bibtex_reader.rb +65 -65
  21. data/lib/briard/readers/cff_reader.rb +88 -70
  22. data/lib/briard/readers/citeproc_reader.rb +90 -84
  23. data/lib/briard/readers/codemeta_reader.rb +68 -50
  24. data/lib/briard/readers/crosscite_reader.rb +2 -2
  25. data/lib/briard/readers/crossref_reader.rb +249 -210
  26. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  27. data/lib/briard/readers/datacite_reader.rb +225 -189
  28. data/lib/briard/readers/npm_reader.rb +49 -42
  29. data/lib/briard/readers/ris_reader.rb +82 -80
  30. data/lib/briard/readers/schema_org_reader.rb +182 -159
  31. data/lib/briard/string.rb +1 -1
  32. data/lib/briard/utils.rb +4 -4
  33. data/lib/briard/version.rb +3 -1
  34. data/lib/briard/whitelist_scrubber.rb +11 -4
  35. data/lib/briard/writers/bibtex_writer.rb +14 -8
  36. data/lib/briard/writers/cff_writer.rb +33 -26
  37. data/lib/briard/writers/codemeta_writer.rb +19 -15
  38. data/lib/briard/writers/csv_writer.rb +6 -4
  39. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  40. data/lib/briard/writers/jats_writer.rb +33 -28
  41. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  42. data/lib/briard/writers/ris_writer.rb +30 -18
  43. data/lib/briard/writers/turtle_writer.rb +1 -1
  44. data/lib/briard.rb +6 -6
  45. data/rubocop.sarif +0 -0
  46. data/spec/array_spec.rb +5 -5
  47. data/spec/author_utils_spec.rb +151 -132
  48. data/spec/datacite_utils_spec.rb +135 -83
  49. data/spec/doi_utils_spec.rb +168 -164
  50. data/spec/find_from_format_spec.rb +69 -69
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  52. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  53. data/spec/metadata_spec.rb +91 -90
  54. data/spec/readers/bibtex_reader_spec.rb +43 -38
  55. data/spec/readers/cff_reader_spec.rb +165 -153
  56. data/spec/readers/citeproc_reader_spec.rb +45 -40
  57. data/spec/readers/codemeta_reader_spec.rb +128 -115
  58. data/spec/readers/crosscite_reader_spec.rb +34 -24
  59. data/spec/readers/crossref_reader_spec.rb +1098 -939
  60. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  61. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  62. data/spec/readers/npm_reader_spec.rb +48 -43
  63. data/spec/readers/ris_reader_spec.rb +53 -47
  64. data/spec/readers/schema_org_reader_spec.rb +329 -267
  65. data/spec/spec_helper.rb +6 -5
  66. data/spec/utils_spec.rb +371 -347
  67. data/spec/writers/bibtex_writer_spec.rb +143 -143
  68. data/spec/writers/cff_writer_spec.rb +96 -90
  69. data/spec/writers/citation_writer_spec.rb +34 -33
  70. data/spec/writers/citeproc_writer_spec.rb +226 -224
  71. data/spec/writers/codemeta_writer_spec.rb +18 -16
  72. data/spec/writers/crosscite_writer_spec.rb +91 -73
  73. data/spec/writers/crossref_writer_spec.rb +99 -91
  74. data/spec/writers/csv_writer_spec.rb +70 -70
  75. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  76. data/spec/writers/datacite_writer_spec.rb +417 -322
  77. data/spec/writers/jats_writer_spec.rb +177 -161
  78. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  79. data/spec/writers/ris_writer_spec.rb +162 -162
  80. data/spec/writers/schema_org_writer_spec.rb +329 -294
  81. data/spec/writers/turtle_writer_spec.rb +47 -47
  82. metadata +242 -166
  83. data/.github/workflows/release.yml +0 -47
@@ -4,27 +4,30 @@ module Briard
4
4
  module Readers
5
5
  module CrossrefReader
6
6
  # CrossRef types from https://api.crossref.org/types
7
- def get_crossref(id: nil, **options)
8
- return { "string" => nil, "state" => "not_found" } unless id.present?
7
+ def get_crossref(id: nil, **_options)
8
+ return { 'string' => nil, 'state' => 'not_found' } unless id.present?
9
9
 
10
10
  doi = doi_from_url(id)
11
11
  url = "https://api.crossref.org/works/#{doi}/transform/application/vnd.crossref.unixsd+xml"
12
- response = Maremma.get(url, accept: "text/xml;charset=utf-8", raw: true)
13
- string = response.body.fetch("data", nil)
12
+ response = Maremma.get(url, accept: 'text/xml;charset=utf-8', raw: true)
13
+ string = response.body.fetch('data', nil)
14
14
  string = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).to_s if string.present?
15
15
 
16
- { "string" => string }
16
+ { 'string' => string }
17
17
  end
18
18
 
19
19
  def read_crossref(string: nil, **options)
20
- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
20
+ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
21
+ :sandbox, :validate, :ra))
21
22
 
22
23
  if string.present?
23
- m = Maremma.from_xml(string).dig("crossref_result", "query_result", "body", "query", "doi_record") || {}
24
- meta = m.dig("doi_record", "crossref", "error").nil? ? m : {}
24
+ m = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body', 'query',
25
+ 'doi_record') || {}
26
+ meta = m.dig('doi_record', 'crossref', 'error').nil? ? m : {}
25
27
 
26
28
  # query contains information from outside metadata schema, e.g. publisher name
27
- query = Maremma.from_xml(string).dig("crossref_result", "query_result", "body", "query") || {}
29
+ query = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body',
30
+ 'query') || {}
28
31
  else
29
32
  meta = {}
30
33
  query = {}
@@ -32,7 +35,7 @@ module Briard
32
35
 
33
36
  # model should be one of book, conference, database, dissertation, journal, peer_review, posted_content,
34
37
  # report_paper, sa_component, standard
35
- model = meta.dig("crossref").to_h.keys.last
38
+ model = meta['crossref'].to_h.keys.last
36
39
 
37
40
  resource_type = nil
38
41
  bibliographic_metadata = {}
@@ -40,213 +43,240 @@ module Briard
40
43
  journal_metadata = nil
41
44
  journal_issue = {}
42
45
  journal_metadata = nil
43
- publisher = query.dig("crm_item", 0)
46
+ publisher = query.dig('crm_item', 0)
44
47
  publisher = nil unless publisher.is_a?(String)
45
48
 
46
49
  case model
47
- when "book"
48
- book_metadata = meta.dig("crossref", "book", "book_metadata")
49
- book_series_metadata = meta.dig("crossref", "book", "book_series_metadata")
50
- book_set_metadata = meta.dig("crossref", "book", "book_set_metadata")
51
- bibliographic_metadata = meta.dig("crossref", "book", "content_item") || book_metadata || book_series_metadata || book_set_metadata
52
- resource_type = bibliographic_metadata.fetch("component_type", nil) ? "book-" + bibliographic_metadata.fetch("component_type") : "book"
50
+ when 'book'
51
+ book_metadata = meta.dig('crossref', 'book', 'book_metadata')
52
+ book_series_metadata = meta.dig('crossref', 'book', 'book_series_metadata')
53
+ book_set_metadata = meta.dig('crossref', 'book', 'book_set_metadata')
54
+ bibliographic_metadata = meta.dig('crossref', 'book',
55
+ 'content_item') || book_metadata || book_series_metadata || book_set_metadata
56
+ resource_type = if bibliographic_metadata.fetch('component_type', nil)
57
+ "book-#{bibliographic_metadata.fetch('component_type')}"
58
+ else
59
+ 'book'
60
+ end
53
61
  # publisher = if book_metadata.present?
54
62
  # book_metadata.dig("publisher", "publisher_name")
55
63
  # elsif book_series_metadata.present?
56
64
  # book_series_metadata.dig("publisher", "publisher_name")
57
65
  # end
58
- when "conference"
59
- event_metadata = meta.dig("crossref", "conference", "event_metadata") || {}
60
- bibliographic_metadata = meta.dig("crossref", "conference", "conference_paper").to_h
61
- when "journal"
62
- journal_metadata = meta.dig("crossref", "journal", "journal_metadata") || {}
63
- journal_issue = meta.dig("crossref", "journal", "journal_issue") || {}
64
- journal_article = meta.dig("crossref", "journal", "journal_article") || {}
66
+ when 'conference'
67
+ event_metadata = meta.dig('crossref', 'conference', 'event_metadata') || {}
68
+ bibliographic_metadata = meta.dig('crossref', 'conference', 'conference_paper').to_h
69
+ when 'journal'
70
+ journal_metadata = meta.dig('crossref', 'journal', 'journal_metadata') || {}
71
+ journal_issue = meta.dig('crossref', 'journal', 'journal_issue') || {}
72
+ journal_article = meta.dig('crossref', 'journal', 'journal_article') || {}
65
73
  bibliographic_metadata = journal_article.presence || journal_issue.presence || journal_metadata
66
- program_metadata = bibliographic_metadata.dig("crossmark", "custom_metadata", "program") || bibliographic_metadata.dig("program")
74
+ program_metadata = bibliographic_metadata.dig('crossmark', 'custom_metadata',
75
+ 'program') || bibliographic_metadata['program']
67
76
  resource_type = if journal_article.present?
68
- "journal_article"
69
- elsif journal_issue.present?
70
- "journal_issue"
71
- else
72
- "journal"
73
- end
74
- when "posted_content"
75
- bibliographic_metadata = meta.dig("crossref", "posted_content").to_h
76
- publisher ||= bibliographic_metadata.dig("institution", "institution_name")
77
- when "sa_component"
78
- bibliographic_metadata = meta.dig("crossref", "sa_component", "component_list", "component").to_h
79
- related_identifier = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "relation" }
80
- journal_metadata = { "relatedIdentifier" => related_identifier.to_h.fetch("__content", nil) }
81
- when "database"
82
- bibliographic_metadata = meta.dig("crossref", "database", "dataset").to_h
83
- resource_type = "dataset"
84
- when "report_paper"
85
- bibliographic_metadata = meta.dig("crossref", "report_paper", "report_paper_metadata").to_h
86
- resource_type = "report"
87
- when "peer_review"
88
- bibliographic_metadata = meta.dig("crossref", "peer_review")
89
- when "dissertation"
90
- bibliographic_metadata = meta.dig("crossref", "dissertation")
77
+ 'journal_article'
78
+ elsif journal_issue.present?
79
+ 'journal_issue'
80
+ else
81
+ 'journal'
82
+ end
83
+ when 'posted_content'
84
+ bibliographic_metadata = meta.dig('crossref', 'posted_content').to_h
85
+ publisher ||= bibliographic_metadata.dig('institution', 'institution_name')
86
+ when 'sa_component'
87
+ bibliographic_metadata = meta.dig('crossref', 'sa_component', 'component_list',
88
+ 'component').to_h
89
+ related_identifier = Array.wrap(query.to_h['crm_item']).find do |cr|
90
+ cr['name'] == 'relation'
91
+ end
92
+ journal_metadata = { 'relatedIdentifier' => related_identifier.to_h.fetch('__content',
93
+ nil) }
94
+ when 'database'
95
+ bibliographic_metadata = meta.dig('crossref', 'database', 'dataset').to_h
96
+ resource_type = 'dataset'
97
+ when 'report_paper'
98
+ bibliographic_metadata = meta.dig('crossref', 'report_paper',
99
+ 'report_paper_metadata').to_h
100
+ resource_type = 'report'
101
+ when 'peer_review'
102
+ bibliographic_metadata = meta.dig('crossref', 'peer_review')
103
+ when 'dissertation'
104
+ bibliographic_metadata = meta.dig('crossref', 'dissertation')
91
105
  end
92
106
 
93
107
  resource_type = (resource_type || model).to_s.underscore.camelcase.presence
94
- schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
108
+ schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || 'ScholarlyArticle'
95
109
  types = {
96
- "resourceTypeGeneral" => Briard::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
97
- "resourceType" => resource_type,
98
- "schemaOrg" => schema_org,
99
- "citeproc" => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || "article-journal",
100
- "bibtex" => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || "misc",
101
- "ris" => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || "JOUR"
110
+ 'resourceTypeGeneral' => Briard::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
111
+ 'resourceType' => resource_type,
112
+ 'schemaOrg' => schema_org,
113
+ 'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || 'article-journal',
114
+ 'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || 'misc',
115
+ 'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || 'JOUR'
102
116
  }.compact
103
117
 
104
- titles = if bibliographic_metadata.dig("titles").present?
105
- Array.wrap(bibliographic_metadata.dig("titles")).map do |r|
106
- if r.blank? || (r["title"].blank? && r["original_language_title"].blank?)
118
+ titles = if bibliographic_metadata['titles'].present?
119
+ Array.wrap(bibliographic_metadata['titles']).map do |r|
120
+ if r.blank? || (r['title'].blank? && r['original_language_title'].blank?)
107
121
  nil
108
- elsif r["title"].is_a?(String)
109
- { "title" => sanitize(r["title"]) }
110
- elsif r["original_language_title"].present?
111
- { "title" => sanitize(r.dig("original_language_title", "__content__")), "lang" => r.dig("original_language_title", "language") }
122
+ elsif r['title'].is_a?(String)
123
+ { 'title' => sanitize(r['title']) }
124
+ elsif r['original_language_title'].present?
125
+ { 'title' => sanitize(r.dig('original_language_title', '__content__')),
126
+ 'lang' => r.dig('original_language_title', 'language') }
112
127
  else
113
- { "title" => sanitize(r.dig("title", "__content__")) }.compact
128
+ { 'title' => sanitize(r.dig('title', '__content__')) }.compact
114
129
  end
115
130
  end.compact
116
131
  else
117
- [{ "title" => ":(unav)" }]
132
+ [{ 'title' => ':(unav)' }]
118
133
  end
119
134
 
120
135
  date_published = crossref_date_published(bibliographic_metadata)
121
136
  if date_published.present?
122
- date_published = { "date" => date_published, "dateType" => "Issued" }
137
+ date_published = { 'date' => date_published, 'dateType' => 'Issued' }
123
138
  else
124
- date_published = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "created" }
125
- date_published = { "date" => date_published.fetch("__content__", "")[0..9], "dateType" => "Issued" } if date_published.present?
139
+ date_published = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'created' }
140
+ if date_published.present?
141
+ date_published = { 'date' => date_published.fetch('__content__', '')[0..9],
142
+ 'dateType' => 'Issued' }
143
+ end
144
+ end
145
+ date_updated = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'last-update' }
146
+ if date_updated.present?
147
+ date_updated = { 'date' => date_updated.fetch('__content__', nil),
148
+ 'dateType' => 'Updated' }
126
149
  end
127
- date_updated = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "last-update" }
128
- date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
129
150
 
130
- date_registered = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "deposit-timestamp" }
131
- date_registered = get_datetime_from_time(date_registered.fetch("__content__", nil)) if date_registered.present?
151
+ date_registered = Array.wrap(query.to_h['crm_item']).find do |cr|
152
+ cr['name'] == 'deposit-timestamp'
153
+ end
154
+ if date_registered.present?
155
+ date_registered = get_datetime_from_time(date_registered.fetch('__content__', nil))
156
+ end
132
157
 
133
158
  # check that date is valid iso8601 date
134
- date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
135
- date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
159
+ date_published = nil unless Date.edtf(date_published.to_h['date']).present?
160
+ date_updated = nil unless Date.edtf(date_updated.to_h['date']).present?
136
161
 
137
162
  dates = [date_published, date_updated].compact
138
- publication_year = date_published.to_h.fetch("date", "")[0..3].presence
163
+ publication_year = date_published.to_h.fetch('date', '')[0..3].presence
139
164
 
140
- state = meta.present? || read_options.present? ? "findable" : "not_found"
165
+ state = meta.present? || read_options.present? ? 'findable' : 'not_found'
141
166
 
142
167
  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
143
168
 
144
169
  container = if journal_metadata.present?
145
- issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
146
-
147
- { "type" => "Journal",
148
- "identifier" => issn,
149
- "identifierType" => issn.present? ? "ISSN" : nil,
150
- "title" => parse_attributes(journal_metadata.to_h["full_title"]),
151
- "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
152
- "issue" => parse_attributes(journal_issue.dig("issue")),
153
- "firstPage" => bibliographic_metadata.dig("pages", "first_page") || parse_attributes(journal_article.to_h.dig("publisher_item", "item_number"), first: true),
154
- "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
170
+ issn = normalize_issn(journal_metadata.to_h.fetch('issn', nil))
171
+
172
+ { 'type' => 'Journal',
173
+ 'identifier' => issn,
174
+ 'identifierType' => issn.present? ? 'ISSN' : nil,
175
+ 'title' => parse_attributes(journal_metadata.to_h['full_title']),
176
+ 'volume' => parse_attributes(journal_issue.dig('journal_volume', 'volume')),
177
+ 'issue' => parse_attributes(journal_issue['issue']),
178
+ 'firstPage' => bibliographic_metadata.dig('pages',
179
+ 'first_page') || parse_attributes(journal_article.to_h.dig('publisher_item', 'item_number'),
180
+ first: true),
181
+ 'lastPage' => bibliographic_metadata.dig('pages', 'last_page') }.compact
155
182
 
156
183
  # By using book_metadata, we can account for where resource_type is `BookChapter` and not assume it's a whole book
157
184
  elsif book_metadata.present?
158
185
  identifiers = crossref_alternate_identifiers(book_metadata)
159
186
 
160
187
  {
161
- "type" => "Book",
162
- "title" => book_metadata.dig("titles", "title"),
163
- "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
164
- "lastPage" => bibliographic_metadata.dig("pages", "last_page"),
165
- "identifiers" => identifiers,
188
+ 'type' => 'Book',
189
+ 'title' => book_metadata.dig('titles', 'title'),
190
+ 'firstPage' => bibliographic_metadata.dig('pages', 'first_page'),
191
+ 'lastPage' => bibliographic_metadata.dig('pages', 'last_page'),
192
+ 'identifiers' => identifiers
166
193
  }.compact
167
194
 
168
- elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
169
- issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
195
+ elsif book_series_metadata.to_h.fetch('series_metadata', nil).present?
196
+ issn = normalize_issn(book_series_metadata.dig('series_metadata', 'issn'))
170
197
 
171
- { "type" => "Book Series",
172
- "identifier" => issn,
173
- "identifierType" => issn.present? ? "ISSN" : nil,
174
- "title" => book_series_metadata.dig("series_metadata", "titles", "title"),
175
- "volume" => bibliographic_metadata.fetch("volume", nil) }.compact
198
+ { 'type' => 'Book Series',
199
+ 'identifier' => issn,
200
+ 'identifierType' => issn.present? ? 'ISSN' : nil,
201
+ 'title' => book_series_metadata.dig('series_metadata', 'titles', 'title'),
202
+ 'volume' => bibliographic_metadata.fetch('volume', nil) }.compact
176
203
  end
177
204
 
178
- id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi"))
205
+ id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig('doi_data',
206
+ 'doi'))
179
207
 
180
208
  # Let sections override this in case of alternative metadata structures, such as book chapters, which
181
209
  # have their meta inside `content_item`, but the main book identifiers inside of `book_metadata`
182
210
  identifiers ||= crossref_alternate_identifiers(bibliographic_metadata)
183
211
 
184
- { "id" => id,
185
- "types" => types,
186
- "doi" => doi_from_url(id),
187
- "url" => parse_attributes(bibliographic_metadata.dig("doi_data", "resource"), first: true),
188
- "titles" => titles,
189
- "identifiers" => identifiers,
190
- "creators" => crossref_people(bibliographic_metadata, "author"),
191
- "contributors" => crossref_people(bibliographic_metadata, "editor"),
192
- "funding_references" => crossref_funding_reference(program_metadata),
193
- "publisher" => publisher,
194
- "container" => container,
195
- "agency" => agency = options[:ra] || "crossref",
196
- "related_identifiers" => related_identifiers,
197
- "dates" => dates,
198
- "publication_year" => publication_year,
199
- "descriptions" => crossref_description(bibliographic_metadata),
200
- "rights_list" => crossref_license(program_metadata),
201
- "version_info" => nil,
202
- "subjects" => nil,
203
- "language" => nil,
204
- "sizes" => nil,
205
- "schema_version" => "http://datacite.org/schema/kernel-4",
206
- "state" => state,
207
- "date_registered" => date_registered
208
- }.merge(read_options)
212
+ { 'id' => id,
213
+ 'types' => types,
214
+ 'doi' => doi_from_url(id),
215
+ 'url' => parse_attributes(bibliographic_metadata.dig('doi_data', 'resource'),
216
+ first: true),
217
+ 'titles' => titles,
218
+ 'identifiers' => identifiers,
219
+ 'creators' => crossref_people(bibliographic_metadata, 'author'),
220
+ 'contributors' => crossref_people(bibliographic_metadata, 'editor'),
221
+ 'funding_references' => crossref_funding_reference(program_metadata),
222
+ 'publisher' => publisher,
223
+ 'container' => container,
224
+ 'agency' => agency = options[:ra] || 'crossref',
225
+ 'related_identifiers' => related_identifiers,
226
+ 'dates' => dates,
227
+ 'publication_year' => publication_year,
228
+ 'descriptions' => crossref_description(bibliographic_metadata),
229
+ 'rights_list' => crossref_license(program_metadata),
230
+ 'version_info' => nil,
231
+ 'subjects' => nil,
232
+ 'language' => nil,
233
+ 'sizes' => nil,
234
+ 'schema_version' => 'http://datacite.org/schema/kernel-4',
235
+ 'state' => state,
236
+ 'date_registered' => date_registered }.merge(read_options)
209
237
  end
210
238
 
211
239
  def crossref_alternate_identifiers(bibliographic_metadata)
212
- if bibliographic_metadata.dig("publisher_item", "item_number").present?
213
- Array.wrap(bibliographic_metadata.dig("publisher_item", "item_number")).map do |item|
240
+ if bibliographic_metadata.dig('publisher_item', 'item_number').present?
241
+ Array.wrap(bibliographic_metadata.dig('publisher_item', 'item_number')).map do |item|
214
242
  if item.is_a?(String)
215
- { "identifier" => item,
216
- "identifierType" => "Publisher ID" }
243
+ { 'identifier' => item,
244
+ 'identifierType' => 'Publisher ID' }
217
245
  else
218
- { "identifier" => item.fetch("__content__", nil),
219
- "identifierType" => item.fetch("item_number_type", nil) || "Publisher ID" }
246
+ { 'identifier' => item.fetch('__content__', nil),
247
+ 'identifierType' => item.fetch('item_number_type', nil) || 'Publisher ID' }
220
248
  end
221
249
  end
222
- elsif parse_attributes(bibliographic_metadata.fetch("item_number", nil)).present?
223
- [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)),
224
- "identifierType" => parse_attributes(bibliographic_metadata.dig("item_number", "item_number_type")) || "Publisher ID" }]
225
- elsif parse_attributes(bibliographic_metadata.fetch("isbn", nil)).present?
226
- [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("isbn", nil), first: true),
227
- "identifierType" => "ISBN" }]
250
+ elsif parse_attributes(bibliographic_metadata.fetch('item_number', nil)).present?
251
+ [{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('item_number', nil)),
252
+ 'identifierType' => parse_attributes(bibliographic_metadata.dig('item_number',
253
+ 'item_number_type')) || 'Publisher ID' }]
254
+ elsif parse_attributes(bibliographic_metadata.fetch('isbn', nil)).present?
255
+ [{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('isbn', nil), first: true),
256
+ 'identifierType' => 'ISBN' }]
228
257
  else
229
258
  []
230
259
  end
231
260
  end
232
261
 
233
262
  def crossref_description(bibliographic_metadata)
234
- abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
235
- { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
263
+ abstract = Array.wrap(bibliographic_metadata['abstract']).map do |r|
264
+ { 'descriptionType' => 'Abstract',
265
+ 'description' => sanitize(parse_attributes(r, content: 'p')) }.compact
236
266
  end
237
267
 
238
- description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
239
- { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
268
+ description = Array.wrap(bibliographic_metadata['description']).map do |r|
269
+ { 'descriptionType' => 'Other', 'description' => sanitize(parse_attributes(r)) }.compact
240
270
  end
241
271
 
242
272
  (abstract + description)
243
273
  end
244
274
 
245
275
  def crossref_license(program_metadata)
246
- access_indicator = Array.wrap(program_metadata).find { |m| m["name"] == "AccessIndicators" }
276
+ access_indicator = Array.wrap(program_metadata).find { |m| m['name'] == 'AccessIndicators' }
247
277
  if access_indicator.present?
248
- Array.wrap(access_indicator["license_ref"]).map do |license|
249
- hsh_to_spdx("rightsURI" => parse_attributes(license))
278
+ Array.wrap(access_indicator['license_ref']).map do |license|
279
+ hsh_to_spdx('rightsURI' => parse_attributes(license))
250
280
  end.uniq
251
281
  else
252
282
  []
@@ -254,46 +284,61 @@ module Briard
254
284
  end
255
285
 
256
286
  def crossref_people(bibliographic_metadata, contributor_role)
257
- person = bibliographic_metadata.dig("contributors", "person_name") || bibliographic_metadata.dig("person_name")
258
- organization = Array.wrap(bibliographic_metadata.dig("contributors", "organization"))
259
- person = [{ "name" => ":(unav)", "contributor_role"=>"author" }] if contributor_role == "author" && Array.wrap(person).select { |a| a["contributor_role"] == "author" }.blank? && Array.wrap(organization).select { |a| a["contributor_role"] == "author" }.blank?
260
-
261
- (Array.wrap(person) + Array.wrap(organization)).select { |a| a["contributor_role"] == contributor_role }.map do |a|
262
- name_identifiers = normalize_orcid(parse_attributes(a["ORCID"])).present? ? [{ "nameIdentifier" => normalize_orcid(parse_attributes(a["ORCID"])), "nameIdentifierScheme" => "ORCID", "schemeUri"=>"https://orcid.org" }] : nil
263
- if a["surname"].present? || a["given_name"].present? || name_identifiers.present?
264
- given_name = parse_attributes(a["given_name"])
265
- family_name = parse_attributes(a["surname"])
266
- affiliation = Array.wrap(a["affiliation"]).map do |a|
287
+ person = bibliographic_metadata.dig('contributors',
288
+ 'person_name') || bibliographic_metadata['person_name']
289
+ organization = Array.wrap(bibliographic_metadata.dig('contributors', 'organization'))
290
+ if contributor_role == 'author' && Array.wrap(person).select do |a|
291
+ a['contributor_role'] == 'author'
292
+ end.blank? && Array.wrap(organization).select do |a|
293
+ a['contributor_role'] == 'author'
294
+ end.blank?
295
+ person = [{ 'name' => ':(unav)', 'contributor_role' => 'author' }]
296
+ end
297
+
298
+ (Array.wrap(person) + Array.wrap(organization)).select do |a|
299
+ a['contributor_role'] == contributor_role
300
+ end.map do |a|
301
+ name_identifiers = if normalize_orcid(parse_attributes(a['ORCID'])).present?
302
+ [{
303
+ 'nameIdentifier' => normalize_orcid(parse_attributes(a['ORCID'])), 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org'
304
+ }]
305
+ end
306
+ if a['surname'].present? || a['given_name'].present? || name_identifiers.present?
307
+ given_name = parse_attributes(a['given_name'])
308
+ family_name = parse_attributes(a['surname'])
309
+ affiliation = Array.wrap(a['affiliation']).map do |a|
267
310
  if a.is_a?(Hash)
268
311
  a
269
- elsif a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
312
+ elsif a.is_a?(Hash) && a.key?('__content__') && a['__content__'].strip.blank?
270
313
  nil
271
- elsif a.is_a?(Hash) && a.key?("__content__")
272
- { "name" => a["__content__"] }
314
+ elsif a.is_a?(Hash) && a.key?('__content__')
315
+ { 'name' => a['__content__'] }
273
316
  elsif a.strip.blank?
274
317
  nil
275
318
  elsif a.is_a?(String)
276
- { "name" => a }
319
+ { 'name' => a }
277
320
  end
278
321
  end.compact
279
322
 
280
- { "nameType" => "Personal",
281
- "nameIdentifiers" => name_identifiers,
282
- "name" => [family_name, given_name].compact.join(", "),
283
- "givenName" => given_name,
284
- "familyName" => family_name,
285
- "affiliation" => affiliation.presence,
286
- "contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
323
+ { 'nameType' => 'Personal',
324
+ 'nameIdentifiers' => name_identifiers,
325
+ 'name' => [family_name, given_name].compact.join(', '),
326
+ 'givenName' => given_name,
327
+ 'familyName' => family_name,
328
+ 'affiliation' => affiliation.presence,
329
+ 'contributorType' => contributor_role == 'editor' ? 'Editor' : nil }.compact
287
330
  else
288
- { "nameType" => "Organizational",
289
- "name" => a["name"] || a["__content__"] }
331
+ { 'nameType' => 'Organizational',
332
+ 'name' => a['name'] || a['__content__'] }
290
333
  end
291
334
  end
292
335
  end
293
336
 
294
337
  def crossref_funding_reference(program_metadata)
295
- fundref = Array.wrap(program_metadata).find { |a| a["name"] == "fundref" } || {}
296
- Array.wrap(fundref.fetch("assertion", [])).select { |a| a["name"] == "fundgroup" && a["assertion"].present? }.map do |f|
338
+ fundref = Array.wrap(program_metadata).find { |a| a['name'] == 'fundref' } || {}
339
+ Array.wrap(fundref.fetch('assertion', [])).select do |a|
340
+ a['name'] == 'fundgroup' && a['assertion'].present?
341
+ end.map do |f|
297
342
  funder_identifier = nil
298
343
  funder_identifier_type = nil
299
344
  funder_name = nil
@@ -301,66 +346,60 @@ module Briard
301
346
  award_number = nil
302
347
  award_uri = nil
303
348
 
304
- Array.wrap(f.fetch("assertion")).each do |a|
305
- if a.fetch("name") == "award_number"
306
- award_number = a.fetch("__content__", nil)
307
- award_uri = a.fetch("awardURI", nil)
308
- elsif a.fetch("name") == "funder_name"
309
- funder_name = a.fetch("__content__", nil).to_s.squish.presence
310
- funder_identifier = validate_funder_doi(a.dig("assertion", "__content__"))
311
- funder_identifier_type = "Crossref Funder ID" if funder_identifier.present?
349
+ Array.wrap(f.fetch('assertion')).each do |a|
350
+ case a.fetch('name')
351
+ when 'award_number'
352
+ award_number = a.fetch('__content__', nil)
353
+ award_uri = a.fetch('awardURI', nil)
354
+ when 'funder_name'
355
+ funder_name = a.fetch('__content__', nil).to_s.squish.presence
356
+ funder_identifier = validate_funder_doi(a.dig('assertion', '__content__'))
357
+ funder_identifier_type = 'Crossref Funder ID' if funder_identifier.present?
312
358
  end
313
359
  end
314
360
 
315
361
  # funder_name is required in DataCite
316
- if funder_name.present?
317
- { "funderIdentifier" => funder_identifier,
318
- "funderIdentifierType" => funder_identifier_type,
319
- "funderName" => funder_name,
320
- "awardTitle" => award_title,
321
- "awardNumber" => award_number,
322
- "awardUri" => award_uri }.compact
323
- else
324
- nil
325
- end
362
+ next unless funder_name.present?
363
+
364
+ { 'funderIdentifier' => funder_identifier,
365
+ 'funderIdentifierType' => funder_identifier_type,
366
+ 'funderName' => funder_name,
367
+ 'awardTitle' => award_title,
368
+ 'awardNumber' => award_number,
369
+ 'awardUri' => award_uri }.compact
326
370
  end.compact
327
371
  end
328
372
 
329
373
  def crossref_date_published(bibliographic_metadata)
330
- pub_date = Array.wrap(bibliographic_metadata.fetch("publication_date", nil)).presence ||
331
- Array.wrap(bibliographic_metadata.fetch("acceptance_date", nil))
374
+ pub_date = Array.wrap(bibliographic_metadata.fetch('publication_date', nil)).presence ||
375
+ Array.wrap(bibliographic_metadata.fetch('acceptance_date', nil))
332
376
  if pub_date.present?
333
- get_date_from_parts(pub_date.first["year"], pub_date.first["month"], pub_date.first["day"])
334
- else
335
- nil
377
+ get_date_from_parts(pub_date.first['year'], pub_date.first['month'],
378
+ pub_date.first['day'])
336
379
  end
337
380
  end
338
381
 
339
382
  def crossref_is_part_of(model_metadata)
340
- if model_metadata.present? && model_metadata.fetch("issn", nil).present?
341
- { "relatedIdentifier" => normalize_issn(model_metadata.fetch("issn", nil)),
342
- "relationType" => "IsPartOf",
343
- "relatedIdentifierType" => "ISSN",
344
- "resourceTypeGeneral" => "Collection" }.compact
345
- elsif model_metadata.present? && model_metadata.fetch("relatedIdentifier", nil).present?
346
- { "relatedIdentifier" => model_metadata.fetch("relatedIdentifier", nil),
347
- "relationType" => "IsPartOf",
348
- "relatedIdentifierType" => "DOI" }.compact
349
- else
350
- nil
383
+ if model_metadata.present? && model_metadata.fetch('issn', nil).present?
384
+ { 'relatedIdentifier' => normalize_issn(model_metadata.fetch('issn', nil)),
385
+ 'relationType' => 'IsPartOf',
386
+ 'relatedIdentifierType' => 'ISSN',
387
+ 'resourceTypeGeneral' => 'Collection' }.compact
388
+ elsif model_metadata.present? && model_metadata.fetch('relatedIdentifier', nil).present?
389
+ { 'relatedIdentifier' => model_metadata.fetch('relatedIdentifier', nil),
390
+ 'relationType' => 'IsPartOf',
391
+ 'relatedIdentifierType' => 'DOI' }.compact
351
392
  end
352
393
  end
353
394
 
354
395
  def crossref_references(bibliographic_metadata)
355
- refs = bibliographic_metadata.dig("citation_list", "citation")
356
- Array.wrap(refs).select { |a| a["doi"].present? }.map do |c|
357
- if c["doi"].present?
358
- { "relatedIdentifier" => parse_attributes(c["doi"]).downcase,
359
- "relationType" => "References",
360
- "relatedIdentifierType" => "DOI" }.compact
361
- else
362
- nil
363
- end
396
+ refs = bibliographic_metadata.dig('citation_list', 'citation')
397
+ Array.wrap(refs).select { |a| a['doi'].present? }.map do |c|
398
+ next unless c['doi'].present?
399
+
400
+ { 'relatedIdentifier' => parse_attributes(c['doi']).downcase,
401
+ 'relationType' => 'References',
402
+ 'relatedIdentifierType' => 'DOI' }.compact
364
403
  end.compact.unwrap
365
404
  end
366
405
  end