briard 2.4.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +22 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +43 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -38
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +250 -160
  81. data/.github/workflows/release.yml +0 -47
@@ -4,27 +4,30 @@ module Briard
4
4
  module Readers
5
5
  module CrossrefReader
6
6
  # CrossRef types from https://api.crossref.org/types
7
- def get_crossref(id: nil, **options)
8
- return { "string" => nil, "state" => "not_found" } unless id.present?
7
+ def get_crossref(id: nil, **_options)
8
+ return { 'string' => nil, 'state' => 'not_found' } unless id.present?
9
9
 
10
10
  doi = doi_from_url(id)
11
11
  url = "https://api.crossref.org/works/#{doi}/transform/application/vnd.crossref.unixsd+xml"
12
- response = Maremma.get(url, accept: "text/xml;charset=utf-8", raw: true)
13
- string = response.body.fetch("data", nil)
12
+ response = Maremma.get(url, accept: 'text/xml;charset=utf-8', raw: true)
13
+ string = response.body.fetch('data', nil)
14
14
  string = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).to_s if string.present?
15
15
 
16
- { "string" => string }
16
+ { 'string' => string }
17
17
  end
18
18
 
19
19
  def read_crossref(string: nil, **options)
20
- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
20
+ read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
21
+ :sandbox, :validate, :ra))
21
22
 
22
23
  if string.present?
23
- m = Maremma.from_xml(string).dig("crossref_result", "query_result", "body", "query", "doi_record") || {}
24
- meta = m.dig("doi_record", "crossref", "error").nil? ? m : {}
24
+ m = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body', 'query',
25
+ 'doi_record') || {}
26
+ meta = m.dig('doi_record', 'crossref', 'error').nil? ? m : {}
25
27
 
26
28
  # query contains information from outside metadata schema, e.g. publisher name
27
- query = Maremma.from_xml(string).dig("crossref_result", "query_result", "body", "query") || {}
29
+ query = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body',
30
+ 'query') || {}
28
31
  else
29
32
  meta = {}
30
33
  query = {}
@@ -32,7 +35,7 @@ module Briard
32
35
 
33
36
  # model should be one of book, conference, database, dissertation, journal, peer_review, posted_content,
34
37
  # report_paper, sa_component, standard
35
- model = meta.dig("crossref").to_h.keys.last
38
+ model = meta['crossref'].to_h.keys.last
36
39
 
37
40
  resource_type = nil
38
41
  bibliographic_metadata = {}
@@ -40,213 +43,240 @@ module Briard
40
43
  journal_metadata = nil
41
44
  journal_issue = {}
42
45
  journal_metadata = nil
43
- publisher = query.dig("crm_item", 0)
46
+ publisher = query.dig('crm_item', 0)
44
47
  publisher = nil unless publisher.is_a?(String)
45
48
 
46
49
  case model
47
- when "book"
48
- book_metadata = meta.dig("crossref", "book", "book_metadata")
49
- book_series_metadata = meta.dig("crossref", "book", "book_series_metadata")
50
- book_set_metadata = meta.dig("crossref", "book", "book_set_metadata")
51
- bibliographic_metadata = meta.dig("crossref", "book", "content_item") || book_metadata || book_series_metadata || book_set_metadata
52
- resource_type = bibliographic_metadata.fetch("component_type", nil) ? "book-" + bibliographic_metadata.fetch("component_type") : "book"
50
+ when 'book'
51
+ book_metadata = meta.dig('crossref', 'book', 'book_metadata')
52
+ book_series_metadata = meta.dig('crossref', 'book', 'book_series_metadata')
53
+ book_set_metadata = meta.dig('crossref', 'book', 'book_set_metadata')
54
+ bibliographic_metadata = meta.dig('crossref', 'book',
55
+ 'content_item') || book_metadata || book_series_metadata || book_set_metadata
56
+ resource_type = if bibliographic_metadata.fetch('component_type', nil)
57
+ "book-#{bibliographic_metadata.fetch('component_type')}"
58
+ else
59
+ 'book'
60
+ end
53
61
  # publisher = if book_metadata.present?
54
62
  # book_metadata.dig("publisher", "publisher_name")
55
63
  # elsif book_series_metadata.present?
56
64
  # book_series_metadata.dig("publisher", "publisher_name")
57
65
  # end
58
- when "conference"
59
- event_metadata = meta.dig("crossref", "conference", "event_metadata") || {}
60
- bibliographic_metadata = meta.dig("crossref", "conference", "conference_paper").to_h
61
- when "journal"
62
- journal_metadata = meta.dig("crossref", "journal", "journal_metadata") || {}
63
- journal_issue = meta.dig("crossref", "journal", "journal_issue") || {}
64
- journal_article = meta.dig("crossref", "journal", "journal_article") || {}
66
+ when 'conference'
67
+ event_metadata = meta.dig('crossref', 'conference', 'event_metadata') || {}
68
+ bibliographic_metadata = meta.dig('crossref', 'conference', 'conference_paper').to_h
69
+ when 'journal'
70
+ journal_metadata = meta.dig('crossref', 'journal', 'journal_metadata') || {}
71
+ journal_issue = meta.dig('crossref', 'journal', 'journal_issue') || {}
72
+ journal_article = meta.dig('crossref', 'journal', 'journal_article') || {}
65
73
  bibliographic_metadata = journal_article.presence || journal_issue.presence || journal_metadata
66
- program_metadata = bibliographic_metadata.dig("crossmark", "custom_metadata", "program") || bibliographic_metadata.dig("program")
74
+ program_metadata = bibliographic_metadata.dig('crossmark', 'custom_metadata',
75
+ 'program') || bibliographic_metadata['program']
67
76
  resource_type = if journal_article.present?
68
- "journal_article"
69
- elsif journal_issue.present?
70
- "journal_issue"
71
- else
72
- "journal"
73
- end
74
- when "posted_content"
75
- bibliographic_metadata = meta.dig("crossref", "posted_content").to_h
76
- publisher ||= bibliographic_metadata.dig("institution", "institution_name")
77
- when "sa_component"
78
- bibliographic_metadata = meta.dig("crossref", "sa_component", "component_list", "component").to_h
79
- related_identifier = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "relation" }
80
- journal_metadata = { "relatedIdentifier" => related_identifier.to_h.fetch("__content", nil) }
81
- when "database"
82
- bibliographic_metadata = meta.dig("crossref", "database", "dataset").to_h
83
- resource_type = "dataset"
84
- when "report_paper"
85
- bibliographic_metadata = meta.dig("crossref", "report_paper", "report_paper_metadata").to_h
86
- resource_type = "report"
87
- when "peer_review"
88
- bibliographic_metadata = meta.dig("crossref", "peer_review")
89
- when "dissertation"
90
- bibliographic_metadata = meta.dig("crossref", "dissertation")
77
+ 'journal_article'
78
+ elsif journal_issue.present?
79
+ 'journal_issue'
80
+ else
81
+ 'journal'
82
+ end
83
+ when 'posted_content'
84
+ bibliographic_metadata = meta.dig('crossref', 'posted_content').to_h
85
+ publisher ||= bibliographic_metadata.dig('institution', 'institution_name')
86
+ when 'sa_component'
87
+ bibliographic_metadata = meta.dig('crossref', 'sa_component', 'component_list',
88
+ 'component').to_h
89
+ related_identifier = Array.wrap(query.to_h['crm_item']).find do |cr|
90
+ cr['name'] == 'relation'
91
+ end
92
+ journal_metadata = { 'relatedIdentifier' => related_identifier.to_h.fetch('__content',
93
+ nil) }
94
+ when 'database'
95
+ bibliographic_metadata = meta.dig('crossref', 'database', 'dataset').to_h
96
+ resource_type = 'dataset'
97
+ when 'report_paper'
98
+ bibliographic_metadata = meta.dig('crossref', 'report_paper',
99
+ 'report_paper_metadata').to_h
100
+ resource_type = 'report'
101
+ when 'peer_review'
102
+ bibliographic_metadata = meta.dig('crossref', 'peer_review')
103
+ when 'dissertation'
104
+ bibliographic_metadata = meta.dig('crossref', 'dissertation')
91
105
  end
92
106
 
93
107
  resource_type = (resource_type || model).to_s.underscore.camelcase.presence
94
- schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
108
+ schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || 'ScholarlyArticle'
95
109
  types = {
96
- "resourceTypeGeneral" => Briard::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
97
- "resourceType" => resource_type,
98
- "schemaOrg" => schema_org,
99
- "citeproc" => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || "article-journal",
100
- "bibtex" => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || "misc",
101
- "ris" => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || "JOUR"
110
+ 'resourceTypeGeneral' => Briard::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
111
+ 'resourceType' => resource_type,
112
+ 'schemaOrg' => schema_org,
113
+ 'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || 'article-journal',
114
+ 'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || 'misc',
115
+ 'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || 'JOUR'
102
116
  }.compact
103
117
 
104
- titles = if bibliographic_metadata.dig("titles").present?
105
- Array.wrap(bibliographic_metadata.dig("titles")).map do |r|
106
- if r.blank? || (r["title"].blank? && r["original_language_title"].blank?)
118
+ titles = if bibliographic_metadata['titles'].present?
119
+ Array.wrap(bibliographic_metadata['titles']).map do |r|
120
+ if r.blank? || (r['title'].blank? && r['original_language_title'].blank?)
107
121
  nil
108
- elsif r["title"].is_a?(String)
109
- { "title" => sanitize(r["title"]) }
110
- elsif r["original_language_title"].present?
111
- { "title" => sanitize(r.dig("original_language_title", "__content__")), "lang" => r.dig("original_language_title", "language") }
122
+ elsif r['title'].is_a?(String)
123
+ { 'title' => sanitize(r['title']) }
124
+ elsif r['original_language_title'].present?
125
+ { 'title' => sanitize(r.dig('original_language_title', '__content__')),
126
+ 'lang' => r.dig('original_language_title', 'language') }
112
127
  else
113
- { "title" => sanitize(r.dig("title", "__content__")) }.compact
128
+ { 'title' => sanitize(r.dig('title', '__content__')) }.compact
114
129
  end
115
130
  end.compact
116
131
  else
117
- [{ "title" => ":(unav)" }]
132
+ [{ 'title' => ':(unav)' }]
118
133
  end
119
134
 
120
135
  date_published = crossref_date_published(bibliographic_metadata)
121
136
  if date_published.present?
122
- date_published = { "date" => date_published, "dateType" => "Issued" }
137
+ date_published = { 'date' => date_published, 'dateType' => 'Issued' }
123
138
  else
124
- date_published = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "created" }
125
- date_published = { "date" => date_published.fetch("__content__", "")[0..9], "dateType" => "Issued" } if date_published.present?
139
+ date_published = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'created' }
140
+ if date_published.present?
141
+ date_published = { 'date' => date_published.fetch('__content__', '')[0..9],
142
+ 'dateType' => 'Issued' }
143
+ end
144
+ end
145
+ date_updated = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'last-update' }
146
+ if date_updated.present?
147
+ date_updated = { 'date' => date_updated.fetch('__content__', nil),
148
+ 'dateType' => 'Updated' }
126
149
  end
127
- date_updated = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "last-update" }
128
- date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
129
150
 
130
- date_registered = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "deposit-timestamp" }
131
- date_registered = get_datetime_from_time(date_registered.fetch("__content__", nil)) if date_registered.present?
151
+ date_registered = Array.wrap(query.to_h['crm_item']).find do |cr|
152
+ cr['name'] == 'deposit-timestamp'
153
+ end
154
+ if date_registered.present?
155
+ date_registered = get_datetime_from_time(date_registered.fetch('__content__', nil))
156
+ end
132
157
 
133
158
  # check that date is valid iso8601 date
134
- date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
135
- date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?
159
+ date_published = nil unless Date.edtf(date_published.to_h['date']).present?
160
+ date_updated = nil unless Date.edtf(date_updated.to_h['date']).present?
136
161
 
137
162
  dates = [date_published, date_updated].compact
138
- publication_year = date_published.to_h.fetch("date", "")[0..3].presence
163
+ publication_year = date_published.to_h.fetch('date', '')[0..3].presence
139
164
 
140
- state = meta.present? || read_options.present? ? "findable" : "not_found"
165
+ state = meta.present? || read_options.present? ? 'findable' : 'not_found'
141
166
 
142
167
  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
143
168
 
144
169
  container = if journal_metadata.present?
145
- issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
146
-
147
- { "type" => "Journal",
148
- "identifier" => issn,
149
- "identifierType" => issn.present? ? "ISSN" : nil,
150
- "title" => parse_attributes(journal_metadata.to_h["full_title"]),
151
- "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
152
- "issue" => parse_attributes(journal_issue.dig("issue")),
153
- "firstPage" => bibliographic_metadata.dig("pages", "first_page") || parse_attributes(journal_article.to_h.dig("publisher_item", "item_number"), first: true),
154
- "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
170
+ issn = normalize_issn(journal_metadata.to_h.fetch('issn', nil))
171
+
172
+ { 'type' => 'Journal',
173
+ 'identifier' => issn,
174
+ 'identifierType' => issn.present? ? 'ISSN' : nil,
175
+ 'title' => parse_attributes(journal_metadata.to_h['full_title']),
176
+ 'volume' => parse_attributes(journal_issue.dig('journal_volume', 'volume')),
177
+ 'issue' => parse_attributes(journal_issue['issue']),
178
+ 'firstPage' => bibliographic_metadata.dig('pages',
179
+ 'first_page') || parse_attributes(journal_article.to_h.dig('publisher_item', 'item_number'),
180
+ first: true),
181
+ 'lastPage' => bibliographic_metadata.dig('pages', 'last_page') }.compact
155
182
 
156
183
  # By using book_metadata, we can account for where resource_type is `BookChapter` and not assume its a whole book
157
184
  elsif book_metadata.present?
158
185
  identifiers = crossref_alternate_identifiers(book_metadata)
159
186
 
160
187
  {
161
- "type" => "Book",
162
- "title" => book_metadata.dig("titles", "title"),
163
- "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
164
- "lastPage" => bibliographic_metadata.dig("pages", "last_page"),
165
- "identifiers" => identifiers,
188
+ 'type' => 'Book',
189
+ 'title' => book_metadata.dig('titles', 'title'),
190
+ 'firstPage' => bibliographic_metadata.dig('pages', 'first_page'),
191
+ 'lastPage' => bibliographic_metadata.dig('pages', 'last_page'),
192
+ 'identifiers' => identifiers
166
193
  }.compact
167
194
 
168
- elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
169
- issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))
195
+ elsif book_series_metadata.to_h.fetch('series_metadata', nil).present?
196
+ issn = normalize_issn(book_series_metadata.dig('series_metadata', 'issn'))
170
197
 
171
- { "type" => "Book Series",
172
- "identifier" => issn,
173
- "identifierType" => issn.present? ? "ISSN" : nil,
174
- "title" => book_series_metadata.dig("series_metadata", "titles", "title"),
175
- "volume" => bibliographic_metadata.fetch("volume", nil) }.compact
198
+ { 'type' => 'Book Series',
199
+ 'identifier' => issn,
200
+ 'identifierType' => issn.present? ? 'ISSN' : nil,
201
+ 'title' => book_series_metadata.dig('series_metadata', 'titles', 'title'),
202
+ 'volume' => bibliographic_metadata.fetch('volume', nil) }.compact
176
203
  end
177
204
 
178
- id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi"))
205
+ id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig('doi_data',
206
+ 'doi'))
179
207
 
180
208
  # Let sections override this in case of alternative metadata structures, such as book chapters, which
181
209
  # have their meta inside `content_item`, but the main book indentifers inside of `book_metadata`
182
210
  identifiers ||= crossref_alternate_identifiers(bibliographic_metadata)
183
211
 
184
- { "id" => id,
185
- "types" => types,
186
- "doi" => doi_from_url(id),
187
- "url" => parse_attributes(bibliographic_metadata.dig("doi_data", "resource"), first: true),
188
- "titles" => titles,
189
- "identifiers" => identifiers,
190
- "creators" => crossref_people(bibliographic_metadata, "author"),
191
- "contributors" => crossref_people(bibliographic_metadata, "editor"),
192
- "funding_references" => crossref_funding_reference(program_metadata),
193
- "publisher" => publisher,
194
- "container" => container,
195
- "agency" => agency = options[:ra] || "crossref",
196
- "related_identifiers" => related_identifiers,
197
- "dates" => dates,
198
- "publication_year" => publication_year,
199
- "descriptions" => crossref_description(bibliographic_metadata),
200
- "rights_list" => crossref_license(program_metadata),
201
- "version_info" => nil,
202
- "subjects" => nil,
203
- "language" => nil,
204
- "sizes" => nil,
205
- "schema_version" => "http://datacite.org/schema/kernel-4",
206
- "state" => state,
207
- "date_registered" => date_registered
208
- }.merge(read_options)
212
+ { 'id' => id,
213
+ 'types' => types,
214
+ 'doi' => doi_from_url(id),
215
+ 'url' => parse_attributes(bibliographic_metadata.dig('doi_data', 'resource'),
216
+ first: true),
217
+ 'titles' => titles,
218
+ 'identifiers' => identifiers,
219
+ 'creators' => crossref_people(bibliographic_metadata, 'author'),
220
+ 'contributors' => crossref_people(bibliographic_metadata, 'editor'),
221
+ 'funding_references' => crossref_funding_reference(program_metadata),
222
+ 'publisher' => publisher,
223
+ 'container' => container,
224
+ 'agency' => agency = options[:ra] || 'crossref',
225
+ 'related_identifiers' => related_identifiers,
226
+ 'dates' => dates,
227
+ 'publication_year' => publication_year,
228
+ 'descriptions' => crossref_description(bibliographic_metadata),
229
+ 'rights_list' => crossref_license(program_metadata),
230
+ 'version_info' => nil,
231
+ 'subjects' => nil,
232
+ 'language' => nil,
233
+ 'sizes' => nil,
234
+ 'schema_version' => 'http://datacite.org/schema/kernel-4',
235
+ 'state' => state,
236
+ 'date_registered' => date_registered }.merge(read_options)
209
237
  end
210
238
 
211
239
  def crossref_alternate_identifiers(bibliographic_metadata)
212
- if bibliographic_metadata.dig("publisher_item", "item_number").present?
213
- Array.wrap(bibliographic_metadata.dig("publisher_item", "item_number")).map do |item|
240
+ if bibliographic_metadata.dig('publisher_item', 'item_number').present?
241
+ Array.wrap(bibliographic_metadata.dig('publisher_item', 'item_number')).map do |item|
214
242
  if item.is_a?(String)
215
- { "identifier" => item,
216
- "identifierType" => "Publisher ID" }
243
+ { 'identifier' => item,
244
+ 'identifierType' => 'Publisher ID' }
217
245
  else
218
- { "identifier" => item.fetch("__content__", nil),
219
- "identifierType" => item.fetch("item_number_type", nil) || "Publisher ID" }
246
+ { 'identifier' => item.fetch('__content__', nil),
247
+ 'identifierType' => item.fetch('item_number_type', nil) || 'Publisher ID' }
220
248
  end
221
249
  end
222
- elsif parse_attributes(bibliographic_metadata.fetch("item_number", nil)).present?
223
- [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)),
224
- "identifierType" => parse_attributes(bibliographic_metadata.dig("item_number", "item_number_type")) || "Publisher ID" }]
225
- elsif parse_attributes(bibliographic_metadata.fetch("isbn", nil)).present?
226
- [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("isbn", nil), first: true),
227
- "identifierType" => "ISBN" }]
250
+ elsif parse_attributes(bibliographic_metadata.fetch('item_number', nil)).present?
251
+ [{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('item_number', nil)),
252
+ 'identifierType' => parse_attributes(bibliographic_metadata.dig('item_number',
253
+ 'item_number_type')) || 'Publisher ID' }]
254
+ elsif parse_attributes(bibliographic_metadata.fetch('isbn', nil)).present?
255
+ [{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('isbn', nil), first: true),
256
+ 'identifierType' => 'ISBN' }]
228
257
  else
229
258
  []
230
259
  end
231
260
  end
232
261
 
233
262
  def crossref_description(bibliographic_metadata)
234
- abstract = Array.wrap(bibliographic_metadata.dig("abstract")).map do |r|
235
- { "descriptionType" => "Abstract", "description" => sanitize(parse_attributes(r, content: 'p')) }.compact
263
+ abstract = Array.wrap(bibliographic_metadata['abstract']).map do |r|
264
+ { 'descriptionType' => 'Abstract',
265
+ 'description' => sanitize(parse_attributes(r, content: 'p')) }.compact
236
266
  end
237
267
 
238
- description = Array.wrap(bibliographic_metadata.dig("description")).map do |r|
239
- { "descriptionType" => "Other", "description" => sanitize(parse_attributes(r)) }.compact
268
+ description = Array.wrap(bibliographic_metadata['description']).map do |r|
269
+ { 'descriptionType' => 'Other', 'description' => sanitize(parse_attributes(r)) }.compact
240
270
  end
241
271
 
242
272
  (abstract + description)
243
273
  end
244
274
 
245
275
  def crossref_license(program_metadata)
246
- access_indicator = Array.wrap(program_metadata).find { |m| m["name"] == "AccessIndicators" }
276
+ access_indicator = Array.wrap(program_metadata).find { |m| m['name'] == 'AccessIndicators' }
247
277
  if access_indicator.present?
248
- Array.wrap(access_indicator["license_ref"]).map do |license|
249
- hsh_to_spdx("rightsURI" => parse_attributes(license))
278
+ Array.wrap(access_indicator['license_ref']).map do |license|
279
+ hsh_to_spdx('rightsURI' => parse_attributes(license))
250
280
  end.uniq
251
281
  else
252
282
  []
@@ -254,46 +284,61 @@ module Briard
254
284
  end
255
285
 
256
286
  def crossref_people(bibliographic_metadata, contributor_role)
257
- person = bibliographic_metadata.dig("contributors", "person_name") || bibliographic_metadata.dig("person_name")
258
- organization = Array.wrap(bibliographic_metadata.dig("contributors", "organization"))
259
- person = [{ "name" => ":(unav)", "contributor_role"=>"author" }] if contributor_role == "author" && Array.wrap(person).select { |a| a["contributor_role"] == "author" }.blank? && Array.wrap(organization).select { |a| a["contributor_role"] == "author" }.blank?
260
-
261
- (Array.wrap(person) + Array.wrap(organization)).select { |a| a["contributor_role"] == contributor_role }.map do |a|
262
- name_identifiers = normalize_orcid(parse_attributes(a["ORCID"])).present? ? [{ "nameIdentifier" => normalize_orcid(parse_attributes(a["ORCID"])), "nameIdentifierScheme" => "ORCID", "schemeUri"=>"https://orcid.org" }] : nil
263
- if a["surname"].present? || a["given_name"].present? || name_identifiers.present?
264
- given_name = parse_attributes(a["given_name"])
265
- family_name = parse_attributes(a["surname"])
266
- affiliation = Array.wrap(a["affiliation"]).map do |a|
287
+ person = bibliographic_metadata.dig('contributors',
288
+ 'person_name') || bibliographic_metadata['person_name']
289
+ organization = Array.wrap(bibliographic_metadata.dig('contributors', 'organization'))
290
+ if contributor_role == 'author' && Array.wrap(person).select do |a|
291
+ a['contributor_role'] == 'author'
292
+ end.blank? && Array.wrap(organization).select do |a|
293
+ a['contributor_role'] == 'author'
294
+ end.blank?
295
+ person = [{ 'name' => ':(unav)', 'contributor_role' => 'author' }]
296
+ end
297
+
298
+ (Array.wrap(person) + Array.wrap(organization)).select do |a|
299
+ a['contributor_role'] == contributor_role
300
+ end.map do |a|
301
+ name_identifiers = if normalize_orcid(parse_attributes(a['ORCID'])).present?
302
+ [{
303
+ 'nameIdentifier' => normalize_orcid(parse_attributes(a['ORCID'])), 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org'
304
+ }]
305
+ end
306
+ if a['surname'].present? || a['given_name'].present? || name_identifiers.present?
307
+ given_name = parse_attributes(a['given_name'])
308
+ family_name = parse_attributes(a['surname'])
309
+ affiliation = Array.wrap(a['affiliation']).map do |a|
267
310
  if a.is_a?(Hash)
268
311
  a
269
- elsif a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
312
+ elsif a.is_a?(Hash) && a.key?('__content__') && a['__content__'].strip.blank?
270
313
  nil
271
- elsif a.is_a?(Hash) && a.key?("__content__")
272
- { "name" => a["__content__"] }
314
+ elsif a.is_a?(Hash) && a.key?('__content__')
315
+ { 'name' => a['__content__'] }
273
316
  elsif a.strip.blank?
274
317
  nil
275
318
  elsif a.is_a?(String)
276
- { "name" => a }
319
+ { 'name' => a }
277
320
  end
278
321
  end.compact
279
322
 
280
- { "nameType" => "Personal",
281
- "nameIdentifiers" => name_identifiers,
282
- "name" => [family_name, given_name].compact.join(", "),
283
- "givenName" => given_name,
284
- "familyName" => family_name,
285
- "affiliation" => affiliation.presence,
286
- "contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
323
+ { 'nameType' => 'Personal',
324
+ 'nameIdentifiers' => name_identifiers,
325
+ 'name' => [family_name, given_name].compact.join(', '),
326
+ 'givenName' => given_name,
327
+ 'familyName' => family_name,
328
+ 'affiliation' => affiliation.presence,
329
+ 'contributorType' => contributor_role == 'editor' ? 'Editor' : nil }.compact
287
330
  else
288
- { "nameType" => "Organizational",
289
- "name" => a["name"] || a["__content__"] }
331
+ { 'nameType' => 'Organizational',
332
+ 'name' => a['name'] || a['__content__'] }
290
333
  end
291
334
  end
292
335
  end
293
336
 
294
337
  def crossref_funding_reference(program_metadata)
295
- fundref = Array.wrap(program_metadata).find { |a| a["name"] == "fundref" } || {}
296
- Array.wrap(fundref.fetch("assertion", [])).select { |a| a["name"] == "fundgroup" && a["assertion"].present? }.map do |f|
338
+ fundref = Array.wrap(program_metadata).find { |a| a['name'] == 'fundref' } || {}
339
+ Array.wrap(fundref.fetch('assertion', [])).select do |a|
340
+ a['name'] == 'fundgroup' && a['assertion'].present?
341
+ end.map do |f|
297
342
  funder_identifier = nil
298
343
  funder_identifier_type = nil
299
344
  funder_name = nil
@@ -301,66 +346,60 @@ module Briard
301
346
  award_number = nil
302
347
  award_uri = nil
303
348
 
304
- Array.wrap(f.fetch("assertion")).each do |a|
305
- if a.fetch("name") == "award_number"
306
- award_number = a.fetch("__content__", nil)
307
- award_uri = a.fetch("awardURI", nil)
308
- elsif a.fetch("name") == "funder_name"
309
- funder_name = a.fetch("__content__", nil).to_s.squish.presence
310
- funder_identifier = validate_funder_doi(a.dig("assertion", "__content__"))
311
- funder_identifier_type = "Crossref Funder ID" if funder_identifier.present?
349
+ Array.wrap(f.fetch('assertion')).each do |a|
350
+ case a.fetch('name')
351
+ when 'award_number'
352
+ award_number = a.fetch('__content__', nil)
353
+ award_uri = a.fetch('awardURI', nil)
354
+ when 'funder_name'
355
+ funder_name = a.fetch('__content__', nil).to_s.squish.presence
356
+ funder_identifier = validate_funder_doi(a.dig('assertion', '__content__'))
357
+ funder_identifier_type = 'Crossref Funder ID' if funder_identifier.present?
312
358
  end
313
359
  end
314
360
 
315
361
  # funder_name is required in DataCite
316
- if funder_name.present?
317
- { "funderIdentifier" => funder_identifier,
318
- "funderIdentifierType" => funder_identifier_type,
319
- "funderName" => funder_name,
320
- "awardTitle" => award_title,
321
- "awardNumber" => award_number,
322
- "awardUri" => award_uri }.compact
323
- else
324
- nil
325
- end
362
+ next unless funder_name.present?
363
+
364
+ { 'funderIdentifier' => funder_identifier,
365
+ 'funderIdentifierType' => funder_identifier_type,
366
+ 'funderName' => funder_name,
367
+ 'awardTitle' => award_title,
368
+ 'awardNumber' => award_number,
369
+ 'awardUri' => award_uri }.compact
326
370
  end.compact
327
371
  end
328
372
 
329
373
  def crossref_date_published(bibliographic_metadata)
330
- pub_date = Array.wrap(bibliographic_metadata.fetch("publication_date", nil)).presence ||
331
- Array.wrap(bibliographic_metadata.fetch("acceptance_date", nil))
374
+ pub_date = Array.wrap(bibliographic_metadata.fetch('publication_date', nil)).presence ||
375
+ Array.wrap(bibliographic_metadata.fetch('acceptance_date', nil))
332
376
  if pub_date.present?
333
- get_date_from_parts(pub_date.first["year"], pub_date.first["month"], pub_date.first["day"])
334
- else
335
- nil
377
+ get_date_from_parts(pub_date.first['year'], pub_date.first['month'],
378
+ pub_date.first['day'])
336
379
  end
337
380
  end
338
381
 
339
382
  def crossref_is_part_of(model_metadata)
340
- if model_metadata.present? && model_metadata.fetch("issn", nil).present?
341
- { "relatedIdentifier" => normalize_issn(model_metadata.fetch("issn", nil)),
342
- "relationType" => "IsPartOf",
343
- "relatedIdentifierType" => "ISSN",
344
- "resourceTypeGeneral" => "Collection" }.compact
345
- elsif model_metadata.present? && model_metadata.fetch("relatedIdentifier", nil).present?
346
- { "relatedIdentifier" => model_metadata.fetch("relatedIdentifier", nil),
347
- "relationType" => "IsPartOf",
348
- "relatedIdentifierType" => "DOI" }.compact
349
- else
350
- nil
383
+ if model_metadata.present? && model_metadata.fetch('issn', nil).present?
384
+ { 'relatedIdentifier' => normalize_issn(model_metadata.fetch('issn', nil)),
385
+ 'relationType' => 'IsPartOf',
386
+ 'relatedIdentifierType' => 'ISSN',
387
+ 'resourceTypeGeneral' => 'Collection' }.compact
388
+ elsif model_metadata.present? && model_metadata.fetch('relatedIdentifier', nil).present?
389
+ { 'relatedIdentifier' => model_metadata.fetch('relatedIdentifier', nil),
390
+ 'relationType' => 'IsPartOf',
391
+ 'relatedIdentifierType' => 'DOI' }.compact
351
392
  end
352
393
  end
353
394
 
354
395
  def crossref_references(bibliographic_metadata)
355
- refs = bibliographic_metadata.dig("citation_list", "citation")
356
- Array.wrap(refs).select { |a| a["doi"].present? }.map do |c|
357
- if c["doi"].present?
358
- { "relatedIdentifier" => parse_attributes(c["doi"]).downcase,
359
- "relationType" => "References",
360
- "relatedIdentifierType" => "DOI" }.compact
361
- else
362
- nil
363
- end
396
+ refs = bibliographic_metadata.dig('citation_list', 'citation')
397
+ Array.wrap(refs).select { |a| a['doi'].present? }.map do |c|
398
+ next unless c['doi'].present?
399
+
400
+ { 'relatedIdentifier' => parse_attributes(c['doi']).downcase,
401
+ 'relationType' => 'References',
402
+ 'relatedIdentifierType' => 'DOI' }.compact
364
403
  end.compact.unwrap
365
404
  end
366
405
  end