bolognese 1.0.29 → 1.0.30

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/lib/bolognese/author_utils.rb +33 -56
  4. data/lib/bolognese/datacite_utils.rb +15 -7
  5. data/lib/bolognese/metadata.rb +13 -18
  6. data/lib/bolognese/metadata_utils.rb +41 -20
  7. data/lib/bolognese/readers/bibtex_reader.rb +21 -17
  8. data/lib/bolognese/readers/citeproc_reader.rb +28 -7
  9. data/lib/bolognese/readers/codemeta_reader.rb +21 -8
  10. data/lib/bolognese/readers/crossref_reader.rb +31 -23
  11. data/lib/bolognese/readers/datacite_json_reader.rb +1 -47
  12. data/lib/bolognese/readers/datacite_reader.rb +26 -16
  13. data/lib/bolognese/readers/ris_reader.rb +20 -13
  14. data/lib/bolognese/readers/schema_org_reader.rb +26 -22
  15. data/lib/bolognese/utils.rb +139 -39
  16. data/lib/bolognese/version.rb +1 -1
  17. data/lib/bolognese/writers/bibtex_writer.rb +7 -5
  18. data/lib/bolognese/writers/citation_writer.rb +1 -1
  19. data/lib/bolognese/writers/codemeta_writer.rb +2 -2
  20. data/lib/bolognese/writers/crosscite_writer.rb +1 -34
  21. data/lib/bolognese/writers/datacite_json_writer.rb +1 -29
  22. data/lib/bolognese/writers/jats_writer.rb +9 -9
  23. data/lib/bolognese/writers/ris_writer.rb +5 -5
  24. data/lib/bolognese/writers/schema_org_writer.rb +9 -9
  25. data/spec/author_utils_spec.rb +17 -47
  26. data/spec/cli_spec.rb +1 -1
  27. data/spec/fixtures/crosscite.json +10 -4
  28. data/spec/fixtures/datacite-example-polygon-v4.1.xml +163 -0
  29. data/spec/fixtures/datacite-seriesinformation.xml +41 -0
  30. data/spec/fixtures/datacite.json +12 -8
  31. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/only_title.yml +37 -0
  32. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_and_pages.yml +37 -0
  33. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_and_pages.yml +37 -0
  34. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_issue_and_pages.yml +37 -0
  35. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/is_personal_name_/has_orcid_id.yml +37 -0
  36. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/to_schema_org_identifiers/with_identifiers.yml +37 -0
  37. data/spec/readers/bibtex_reader_spec.rb +5 -5
  38. data/spec/readers/citeproc_reader_spec.rb +4 -4
  39. data/spec/readers/codemeta_reader_spec.rb +39 -18
  40. data/spec/readers/crosscite_reader_spec.rb +2 -2
  41. data/spec/readers/crossref_reader_spec.rb +26 -32
  42. data/spec/readers/datacite_json_reader_spec.rb +2 -2
  43. data/spec/readers/datacite_reader_spec.rb +156 -101
  44. data/spec/readers/ris_reader_spec.rb +7 -7
  45. data/spec/readers/schema_org_reader_spec.rb +34 -28
  46. data/spec/utils_spec.rb +37 -6
  47. data/spec/writers/bibtex_writer_spec.rb +1 -0
  48. data/spec/writers/codemeta_writer_spec.rb +1 -1
  49. data/spec/writers/crosscite_writer_spec.rb +13 -6
  50. data/spec/writers/datacite_json_writer_spec.rb +13 -5
  51. data/spec/writers/datacite_writer_spec.rb +18 -18
  52. data/spec/writers/schema_org_writer_spec.rb +40 -14
  53. metadata +10 -2
@@ -45,7 +45,7 @@ module Bolognese
45
45
  "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
46
46
  "ris" => CP_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
47
47
  }.compact
48
- doi = normalize_doi(meta.fetch("DOI", nil))
48
+
49
49
  creators = get_authors(from_citeproc(Array.wrap(meta.fetch("author", nil))))
50
50
  contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
51
51
  dates = if meta.fetch("issued", nil).present?
@@ -69,33 +69,54 @@ module Bolognese
69
69
  else
70
70
  nil
71
71
  end
72
- periodical = if meta.fetch("container-title", nil).present?
72
+ container = if meta.fetch("container-title", nil).present?
73
+ first_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip).first : nil
74
+ last_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip).last : nil
75
+
73
76
  { "type" => "Periodical",
74
77
  "title" => meta.fetch("container-title", nil),
75
- "issn" => meta.fetch("ISSN", nil) }.compact
78
+ "identifier" => meta.fetch("ISSN", nil),
79
+ "identifierType" => meta.fetch("ISSN", nil).present? ? "ISSN" : nil,
80
+ "volume" => meta.fetch("volume", nil),
81
+ "issue" => meta.fetch("issue", nil),
82
+ "firstPage" => first_page,
83
+ "lastPage" => last_page
84
+ }.compact
76
85
  else
77
86
  nil
78
87
  end
79
- id = normalize_id(meta.fetch("id", nil))
88
+
89
+ identifiers = [normalize_id(meta.fetch("id", nil)), normalize_doi(meta.fetch("DOI", nil))].compact.map do |r|
90
+ r = normalize_id(r)
91
+
92
+ if r.start_with?("https://doi.org")
93
+ { "identifierType" => "DOI", "identifier" => r }
94
+ else
95
+ { "identifierType" => "URL", "identifier" => r }
96
+ end
97
+ end.uniq
98
+
99
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
100
+ doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
101
+
80
102
  state = id.present? || read_options.present? ? "findable" : "not_found"
81
103
  subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
82
104
  { "subject" => s }
83
105
  end
84
106
 
85
107
  { "id" => id,
108
+ "identifiers" => identifiers,
86
109
  "types" => types,
87
110
  "doi" => doi_from_url(doi),
88
111
  "url" => normalize_id(meta.fetch("URL", nil)),
89
112
  "titles" => [{ "title" => meta.fetch("title", nil) }],
90
113
  "creators" => creators,
91
114
  "contributors" => contributors,
92
- "periodical" => periodical,
115
+ "container" => container,
93
116
  "publisher" => meta.fetch("publisher", nil),
94
117
  "related_identifiers" => related_identifiers,
95
118
  "dates" => dates,
96
119
  "publication_year" => publication_year,
97
- "volume" => meta.fetch("volume", nil),
98
- #{}"pagination" => meta.pages.to_s.presence,
99
120
  "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
100
121
  "rights_list" => rights_list,
101
122
  "version_info" => meta.fetch("version", nil),
@@ -21,10 +21,23 @@ module Bolognese
21
21
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate))
22
22
 
23
23
  meta = string.present? ? Maremma.from_json(string) : {}
24
- identifier = meta.fetch("identifier", nil)
25
- id = normalize_id(meta.fetch("@id", nil) || identifier)
26
- author = get_authors(from_schema_org(Array.wrap(meta.fetch("agents", nil))))
27
- contributor = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
24
+
25
+ identifiers = ([meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
26
+ r = normalize_id(r) if r.is_a?(String)
27
+ if r.is_a?(String) && r.start_with?("https://doi.org")
28
+ { "identifierType" => "DOI", "identifier" => r }
29
+ elsif r.is_a?(String)
30
+ { "identifierType" => "URL", "identifier" => r }
31
+ elsif r.is_a?(Hash)
32
+ { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
33
+ end
34
+ end.compact.uniq
35
+
36
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
37
+ doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
38
+
39
+ creators = get_authors(from_schema_org_creators(Array.wrap(meta.fetch("agents", nil))))
40
+ contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
28
41
  dates = []
29
42
  dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present?
30
43
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
@@ -47,12 +60,12 @@ module Bolognese
47
60
 
48
61
  { "id" => id,
49
62
  "types" => types,
50
- "identifier" => identifier,
51
- "doi" => validate_doi(id),
63
+ "identifiers" => identifiers,
64
+ "doi" => doi_from_url(doi),
52
65
  "url" => normalize_id(meta.fetch("codeRepository", nil)),
53
66
  "titles" => [{ "title" => meta.fetch("title", nil) }],
54
- "creators" => author,
55
- "contributors" => contributor,
67
+ "creators" => creators,
68
+ "contributors" => contributors,
56
69
  "publisher" => publisher,
57
70
  #{}"is_part_of" => is_part_of,
58
71
  "dates" => dates,
@@ -72,7 +72,7 @@ module Bolognese
72
72
  bibliographic_metadata = meta.dig("crossref", "sa_component", "component_list", "component").to_h
73
73
  end
74
74
 
75
- resource_type = (resource_type || model).to_s.underscore.camelize.presence
75
+ resource_type = (resource_type || model).to_s.underscore.camelcase.presence
76
76
  schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
77
77
  types = {
78
78
  "resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
@@ -82,7 +82,6 @@ module Bolognese
82
82
  "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || "misc",
83
83
  "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || "JOUR"
84
84
  }.compact
85
- doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence #|| doi_from_url(options[:id])
86
85
 
87
86
  # Crossref servers run on Eastern Time
88
87
  Time.zone = 'Eastern Time (US & Canada)'
@@ -96,33 +95,41 @@ module Bolognese
96
95
  state = meta.present? || read_options.present? ? "findable" : "not_found"
97
96
 
98
97
  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
99
- periodical = if journal_metadata.present?
100
- { "type" => "Periodical",
101
- "issn" => parse_attributes(journal_metadata.fetch("issn", nil), first: true),
102
- "title" => journal_metadata["full_title"] }.compact
98
+ container = if journal_metadata.present? || book_metadata.present?
99
+ issn = parse_attributes(journal_metadata.to_h.fetch("issn", nil), first: true)
100
+
101
+ { "type" => "Journal",
102
+ "identifier" => issn,
103
+ "identifierType" => issn.present? ? "ISSN" : nil,
104
+ "title" => journal_metadata.to_h["full_title"],
105
+ "volume" => journal_issue.dig("journal_volume", "volume"),
106
+ "issue" => journal_issue.dig("issue"),
107
+ "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
108
+ "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
103
109
  else
104
110
  nil
105
111
  end
106
112
 
107
- { "id" => normalize_doi(doi),
113
+ identifiers = [{ "identifierType" => "DOI", "identifier" => normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi")) }, crossref_alternate_identifiers(bibliographic_metadata)].compact
114
+
115
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
116
+ doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
117
+
118
+ { "id" => id,
108
119
  "types" => types,
109
- "doi" => doi,
120
+ "doi" => doi_from_url(doi),
110
121
  "url" => bibliographic_metadata.dig("doi_data", "resource"),
111
122
  "titles" => [{ "title" => parse_attributes(bibliographic_metadata.dig("titles", "title")) }],
112
- "alternate_identifiers" => crossref_alternate_identifiers(bibliographic_metadata),
123
+ "identifiers" => identifiers,
113
124
  "creators" => crossref_people(bibliographic_metadata, "author"),
114
125
  "contributors" => crossref_people(bibliographic_metadata, "editor"),
115
126
  "funding_references" => crossref_funding_reference(program_metadata),
116
127
  "publisher" => publisher,
117
- "periodical" => periodical,
128
+ "container" => container,
118
129
  "agency" => "Crossref",
119
130
  "related_identifiers" => related_identifiers,
120
131
  "dates" => dates,
121
132
  "publication_year" => publication_year,
122
- "volume" => journal_issue.dig("journal_volume", "volume"),
123
- "issue" => journal_issue.dig("issue"),
124
- "first_page" => bibliographic_metadata.dig("pages", "first_page"),
125
- "last_page" => bibliographic_metadata.dig("pages", "last_page"),
126
133
  "descriptions" => crossref_description(bibliographic_metadata),
127
134
  "rights_list" => crossref_license(program_metadata),
128
135
  "version_info" => nil,
@@ -135,12 +142,12 @@ module Bolognese
135
142
  end
136
143
 
137
144
  def crossref_alternate_identifiers(bibliographic_metadata)
138
- if bibliographic_metadata.fetch("publisher_item", nil).present?
139
- { "alternateIdentifier" => parse_attributes(bibliographic_metadata.dig("publisher_item", "item_number")),
140
- "alternateIdentifierType" => "Publisher ID" }
141
- else
142
- { "alternateIdentifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)),
143
- "alternateIdentifierType" => "Publisher ID" }
145
+ if bibliographic_metadata.dig("publisher_item", "item_number").present?
146
+ { "identifier" => parse_attributes(bibliographic_metadata.dig("publisher_item", "item_number")),
147
+ "identifierType" => "Publisher ID" }
148
+ elsif parse_attributes(bibliographic_metadata.fetch("item_number", nil)).present?
149
+ { "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)),
150
+ "identifierType" => "Publisher ID" }
144
151
  end
145
152
  end
146
153
 
@@ -170,9 +177,10 @@ module Bolognese
170
177
  def crossref_people(bibliographic_metadata, contributor_role)
171
178
  person = bibliographic_metadata.dig("contributors", "person_name")
172
179
  Array.wrap(person).select { |a| a["contributor_role"] == contributor_role }.map do |a|
173
- { "type" => "Person",
174
- "id" => parse_attributes(a["ORCID"]),
175
- "name" => [a["given_name"], a["surname"]].join(" "),
180
+ name_identifiers = normalize_orcid(parse_attributes(a["ORCID"])).present? ? [{ "nameIdentifier" => normalize_orcid(parse_attributes(a["ORCID"])), "nameIdentifierScheme" => "ORCID" }] : nil
181
+ { "nameType" => "Personal",
182
+ "nameIdentifiers" => name_identifiers,
183
+ "name" => [a["surname"], a["given_name"]].join(", "),
176
184
  "givenName" => a["given_name"],
177
185
  "familyName" => a["surname"],
178
186
  "contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
@@ -7,53 +7,7 @@ module Bolognese
7
7
  errors = jsonlint(string)
8
8
  return { "errors" => errors } if errors.present?
9
9
 
10
- read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate))
11
-
12
- meta = string.present? ? Maremma.from_json(string) : {}
13
-
14
- state = meta.fetch("doi", nil).present? || read_options.present? ? "findable" : "not_found"
15
-
16
- dates = Array.wrap(meta.fetch("dates", nil)).map do |d|
17
- { "date" => d["date"],
18
- "dateType" => d["dateType"],
19
- "dateInformation" => d["dateInformation"] }.compact
20
- end
21
- dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
22
- schema_org = meta.dig("types", "type") || Bolognese::Utils::CR_TO_SO_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_SO_TRANSLATIONS[meta.dig("types", "resourceTypeGeneral").to_s.dasherize] || "CreativeWork"
23
- types = {
24
- "resourceTypeGeneral" => meta.dig("types", "resourceTypeGeneral"),
25
- "resourceType" => meta.dig("types", "resourceType"),
26
- "schemaOrg" => schema_org,
27
- "bibtex" => meta.dig("types", "bibtex") || Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
28
- "citeproc" => meta.dig("types", "citeproc") || Bolognese::Utils::CR_TO_CP_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article",
29
- "ris" => meta.dig("types", "ris") || Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[meta.dig("types", "resourceTypeGeneral").to_s.dasherize] || "GEN" }.compact
30
-
31
- { "id" => meta.fetch("id", nil),
32
- "types" => types,
33
- "doi" => validate_doi(meta.fetch("doi", nil)),
34
- "url" => normalize_id(meta.fetch("url", nil)),
35
- "titles" => meta.fetch("titles", nil),
36
- "alternate_identifiers" => Array.wrap(meta.fetch("alternateIdentifiers", nil)),
37
- "creators" => meta.fetch("creators", nil),
38
- "contributors" => meta.fetch("contributors", nil),
39
- "publisher" => meta.fetch("publisher", nil),
40
- "periodical" => meta.fetch("periodical", nil),
41
- "agency" => "DataCite",
42
- "funding_references" => meta.fetch("fundingReferences", nil),
43
- "related_identifiers" => Array.wrap(meta.fetch("relatedIdentifiers", nil)),
44
- "dates" => dates,
45
- "publication_year" => meta.fetch("publicationYear", nil),
46
- "descriptions" => meta.fetch("descriptions", nil),
47
- "rights_list" => meta.fetch("rightsList", nil),
48
- "version_info" => meta.fetch("version", nil),
49
- "subjects" => meta.fetch("subjects", nil),
50
- "language" => meta.fetch("language", nil),
51
- "sizes" => meta.fetch("sizes", nil),
52
- "formats" => meta.fetch("formats", nil),
53
- "geo_locations" => meta.fetch("geoLocations", nil),
54
- "schema_version" => meta.fetch("schemaVersion", nil),
55
- "state" => state
56
- }.merge(read_options)
10
+ string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
57
11
  end
58
12
  end
59
13
  end
@@ -76,7 +76,11 @@ module Bolognese
76
76
  id = normalize_doi(meta.dig("identifier", "__content__") || options[:id], sandbox: options[:sandbox])
77
77
  end
78
78
 
79
- doi = doi_from_url(id)
79
+ identifiers = [{ "identifierType" => "DOI", "identifier" => id }] + Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
80
+ { "identifierType" => get_identifier_type(r["alternateIdentifierType"]), "identifier" => r["__content__"].presence }.compact
81
+ end.compact
82
+
83
+ doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
80
84
 
81
85
  resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
82
86
  resource_type = meta.dig("resourceType", "__content__")
@@ -100,9 +104,6 @@ module Bolognese
100
104
  end
101
105
  end.compact
102
106
 
103
- alternate_identifiers = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
104
- { "alternateIdentifierType" => r["alternateIdentifierType"], "alternateIdentifier" => r["__content__"].presence }.compact
105
- end.compact
106
107
  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
107
108
  if r.blank?
108
109
  nil
@@ -167,7 +168,7 @@ module Bolognese
167
168
  }.compact
168
169
  end
169
170
  geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
170
- if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String)
171
+ if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)
171
172
  nil
172
173
  else
173
174
  {
@@ -181,22 +182,23 @@ module Bolognese
181
182
  "southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
182
183
  "northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
183
184
  }.compact.presence,
185
+ "geoLocationPolygon" => Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |glp| { "polygonPoint" => glp } }.compact.presence,
184
186
  "geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
185
187
  }.compact
186
188
  end
187
189
  end.compact
188
- periodical = set_periodical(meta)
190
+ container = set_container(meta)
189
191
  state = doi.present? || read_options.present? ? "findable" : "not_found"
190
192
 
191
193
  { "id" => id,
192
194
  "types" => types,
193
- "doi" => doi,
194
- "alternate_identifiers" => alternate_identifiers,
195
+ "doi" => doi_from_url(doi),
196
+ "identifiers" => identifiers,
195
197
  "url" => options.fetch(:url, nil).to_s.strip.presence,
196
198
  "titles" => titles,
197
199
  "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
198
200
  "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
199
- "periodical" => periodical,
201
+ "container" => container,
200
202
  "publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
201
203
  "agency" => "DataCite",
202
204
  "funding_references" => funding_references,
@@ -216,17 +218,25 @@ module Bolognese
216
218
  }.merge(read_options)
217
219
  end
218
220
 
219
- def set_periodical(meta)
220
- container_title = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
221
+ def set_container(meta)
222
+ series_information = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
223
+ si = get_series_information(series_information)
224
+
221
225
  is_part_of = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).find { |ri| ri["relationType"] == "IsPartOf" }.to_h
222
226
 
223
- if container_title.present? || is_part_of.present?
227
+ if si["title"].present? || is_part_of.present?
224
228
  {
225
- "type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataCatalog" : "Periodical",
226
- "id" => is_part_of["relatedIdentifierType"] == "DOI" ? normalize_doi(is_part_of["__content__"]) : is_part_of["__content__"],
227
- "title" => container_title,
228
- "issn" => is_part_of["relatedIdentifierType"] == "ISSN" ? is_part_of["__content__"] : nil
229
+ "type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataRepository" : "Series",
230
+ "identifier" => is_part_of["__content__"],
231
+ "identifierType" => is_part_of["relatedIdentifierType"],
232
+ "title" => si["title"],
233
+ "volume" => si["volume"],
234
+ "issue" => si["issue"],
235
+ "firstPage" => si["firstPage"],
236
+ "lastPage" => si["lastPage"]
229
237
  }.compact
238
+ else
239
+ {}
230
240
  end
231
241
  end
232
242
  end
@@ -47,8 +47,14 @@ module Bolognese
47
47
  "ris" => ris_type
48
48
  }.compact
49
49
 
50
- doi = validate_doi(meta.fetch("DO", nil))
51
- author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
50
+ identifiers = [normalize_doi(options[:doi]) || normalize_doi(meta.fetch("DO", nil))].map do |r|
51
+ { "identifierType" => "DOI", "identifier" => normalize_id(r) }
52
+ end.compact
53
+
54
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
55
+ doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
56
+
57
+ author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }
52
58
  date_parts = meta.fetch("PY", nil).to_s.split("/")
53
59
  created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
54
60
  dates = []
@@ -64,34 +70,35 @@ module Bolognese
64
70
  else
65
71
  []
66
72
  end
67
- periodical = if meta.fetch("T2", nil).present?
68
- { "type" => "Periodical",
73
+ container = if meta.fetch("T2", nil).present?
74
+ { "type" => "Journal",
69
75
  "title" => meta.fetch("T2", nil),
70
- "id" => meta.fetch("SN", nil) }.compact
76
+ "identifier" => meta.fetch("SN", nil),
77
+ "volume" => meta.fetch("VL", nil),
78
+ "issue" => meta.fetch("IS", nil),
79
+ "firstPage" => meta.fetch("SP", nil),
80
+ "lastPage" => meta.fetch("EP", nil) }.compact
71
81
  else
72
82
  nil
73
83
  end
74
- state = doi.present? || read_options.present? ? "findable" : "not_found"
84
+ state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
75
85
  subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
76
86
  { "subject" => s }
77
87
  end
78
88
 
79
- { "id" => normalize_doi(doi),
89
+ { "id" => id,
80
90
  "types" => types,
81
- "doi" => doi,
91
+ "identifiers" => identifiers,
92
+ "doi" => doi_from_url(doi),
82
93
  "url" => meta.fetch("UR", nil),
83
94
  "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
84
95
  "creators" => get_authors(author),
85
96
  "publisher" => meta.fetch("PB", "(:unav)"),
86
- "periodical" => periodical,
97
+ "container" => container,
87
98
  "related_identifiers" => related_identifiers,
88
99
  "dates" => dates,
89
100
  "publication_year" => publication_year,
90
101
  "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
91
- "volume" => meta.fetch("VL", nil),
92
- "issue" => meta.fetch("IS", nil),
93
- "first_page" => meta.fetch("SP", nil),
94
- "last_page" => meta.fetch("EP", nil),
95
102
  "subjects" => subjects,
96
103
  "language" => meta.fetch("LA", nil),
97
104
  "state" => state
@@ -37,21 +37,19 @@ module Bolognese
37
37
 
38
38
  meta = string.present? ? Maremma.from_json(string) : {}
39
39
 
40
- identifier = Array.wrap(meta.fetch("identifier", nil))
41
- if identifier.length > 1
42
- alternate_identifiers = identifier[1..-1].map do |r|
43
- if r.is_a?(String)
44
- { "alternateIdentifierType" => "URL", "alternateIdentifier" => r }
45
- elsif r.is_a?(Hash)
46
- { "alternateIdentifierType" => r["propertyID"], "alternateIdentifier" => r["value"] }
47
- end
40
+ identifiers = ([options[:doi] || meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
41
+ r = normalize_id(r) if r.is_a?(String)
42
+ if r.is_a?(String) && r.start_with?("https://doi.org")
43
+ { "identifierType" => "DOI", "identifier" => r }
44
+ elsif r.is_a?(String)
45
+ { "identifierType" => "URL", "identifier" => r }
46
+ elsif r.is_a?(Hash)
47
+ { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
48
48
  end
49
- else
50
- alternate_identifiers = nil
51
- end
52
- identifier = identifier.first
49
+ end.compact.uniq
50
+
51
+ id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
53
52
 
54
- id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
55
53
  schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
56
54
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
57
55
  types = {
@@ -63,19 +61,26 @@ module Bolognese
63
61
  "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
64
62
  }.compact
65
63
  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
66
- creators = get_authors(from_schema_org(Array.wrap(authors)))
67
- contributors = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
64
+ creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
65
+ contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
68
66
  publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
69
67
 
70
68
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
71
- periodical = if meta.fetch(ct, nil).present?
69
+ container = if meta.fetch(ct, nil).present?
70
+ url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
71
+
72
72
  {
73
- "type" => (schema_org == "Dataset") ? "DataCatalog" : "Periodical",
73
+ "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
74
74
  "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
75
- "url" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
75
+ "identifier" => url,
76
+ "identifierType" => url.present? ? "URL" : nil,
77
+ "volume" => meta.fetch("volumeNumber", nil),
78
+ "issue" => meta.fetch("issueNumber", nil),
79
+ "firstPage" => meta.fetch("pageStart", nil),
80
+ "lastPage" => meta.fetch("pageEnd", nil)
76
81
  }.compact
77
82
  else
78
- nil
83
+ {}
79
84
  end
80
85
 
81
86
  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
@@ -138,8 +143,7 @@ module Bolognese
138
143
  { "id" => id,
139
144
  "types" => types,
140
145
  "doi" => validate_doi(id),
141
- "identifier" => identifier,
142
- "alternate_identifiers" => alternate_identifiers,
146
+ "identifiers" => identifiers,
143
147
  "url" => normalize_id(meta.fetch("url", nil)),
144
148
  "content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
145
149
  "sizes" => Array.wrap(meta.fetch("contenSize", nil)).presence,
@@ -149,7 +153,7 @@ module Bolognese
149
153
  "contributors" => contributors,
150
154
  "publisher" => publisher,
151
155
  "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
152
- "periodical" => periodical,
156
+ "container" => container,
153
157
  "related_identifiers" => related_identifiers,
154
158
  "publication_year" => publication_year,
155
159
  "dates" => dates,