bolognese 1.0.29 → 1.0.30
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/bolognese/author_utils.rb +33 -56
- data/lib/bolognese/datacite_utils.rb +15 -7
- data/lib/bolognese/metadata.rb +13 -18
- data/lib/bolognese/metadata_utils.rb +41 -20
- data/lib/bolognese/readers/bibtex_reader.rb +21 -17
- data/lib/bolognese/readers/citeproc_reader.rb +28 -7
- data/lib/bolognese/readers/codemeta_reader.rb +21 -8
- data/lib/bolognese/readers/crossref_reader.rb +31 -23
- data/lib/bolognese/readers/datacite_json_reader.rb +1 -47
- data/lib/bolognese/readers/datacite_reader.rb +26 -16
- data/lib/bolognese/readers/ris_reader.rb +20 -13
- data/lib/bolognese/readers/schema_org_reader.rb +26 -22
- data/lib/bolognese/utils.rb +139 -39
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/bibtex_writer.rb +7 -5
- data/lib/bolognese/writers/citation_writer.rb +1 -1
- data/lib/bolognese/writers/codemeta_writer.rb +2 -2
- data/lib/bolognese/writers/crosscite_writer.rb +1 -34
- data/lib/bolognese/writers/datacite_json_writer.rb +1 -29
- data/lib/bolognese/writers/jats_writer.rb +9 -9
- data/lib/bolognese/writers/ris_writer.rb +5 -5
- data/lib/bolognese/writers/schema_org_writer.rb +9 -9
- data/spec/author_utils_spec.rb +17 -47
- data/spec/cli_spec.rb +1 -1
- data/spec/fixtures/crosscite.json +10 -4
- data/spec/fixtures/datacite-example-polygon-v4.1.xml +163 -0
- data/spec/fixtures/datacite-seriesinformation.xml +41 -0
- data/spec/fixtures/datacite.json +12 -8
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/only_title.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_and_pages.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_and_pages.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_series_information/title_volume_issue_and_pages.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/is_personal_name_/has_orcid_id.yml +37 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/to_schema_org_identifiers/with_identifiers.yml +37 -0
- data/spec/readers/bibtex_reader_spec.rb +5 -5
- data/spec/readers/citeproc_reader_spec.rb +4 -4
- data/spec/readers/codemeta_reader_spec.rb +39 -18
- data/spec/readers/crosscite_reader_spec.rb +2 -2
- data/spec/readers/crossref_reader_spec.rb +26 -32
- data/spec/readers/datacite_json_reader_spec.rb +2 -2
- data/spec/readers/datacite_reader_spec.rb +156 -101
- data/spec/readers/ris_reader_spec.rb +7 -7
- data/spec/readers/schema_org_reader_spec.rb +34 -28
- data/spec/utils_spec.rb +37 -6
- data/spec/writers/bibtex_writer_spec.rb +1 -0
- data/spec/writers/codemeta_writer_spec.rb +1 -1
- data/spec/writers/crosscite_writer_spec.rb +13 -6
- data/spec/writers/datacite_json_writer_spec.rb +13 -5
- data/spec/writers/datacite_writer_spec.rb +18 -18
- data/spec/writers/schema_org_writer_spec.rb +40 -14
- metadata +10 -2
@@ -45,7 +45,7 @@ module Bolognese
|
|
45
45
|
"bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
46
46
|
"ris" => CP_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
|
47
47
|
}.compact
|
48
|
-
|
48
|
+
|
49
49
|
creators = get_authors(from_citeproc(Array.wrap(meta.fetch("author", nil))))
|
50
50
|
contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
|
51
51
|
dates = if meta.fetch("issued", nil).present?
|
@@ -69,33 +69,54 @@ module Bolognese
|
|
69
69
|
else
|
70
70
|
nil
|
71
71
|
end
|
72
|
-
|
72
|
+
container = if meta.fetch("container-title", nil).present?
|
73
|
+
first_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip).first : nil
|
74
|
+
last_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip).last : nil
|
75
|
+
|
73
76
|
{ "type" => "Periodical",
|
74
77
|
"title" => meta.fetch("container-title", nil),
|
75
|
-
"
|
78
|
+
"identifier" => meta.fetch("ISSN", nil),
|
79
|
+
"identifierType" => meta.fetch("ISSN", nil).present? ? "ISSN" : nil,
|
80
|
+
"volume" => meta.fetch("volume", nil),
|
81
|
+
"issue" => meta.fetch("issue", nil),
|
82
|
+
"firstPage" => first_page,
|
83
|
+
"lastPage" => last_page
|
84
|
+
}.compact
|
76
85
|
else
|
77
86
|
nil
|
78
87
|
end
|
79
|
-
|
88
|
+
|
89
|
+
identifiers = [normalize_id(meta.fetch("id", nil)), normalize_doi(meta.fetch("DOI", nil))].compact.map do |r|
|
90
|
+
r = normalize_id(r)
|
91
|
+
|
92
|
+
if r.start_with?("https://doi.org")
|
93
|
+
{ "identifierType" => "DOI", "identifier" => r }
|
94
|
+
else
|
95
|
+
{ "identifierType" => "URL", "identifier" => r }
|
96
|
+
end
|
97
|
+
end.uniq
|
98
|
+
|
99
|
+
id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
100
|
+
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
101
|
+
|
80
102
|
state = id.present? || read_options.present? ? "findable" : "not_found"
|
81
103
|
subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
|
82
104
|
{ "subject" => s }
|
83
105
|
end
|
84
106
|
|
85
107
|
{ "id" => id,
|
108
|
+
"identifiers" => identifiers,
|
86
109
|
"types" => types,
|
87
110
|
"doi" => doi_from_url(doi),
|
88
111
|
"url" => normalize_id(meta.fetch("URL", nil)),
|
89
112
|
"titles" => [{ "title" => meta.fetch("title", nil) }],
|
90
113
|
"creators" => creators,
|
91
114
|
"contributors" => contributors,
|
92
|
-
"
|
115
|
+
"container" => container,
|
93
116
|
"publisher" => meta.fetch("publisher", nil),
|
94
117
|
"related_identifiers" => related_identifiers,
|
95
118
|
"dates" => dates,
|
96
119
|
"publication_year" => publication_year,
|
97
|
-
"volume" => meta.fetch("volume", nil),
|
98
|
-
#{}"pagination" => meta.pages.to_s.presence,
|
99
120
|
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
|
100
121
|
"rights_list" => rights_list,
|
101
122
|
"version_info" => meta.fetch("version", nil),
|
@@ -21,10 +21,23 @@ module Bolognese
|
|
21
21
|
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate))
|
22
22
|
|
23
23
|
meta = string.present? ? Maremma.from_json(string) : {}
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
|
25
|
+
identifiers = ([meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
|
26
|
+
r = normalize_id(r) if r.is_a?(String)
|
27
|
+
if r.is_a?(String) && r.start_with?("https://doi.org")
|
28
|
+
{ "identifierType" => "DOI", "identifier" => r }
|
29
|
+
elsif r.is_a?(String)
|
30
|
+
{ "identifierType" => "URL", "identifier" => r }
|
31
|
+
elsif r.is_a?(Hash)
|
32
|
+
{ "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
|
33
|
+
end
|
34
|
+
end.compact.uniq
|
35
|
+
|
36
|
+
id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
37
|
+
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
38
|
+
|
39
|
+
creators = get_authors(from_schema_org_creators(Array.wrap(meta.fetch("agents", nil))))
|
40
|
+
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
|
28
41
|
dates = []
|
29
42
|
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present?
|
30
43
|
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
|
@@ -47,12 +60,12 @@ module Bolognese
|
|
47
60
|
|
48
61
|
{ "id" => id,
|
49
62
|
"types" => types,
|
50
|
-
"
|
51
|
-
"doi" =>
|
63
|
+
"identifiers" => identifiers,
|
64
|
+
"doi" => doi_from_url(doi),
|
52
65
|
"url" => normalize_id(meta.fetch("codeRepository", nil)),
|
53
66
|
"titles" => [{ "title" => meta.fetch("title", nil) }],
|
54
|
-
"creators" =>
|
55
|
-
"contributors" =>
|
67
|
+
"creators" => creators,
|
68
|
+
"contributors" => contributors,
|
56
69
|
"publisher" => publisher,
|
57
70
|
#{}"is_part_of" => is_part_of,
|
58
71
|
"dates" => dates,
|
@@ -72,7 +72,7 @@ module Bolognese
|
|
72
72
|
bibliographic_metadata = meta.dig("crossref", "sa_component", "component_list", "component").to_h
|
73
73
|
end
|
74
74
|
|
75
|
-
resource_type = (resource_type || model).to_s.underscore.
|
75
|
+
resource_type = (resource_type || model).to_s.underscore.camelcase.presence
|
76
76
|
schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
|
77
77
|
types = {
|
78
78
|
"resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
|
@@ -82,7 +82,6 @@ module Bolognese
|
|
82
82
|
"bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || "misc",
|
83
83
|
"ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || "JOUR"
|
84
84
|
}.compact
|
85
|
-
doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence #|| doi_from_url(options[:id])
|
86
85
|
|
87
86
|
# Crossref servers run on Eastern Time
|
88
87
|
Time.zone = 'Eastern Time (US & Canada)'
|
@@ -96,33 +95,41 @@ module Bolognese
|
|
96
95
|
state = meta.present? || read_options.present? ? "findable" : "not_found"
|
97
96
|
|
98
97
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
98
|
+
container = if journal_metadata.present? || book_metadata.present?
|
99
|
+
issn = parse_attributes(journal_metadata.to_h.fetch("issn", nil), first: true)
|
100
|
+
|
101
|
+
{ "type" => "Journal",
|
102
|
+
"identifier" => issn,
|
103
|
+
"identifierType" => issn.present? ? "ISSN" : nil,
|
104
|
+
"title" => journal_metadata.to_h["full_title"],
|
105
|
+
"volume" => journal_issue.dig("journal_volume", "volume"),
|
106
|
+
"issue" => journal_issue.dig("issue"),
|
107
|
+
"firstPage" => bibliographic_metadata.dig("pages", "first_page"),
|
108
|
+
"lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact
|
103
109
|
else
|
104
110
|
nil
|
105
111
|
end
|
106
112
|
|
107
|
-
{ "
|
113
|
+
identifiers = [{ "identifierType" => "DOI", "identifier" => normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi")) }, crossref_alternate_identifiers(bibliographic_metadata)].compact
|
114
|
+
|
115
|
+
id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
116
|
+
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
117
|
+
|
118
|
+
{ "id" => id,
|
108
119
|
"types" => types,
|
109
|
-
"doi" => doi,
|
120
|
+
"doi" => doi_from_url(doi),
|
110
121
|
"url" => bibliographic_metadata.dig("doi_data", "resource"),
|
111
122
|
"titles" => [{ "title" => parse_attributes(bibliographic_metadata.dig("titles", "title")) }],
|
112
|
-
"
|
123
|
+
"identifiers" => identifiers,
|
113
124
|
"creators" => crossref_people(bibliographic_metadata, "author"),
|
114
125
|
"contributors" => crossref_people(bibliographic_metadata, "editor"),
|
115
126
|
"funding_references" => crossref_funding_reference(program_metadata),
|
116
127
|
"publisher" => publisher,
|
117
|
-
"
|
128
|
+
"container" => container,
|
118
129
|
"agency" => "Crossref",
|
119
130
|
"related_identifiers" => related_identifiers,
|
120
131
|
"dates" => dates,
|
121
132
|
"publication_year" => publication_year,
|
122
|
-
"volume" => journal_issue.dig("journal_volume", "volume"),
|
123
|
-
"issue" => journal_issue.dig("issue"),
|
124
|
-
"first_page" => bibliographic_metadata.dig("pages", "first_page"),
|
125
|
-
"last_page" => bibliographic_metadata.dig("pages", "last_page"),
|
126
133
|
"descriptions" => crossref_description(bibliographic_metadata),
|
127
134
|
"rights_list" => crossref_license(program_metadata),
|
128
135
|
"version_info" => nil,
|
@@ -135,12 +142,12 @@ module Bolognese
|
|
135
142
|
end
|
136
143
|
|
137
144
|
def crossref_alternate_identifiers(bibliographic_metadata)
|
138
|
-
if bibliographic_metadata.
|
139
|
-
{ "
|
140
|
-
"
|
141
|
-
|
142
|
-
{ "
|
143
|
-
"
|
145
|
+
if bibliographic_metadata.dig("publisher_item", "item_number").present?
|
146
|
+
{ "identifier" => parse_attributes(bibliographic_metadata.dig("publisher_item", "item_number")),
|
147
|
+
"identifierType" => "Publisher ID" }
|
148
|
+
elsif parse_attributes(bibliographic_metadata.fetch("item_number", nil)).present?
|
149
|
+
{ "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)),
|
150
|
+
"identifierType" => "Publisher ID" }
|
144
151
|
end
|
145
152
|
end
|
146
153
|
|
@@ -170,9 +177,10 @@ module Bolognese
|
|
170
177
|
def crossref_people(bibliographic_metadata, contributor_role)
|
171
178
|
person = bibliographic_metadata.dig("contributors", "person_name")
|
172
179
|
Array.wrap(person).select { |a| a["contributor_role"] == contributor_role }.map do |a|
|
173
|
-
{ "
|
174
|
-
|
175
|
-
"
|
180
|
+
name_identifiers = normalize_orcid(parse_attributes(a["ORCID"])).present? ? [{ "nameIdentifier" => normalize_orcid(parse_attributes(a["ORCID"])), "nameIdentifierScheme" => "ORCID" }] : nil
|
181
|
+
{ "nameType" => "Personal",
|
182
|
+
"nameIdentifiers" => name_identifiers,
|
183
|
+
"name" => [a["surname"], a["given_name"]].join(", "),
|
176
184
|
"givenName" => a["given_name"],
|
177
185
|
"familyName" => a["surname"],
|
178
186
|
"contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
|
@@ -7,53 +7,7 @@ module Bolognese
|
|
7
7
|
errors = jsonlint(string)
|
8
8
|
return { "errors" => errors } if errors.present?
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
meta = string.present? ? Maremma.from_json(string) : {}
|
13
|
-
|
14
|
-
state = meta.fetch("doi", nil).present? || read_options.present? ? "findable" : "not_found"
|
15
|
-
|
16
|
-
dates = Array.wrap(meta.fetch("dates", nil)).map do |d|
|
17
|
-
{ "date" => d["date"],
|
18
|
-
"dateType" => d["dateType"],
|
19
|
-
"dateInformation" => d["dateInformation"] }.compact
|
20
|
-
end
|
21
|
-
dates << { "date" => meta.fetch("publicationYear", nil), "dateType" => "Issued" } if meta.fetch("publicationYear", nil).present? && get_date(dates, "Issued").blank?
|
22
|
-
schema_org = meta.dig("types", "type") || Bolognese::Utils::CR_TO_SO_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_SO_TRANSLATIONS[meta.dig("types", "resourceTypeGeneral").to_s.dasherize] || "CreativeWork"
|
23
|
-
types = {
|
24
|
-
"resourceTypeGeneral" => meta.dig("types", "resourceTypeGeneral"),
|
25
|
-
"resourceType" => meta.dig("types", "resourceType"),
|
26
|
-
"schemaOrg" => schema_org,
|
27
|
-
"bibtex" => meta.dig("types", "bibtex") || Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
|
28
|
-
"citeproc" => meta.dig("types", "citeproc") || Bolognese::Utils::CR_TO_CP_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article",
|
29
|
-
"ris" => meta.dig("types", "ris") || Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[meta.dig("types", "resourceType").to_s.underscore.camelcase] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[meta.dig("types", "resourceTypeGeneral").to_s.dasherize] || "GEN" }.compact
|
30
|
-
|
31
|
-
{ "id" => meta.fetch("id", nil),
|
32
|
-
"types" => types,
|
33
|
-
"doi" => validate_doi(meta.fetch("doi", nil)),
|
34
|
-
"url" => normalize_id(meta.fetch("url", nil)),
|
35
|
-
"titles" => meta.fetch("titles", nil),
|
36
|
-
"alternate_identifiers" => Array.wrap(meta.fetch("alternateIdentifiers", nil)),
|
37
|
-
"creators" => meta.fetch("creators", nil),
|
38
|
-
"contributors" => meta.fetch("contributors", nil),
|
39
|
-
"publisher" => meta.fetch("publisher", nil),
|
40
|
-
"periodical" => meta.fetch("periodical", nil),
|
41
|
-
"agency" => "DataCite",
|
42
|
-
"funding_references" => meta.fetch("fundingReferences", nil),
|
43
|
-
"related_identifiers" => Array.wrap(meta.fetch("relatedIdentifiers", nil)),
|
44
|
-
"dates" => dates,
|
45
|
-
"publication_year" => meta.fetch("publicationYear", nil),
|
46
|
-
"descriptions" => meta.fetch("descriptions", nil),
|
47
|
-
"rights_list" => meta.fetch("rightsList", nil),
|
48
|
-
"version_info" => meta.fetch("version", nil),
|
49
|
-
"subjects" => meta.fetch("subjects", nil),
|
50
|
-
"language" => meta.fetch("language", nil),
|
51
|
-
"sizes" => meta.fetch("sizes", nil),
|
52
|
-
"formats" => meta.fetch("formats", nil),
|
53
|
-
"geo_locations" => meta.fetch("geoLocations", nil),
|
54
|
-
"schema_version" => meta.fetch("schemaVersion", nil),
|
55
|
-
"state" => state
|
56
|
-
}.merge(read_options)
|
10
|
+
string.present? ? Maremma.from_json(string).transform_keys! { |key| key.underscore } : {}
|
57
11
|
end
|
58
12
|
end
|
59
13
|
end
|
@@ -76,7 +76,11 @@ module Bolognese
|
|
76
76
|
id = normalize_doi(meta.dig("identifier", "__content__") || options[:id], sandbox: options[:sandbox])
|
77
77
|
end
|
78
78
|
|
79
|
-
|
79
|
+
identifiers = [{ "identifierType" => "DOI", "identifier" => id }] + Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
|
80
|
+
{ "identifierType" => get_identifier_type(r["alternateIdentifierType"]), "identifier" => r["__content__"].presence }.compact
|
81
|
+
end.compact
|
82
|
+
|
83
|
+
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
80
84
|
|
81
85
|
resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
|
82
86
|
resource_type = meta.dig("resourceType", "__content__")
|
@@ -100,9 +104,6 @@ module Bolognese
|
|
100
104
|
end
|
101
105
|
end.compact
|
102
106
|
|
103
|
-
alternate_identifiers = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |r|
|
104
|
-
{ "alternateIdentifierType" => r["alternateIdentifierType"], "alternateIdentifier" => r["__content__"].presence }.compact
|
105
|
-
end.compact
|
106
107
|
descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |r|
|
107
108
|
if r.blank?
|
108
109
|
nil
|
@@ -167,7 +168,7 @@ module Bolognese
|
|
167
168
|
}.compact
|
168
169
|
end
|
169
170
|
geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
|
170
|
-
if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String)
|
171
|
+
if !gl.is_a?(Hash) || gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)
|
171
172
|
nil
|
172
173
|
else
|
173
174
|
{
|
@@ -181,22 +182,23 @@ module Bolognese
|
|
181
182
|
"southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
|
182
183
|
"northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
|
183
184
|
}.compact.presence,
|
185
|
+
"geoLocationPolygon" => Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |glp| { "polygonPoint" => glp } }.compact.presence,
|
184
186
|
"geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
|
185
187
|
}.compact
|
186
188
|
end
|
187
189
|
end.compact
|
188
|
-
|
190
|
+
container = set_container(meta)
|
189
191
|
state = doi.present? || read_options.present? ? "findable" : "not_found"
|
190
192
|
|
191
193
|
{ "id" => id,
|
192
194
|
"types" => types,
|
193
|
-
"doi" => doi,
|
194
|
-
"
|
195
|
+
"doi" => doi_from_url(doi),
|
196
|
+
"identifiers" => identifiers,
|
195
197
|
"url" => options.fetch(:url, nil).to_s.strip.presence,
|
196
198
|
"titles" => titles,
|
197
199
|
"creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
|
198
200
|
"contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
|
199
|
-
"
|
201
|
+
"container" => container,
|
200
202
|
"publisher" => parse_attributes(meta.fetch("publisher", nil), first: true).to_s.strip.presence,
|
201
203
|
"agency" => "DataCite",
|
202
204
|
"funding_references" => funding_references,
|
@@ -216,17 +218,25 @@ module Bolognese
|
|
216
218
|
}.merge(read_options)
|
217
219
|
end
|
218
220
|
|
219
|
-
def
|
220
|
-
|
221
|
+
def set_container(meta)
|
222
|
+
series_information = Array.wrap(meta.dig("descriptions", "description")).find { |r| r["descriptionType"] == "SeriesInformation" }.to_h.fetch("__content__", nil)
|
223
|
+
si = get_series_information(series_information)
|
224
|
+
|
221
225
|
is_part_of = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).find { |ri| ri["relationType"] == "IsPartOf" }.to_h
|
222
226
|
|
223
|
-
if
|
227
|
+
if si["title"].present? || is_part_of.present?
|
224
228
|
{
|
225
|
-
"type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "
|
226
|
-
"
|
227
|
-
"
|
228
|
-
"
|
229
|
+
"type" => meta.dig("resourceType", "resourceTypeGeneral") == "Dataset" ? "DataRepository" : "Series",
|
230
|
+
"identifier" => is_part_of["__content__"],
|
231
|
+
"identifierType" => is_part_of["relatedIdentifierType"],
|
232
|
+
"title" => si["title"],
|
233
|
+
"volume" => si["volume"],
|
234
|
+
"issue" => si["issue"],
|
235
|
+
"firstPage" => si["firstPage"],
|
236
|
+
"lastPage" => si["lastPage"]
|
229
237
|
}.compact
|
238
|
+
else
|
239
|
+
{}
|
230
240
|
end
|
231
241
|
end
|
232
242
|
end
|
@@ -47,8 +47,14 @@ module Bolognese
|
|
47
47
|
"ris" => ris_type
|
48
48
|
}.compact
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
identifiers = [normalize_doi(options[:doi]) || normalize_doi(meta.fetch("DO", nil))].map do |r|
|
51
|
+
{ "identifierType" => "DOI", "identifier" => normalize_id(r) }
|
52
|
+
end.compact
|
53
|
+
|
54
|
+
id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
55
|
+
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)
|
56
|
+
|
57
|
+
author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }
|
52
58
|
date_parts = meta.fetch("PY", nil).to_s.split("/")
|
53
59
|
created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
|
54
60
|
dates = []
|
@@ -64,34 +70,35 @@ module Bolognese
|
|
64
70
|
else
|
65
71
|
[]
|
66
72
|
end
|
67
|
-
|
68
|
-
{ "type" => "
|
73
|
+
container = if meta.fetch("T2", nil).present?
|
74
|
+
{ "type" => "Journal",
|
69
75
|
"title" => meta.fetch("T2", nil),
|
70
|
-
"
|
76
|
+
"identifier" => meta.fetch("SN", nil),
|
77
|
+
"volume" => meta.fetch("VL", nil),
|
78
|
+
"issue" => meta.fetch("IS", nil),
|
79
|
+
"firstPage" => meta.fetch("SP", nil),
|
80
|
+
"lastPage" => meta.fetch("EP", nil) }.compact
|
71
81
|
else
|
72
82
|
nil
|
73
83
|
end
|
74
|
-
state =
|
84
|
+
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
|
75
85
|
subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
|
76
86
|
{ "subject" => s }
|
77
87
|
end
|
78
88
|
|
79
|
-
{ "id" =>
|
89
|
+
{ "id" => id,
|
80
90
|
"types" => types,
|
81
|
-
"
|
91
|
+
"identifiers" => identifiers,
|
92
|
+
"doi" => doi_from_url(doi),
|
82
93
|
"url" => meta.fetch("UR", nil),
|
83
94
|
"titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
|
84
95
|
"creators" => get_authors(author),
|
85
96
|
"publisher" => meta.fetch("PB", "(:unav)"),
|
86
|
-
"
|
97
|
+
"container" => container,
|
87
98
|
"related_identifiers" => related_identifiers,
|
88
99
|
"dates" => dates,
|
89
100
|
"publication_year" => publication_year,
|
90
101
|
"descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB")), "descriptionType" => "Abstract" }] : nil,
|
91
|
-
"volume" => meta.fetch("VL", nil),
|
92
|
-
"issue" => meta.fetch("IS", nil),
|
93
|
-
"first_page" => meta.fetch("SP", nil),
|
94
|
-
"last_page" => meta.fetch("EP", nil),
|
95
102
|
"subjects" => subjects,
|
96
103
|
"language" => meta.fetch("LA", nil),
|
97
104
|
"state" => state
|
@@ -37,21 +37,19 @@ module Bolognese
|
|
37
37
|
|
38
38
|
meta = string.present? ? Maremma.from_json(string) : {}
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
identifiers = ([options[:doi] || meta.fetch("@id", nil)] + Array.wrap(meta.fetch("identifier", nil))).map do |r|
|
41
|
+
r = normalize_id(r) if r.is_a?(String)
|
42
|
+
if r.is_a?(String) && r.start_with?("https://doi.org")
|
43
|
+
{ "identifierType" => "DOI", "identifier" => r }
|
44
|
+
elsif r.is_a?(String)
|
45
|
+
{ "identifierType" => "URL", "identifier" => r }
|
46
|
+
elsif r.is_a?(Hash)
|
47
|
+
{ "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
|
48
48
|
end
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
identifier = identifier.first
|
49
|
+
end.compact.uniq
|
50
|
+
|
51
|
+
id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
|
53
52
|
|
54
|
-
id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
|
55
53
|
schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
|
56
54
|
resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
|
57
55
|
types = {
|
@@ -63,19 +61,26 @@ module Bolognese
|
|
63
61
|
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
|
64
62
|
}.compact
|
65
63
|
authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
|
66
|
-
creators = get_authors(
|
67
|
-
contributors = get_authors(
|
64
|
+
creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
|
65
|
+
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
|
68
66
|
publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
|
69
67
|
|
70
68
|
ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
|
71
|
-
|
69
|
+
container = if meta.fetch(ct, nil).present?
|
70
|
+
url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
|
71
|
+
|
72
72
|
{
|
73
|
-
"type" => (schema_org == "Dataset") ? "
|
73
|
+
"type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
|
74
74
|
"title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
|
75
|
-
"
|
75
|
+
"identifier" => url,
|
76
|
+
"identifierType" => url.present? ? "URL" : nil,
|
77
|
+
"volume" => meta.fetch("volumeNumber", nil),
|
78
|
+
"issue" => meta.fetch("issueNumber", nil),
|
79
|
+
"firstPage" => meta.fetch("pageStart", nil),
|
80
|
+
"lastPage" => meta.fetch("pageEnd", nil)
|
76
81
|
}.compact
|
77
82
|
else
|
78
|
-
|
83
|
+
{}
|
79
84
|
end
|
80
85
|
|
81
86
|
related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
|
@@ -138,8 +143,7 @@ module Bolognese
|
|
138
143
|
{ "id" => id,
|
139
144
|
"types" => types,
|
140
145
|
"doi" => validate_doi(id),
|
141
|
-
"
|
142
|
-
"alternate_identifiers" => alternate_identifiers,
|
146
|
+
"identifiers" => identifiers,
|
143
147
|
"url" => normalize_id(meta.fetch("url", nil)),
|
144
148
|
"content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
|
145
149
|
"sizes" => Array.wrap(meta.fetch("contenSize", nil)).presence,
|
@@ -149,7 +153,7 @@ module Bolognese
|
|
149
153
|
"contributors" => contributors,
|
150
154
|
"publisher" => publisher,
|
151
155
|
"agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
|
152
|
-
"
|
156
|
+
"container" => container,
|
153
157
|
"related_identifiers" => related_identifiers,
|
154
158
|
"publication_year" => publication_year,
|
155
159
|
"dates" => dates,
|