bolognese 0.15.9 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/bolognese.gemspec +1 -0
- data/lib/bolognese/datacite_utils.rb +19 -51
- data/lib/bolognese/doi_utils.rb +1 -1
- data/lib/bolognese/metadata_utils.rb +43 -125
- data/lib/bolognese/readers/bibtex_reader.rb +21 -10
- data/lib/bolognese/readers/citeproc_reader.rb +20 -12
- data/lib/bolognese/readers/codemeta_reader.rb +8 -3
- data/lib/bolognese/readers/crossref_reader.rb +41 -50
- data/lib/bolognese/readers/datacite_json_reader.rb +17 -40
- data/lib/bolognese/readers/datacite_reader.rb +78 -93
- data/lib/bolognese/readers/ris_reader.rb +20 -11
- data/lib/bolognese/readers/schema_org_reader.rb +62 -29
- data/lib/bolognese/utils.rb +90 -17
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/writers/bibtex_writer.rb +2 -2
- data/lib/bolognese/writers/codemeta_writer.rb +1 -2
- data/lib/bolognese/writers/crosscite_writer.rb +10 -29
- data/lib/bolognese/writers/datacite_json_writer.rb +18 -28
- data/lib/bolognese/writers/jats_writer.rb +4 -4
- data/lib/bolognese/writers/ris_writer.rb +3 -2
- data/lib/bolognese/writers/schema_org_writer.rb +16 -15
- data/spec/author_utils_spec.rb +3 -3
- data/spec/datacite_utils_spec.rb +4 -14
- data/spec/fixtures/crosscite.json +3 -3
- data/spec/fixtures/crossref.ris +1 -0
- data/spec/fixtures/datacite-example-geolocation-2.xml +140 -0
- data/spec/fixtures/datacite-example-geolocation.xml +66 -0
- data/spec/fixtures/datacite.json +33 -12
- data/spec/fixtures/datacite_software_missing_comma.json +1 -1
- data/spec/fixtures/schema_org_geolocation.json +82 -0
- data/spec/fixtures/schema_org_geoshape.json +550 -0
- data/spec/fixtures/schema_org_gtex.json +1 -1
- data/spec/fixtures/schema_org_list.json +1 -1
- data/spec/fixtures/schema_org_topmed.json +1 -1
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/default.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_bibtex.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_citation.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_crossref.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_datacite.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_jats.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_schema_org.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/default.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_bibtex.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_citation.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite_json.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_jats.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_schema_org.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/find_from_format_by_id/crossref.yml +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/find_from_format_by_id/datacite.yml +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_state.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/crossref.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/datacite.yml +11 -11
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/medra.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/not_found.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/crossref.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/crossref_doi_not_url.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/datacite.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/datacite_doi_http.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/github.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/orcid.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/schema_org.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/unknown_DOI_registration_agency.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/bibtex.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/citeproc.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/codemeta.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/crosscite.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/crossref.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/datacite.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/datacite_json.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/ris.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/schema_org.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_string/crosscite.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_ORCID_ID.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_SICI_DOI.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_data_citation.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/book_chapter.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/dataset.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/date_in_future.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/journal_article.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/journal_article_with.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/not_found_error.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/posted_content.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/BlogPosting.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_in_test_system.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_in_with_related_id_system.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_not_found.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Dataset.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Funding.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Funding_schema_version_4.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Referee_report_in_test_system.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_only_full_name.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_with_scheme.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_with_wrong_orcid_scheme.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/content_url.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/date.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/dissertation.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/doi_with_sign.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/empty_subject.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/funding_schema_version_3.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/is_identical_to.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/keywords_with_attributes.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/leading_and_trailing_whitespace.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/multiple_author_names_in_one_creatorName.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/multiple_licenses.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/series-information.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/xs_string_attributes.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/handle_input/DOI_RA_not_Crossref_or_DataCite.yml +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/handle_input/unknown_DOI_prefix.yml +2 -2
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/missing_comma.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/nil.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/overlapping_keys.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/valid.yml +3 -3
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_ris/BlogPosting_schema_org.yml +16 -20
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/geo_location_box.yml +42 -0
- data/spec/readers/bibtex_reader_spec.rb +5 -5
- data/spec/readers/citeproc_reader_spec.rb +1 -2
- data/spec/readers/codemeta_reader_spec.rb +9 -9
- data/spec/readers/crosscite_reader_spec.rb +2 -2
- data/spec/readers/crossref_reader_spec.rb +55 -48
- data/spec/readers/datacite_json_reader_spec.rb +5 -4
- data/spec/readers/datacite_reader_spec.rb +150 -114
- data/spec/readers/ris_reader_spec.rb +5 -5
- data/spec/readers/schema_org_reader_spec.rb +62 -48
- data/spec/utils_spec.rb +4 -4
- data/spec/writers/crosscite_writer_spec.rb +21 -17
- data/spec/writers/datacite_json_writer_spec.rb +8 -8
- data/spec/writers/datacite_writer_spec.rb +17 -18
- data/spec/writers/ris_writer_spec.rb +26 -18
- data/spec/writers/schema_org_writer_spec.rb +45 -18
- data/spec/writers/turtle_writer_spec.rb +1 -1
- metadata +22 -3
@@ -41,16 +41,24 @@ module Bolognese
|
|
41
41
|
|
42
42
|
doi = validate_doi(meta.fetch("DO", nil))
|
43
43
|
author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
|
44
|
-
container_title = meta.fetch("T2", nil)
|
45
44
|
date_parts = meta.fetch("PY", nil).to_s.split("/")
|
46
45
|
date_published = get_date_from_parts(*date_parts)
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
related_identifiers = if meta.fetch("T2", nil).present? && meta.fetch("SN", nil).present?
|
47
|
+
[{ "type" => "Periodical",
|
48
|
+
"id" => meta.fetch("SN", nil),
|
49
|
+
"related_identifier_type" => "ISSN",
|
50
|
+
"relation_type" => "IsPartOf",
|
51
|
+
"title" => meta.fetch("T2", nil), }.compact]
|
52
|
+
else
|
53
|
+
[]
|
54
|
+
end
|
55
|
+
periodical = if meta.fetch("T2", nil).present?
|
56
|
+
{ "type" => "Periodical",
|
57
|
+
"title" => meta.fetch("T2", nil),
|
58
|
+
"id" => meta.fetch("SN", nil) }.compact
|
59
|
+
else
|
60
|
+
nil
|
61
|
+
end
|
54
62
|
state = doi.present? ? "findable" : "not_found"
|
55
63
|
|
56
64
|
{ "id" => normalize_doi(doi),
|
@@ -61,9 +69,10 @@ module Bolognese
|
|
61
69
|
"doi" => doi,
|
62
70
|
"b_url" => meta.fetch("UR", nil),
|
63
71
|
"title" => meta.fetch("T1", nil),
|
64
|
-
"
|
65
|
-
"publisher" => meta.fetch("PB",
|
66
|
-
"
|
72
|
+
"creator" => get_authors(author),
|
73
|
+
"publisher" => meta.fetch("PB", "(:unav)"),
|
74
|
+
"periodical" => periodical,
|
75
|
+
"related_identifiers" => related_identifiers,
|
67
76
|
"date_created" => meta.fetch("Y1", nil),
|
68
77
|
"date_published" => date_published,
|
69
78
|
"date_accessed" => meta.fetch("Y2", nil),
|
@@ -37,7 +37,7 @@ module Bolognese
|
|
37
37
|
|
38
38
|
identifier = Array.wrap(meta.fetch("identifier", nil))
|
39
39
|
if identifier.length > 1
|
40
|
-
|
40
|
+
alternate_identifiers = identifier[1..-1].map do |r|
|
41
41
|
if r.is_a?(String)
|
42
42
|
{ "type" => "URL", "name" => r }
|
43
43
|
elsif r.is_a?(Hash)
|
@@ -45,7 +45,7 @@ module Bolognese
|
|
45
45
|
end
|
46
46
|
end.unwrap
|
47
47
|
else
|
48
|
-
|
48
|
+
alternate_identifiers = nil
|
49
49
|
end
|
50
50
|
identifier = identifier.first
|
51
51
|
|
@@ -57,24 +57,61 @@ module Bolognese
|
|
57
57
|
editor = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
|
58
58
|
publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
60
|
+
ct = (type == "Dataset") ? "includedInDataCatalog" : "Periodical"
|
61
|
+
periodical = if meta.fetch(ct, nil).present?
|
62
|
+
{
|
63
|
+
"type" => (type == "Dataset") ? "DataCatalog" : "Periodical",
|
64
|
+
"title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
|
65
|
+
"url" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
|
66
|
+
}.compact
|
67
|
+
else
|
68
|
+
nil
|
69
|
+
end
|
66
70
|
|
67
|
-
|
71
|
+
related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
|
72
|
+
Array.wrap(schema_org_is_part_of(meta)) +
|
73
|
+
Array.wrap(schema_org_has_part(meta)) +
|
74
|
+
Array.wrap(schema_org_is_previous_version_of(meta)) +
|
75
|
+
Array.wrap(schema_org_is_new_version_of(meta)) +
|
76
|
+
Array.wrap(schema_org_references(meta)) +
|
77
|
+
Array.wrap(schema_org_is_referenced_by(meta)) +
|
78
|
+
Array.wrap(schema_org_is_supplement_to(meta)) +
|
79
|
+
Array.wrap(schema_org_is_supplemented_by(meta))
|
80
|
+
|
81
|
+
rights = {
|
68
82
|
"id" => parse_attributes(meta.fetch("license", nil), content: "id", first: true),
|
69
83
|
"name" => parse_attributes(meta.fetch("license", nil), content: "name", first: true)
|
70
84
|
}
|
71
85
|
|
72
|
-
|
86
|
+
funding_references = from_schema_org(Array.wrap(meta.fetch("funder", nil)))
|
87
|
+
funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
|
88
|
+
{
|
89
|
+
"funder_name" => fr["name"],
|
90
|
+
"funder_identifier" => fr["@id"],
|
91
|
+
"funder_identifier_type" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : nil }.compact
|
92
|
+
end
|
73
93
|
date_published = meta.fetch("datePublished", nil)
|
74
94
|
state = meta.present? ? "findable" : "not_found"
|
75
|
-
|
76
|
-
|
77
|
-
|
95
|
+
geo_location = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
|
96
|
+
if gl.dig("geo", "box")
|
97
|
+
s, w, n, e = gl.dig("geo", "box").split(" ", 4)
|
98
|
+
geo_location_box = {
|
99
|
+
"west_bound_longitude" => w,
|
100
|
+
"east_bound_longitude" => e,
|
101
|
+
"south_bound_latitude" => s,
|
102
|
+
"north_bound_latitude" => n
|
103
|
+
}.compact.presence
|
104
|
+
else
|
105
|
+
geo_location_box = nil
|
106
|
+
end
|
107
|
+
geo_location_point = { "point_longitude" => gl.dig("geo", "longitude"), "point_latitude" => gl.dig("geo", "latitude") }.compact.presence
|
108
|
+
|
109
|
+
{
|
110
|
+
"geo_location_place" => gl.dig("geo", "address"),
|
111
|
+
"geo_location_point" => geo_location_point,
|
112
|
+
"geo_location_box" => geo_location_box
|
113
|
+
}.compact
|
114
|
+
end
|
78
115
|
|
79
116
|
{ "id" => id,
|
80
117
|
"type" => type,
|
@@ -85,43 +122,38 @@ module Bolognese
|
|
85
122
|
"resource_type_general" => resource_type_general,
|
86
123
|
"doi" => validate_doi(id),
|
87
124
|
"identifier" => identifier,
|
88
|
-
"
|
125
|
+
"alternate_identifiers" => alternate_identifiers,
|
89
126
|
"b_url" => normalize_id(meta.fetch("url", nil)),
|
90
127
|
"content_url" => Array.wrap(meta.fetch("contentUrl", nil)).unwrap,
|
91
|
-
"
|
92
|
-
"
|
128
|
+
"size" => meta.fetch("contenSize", nil),
|
129
|
+
"format" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)).unwrap,
|
93
130
|
"title" => meta.fetch("name", nil),
|
94
|
-
"
|
131
|
+
"creator" => author,
|
95
132
|
"editor" => editor,
|
96
133
|
"publisher" => publisher,
|
97
134
|
"service_provider" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
|
98
|
-
"
|
99
|
-
"
|
100
|
-
"is_part_of" => is_part_of,
|
101
|
-
"has_part" => schema_org_has_part(meta),
|
102
|
-
"references" => schema_org_references(meta),
|
103
|
-
"is_referenced_by" => schema_org_is_referenced_by(meta),
|
104
|
-
"is_supplement_to" => schema_org_is_supplement_to(meta),
|
105
|
-
"is_supplemented_by" => schema_org_is_supplemented_by(meta),
|
135
|
+
"periodical" => periodical,
|
136
|
+
"related_identifiers" => related_identifiers,
|
106
137
|
"date_created" => meta.fetch("dateCreated", nil),
|
107
138
|
"date_published" => date_published,
|
108
139
|
"date_modified" => meta.fetch("dateModified", nil),
|
109
140
|
"description" => meta.fetch("description", nil).present? ? { "text" => sanitize(meta.fetch("description")) } : nil,
|
110
|
-
"
|
141
|
+
"rights" => rights,
|
111
142
|
"b_version" => meta.fetch("version", nil),
|
112
143
|
"keywords" => meta.fetch("keywords", nil).to_s.split(", "),
|
113
144
|
"state" => state,
|
114
145
|
"schema_version" => meta.fetch("schemaVersion", nil),
|
115
|
-
"
|
146
|
+
"funding_references" => funding_references,
|
147
|
+
"geo_location" => geo_location
|
116
148
|
}
|
117
149
|
end
|
118
150
|
|
119
151
|
def schema_org_related_identifier(meta, relation_type: nil)
|
120
|
-
normalize_ids(ids: meta.fetch(relation_type, nil))
|
152
|
+
normalize_ids(ids: meta.fetch(relation_type, nil), relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
|
121
153
|
end
|
122
154
|
|
123
155
|
def schema_org_reverse_related_identifier(meta, relation_type: nil)
|
124
|
-
normalize_ids(ids: meta.dig("@reverse", relation_type))
|
156
|
+
normalize_ids(ids: meta.dig("@reverse", relation_type), relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
|
125
157
|
end
|
126
158
|
|
127
159
|
def schema_org_is_identical_to(meta)
|
@@ -159,6 +191,7 @@ module Bolognese
|
|
159
191
|
def schema_org_is_supplemented_by(meta)
|
160
192
|
schema_org_related_identifier(meta, relation_type: "isBasedOn")
|
161
193
|
end
|
194
|
+
|
162
195
|
end
|
163
196
|
end
|
164
197
|
end
|
data/lib/bolognese/utils.rb
CHANGED
@@ -25,7 +25,10 @@ module Bolognese
|
|
25
25
|
"Sound" => "AudioObject",
|
26
26
|
"Text" => "ScholarlyArticle",
|
27
27
|
"Workflow" => nil,
|
28
|
-
"Other" => "CreativeWork"
|
28
|
+
"Other" => "CreativeWork",
|
29
|
+
# not part of DataCite schema, but used internally
|
30
|
+
"Periodical" => "Periodical",
|
31
|
+
"DataCatalog" => "DataCatalog"
|
29
32
|
}
|
30
33
|
|
31
34
|
DC_TO_CP_TRANSLATIONS = {
|
@@ -173,7 +176,6 @@ module Bolognese
|
|
173
176
|
"BlogPosting" => "Text",
|
174
177
|
"Chapter" => "Text",
|
175
178
|
"Collection" => "Collection",
|
176
|
-
"CreativeWork" => "Other",
|
177
179
|
"DataCatalog" => "Dataset",
|
178
180
|
"Dataset" => "Dataset",
|
179
181
|
"Event" => "Event",
|
@@ -362,7 +364,7 @@ module Bolognese
|
|
362
364
|
"codemeta"
|
363
365
|
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("ris_type")
|
364
366
|
"crosscite"
|
365
|
-
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("
|
367
|
+
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("schema-version").to_s.start_with?("http://datacite.org/schema/kernel")
|
366
368
|
"datacite_json"
|
367
369
|
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
|
368
370
|
"citeproc"
|
@@ -471,11 +473,17 @@ module Bolognese
|
|
471
473
|
"http://orcid.org/" + Addressable::URI.encode(orcid)
|
472
474
|
end
|
473
475
|
|
474
|
-
def normalize_ids(ids: nil)
|
475
|
-
Array.wrap(ids).map do |
|
476
|
-
|
477
|
-
|
478
|
-
|
476
|
+
def normalize_ids(ids: nil, relation_type: nil)
|
477
|
+
Array.wrap(ids).select { |idx| idx["@id"].present? }.map do |idx|
|
478
|
+
id = normalize_id(idx["@id"])
|
479
|
+
related_identifier_type = doi_from_url(id).present? ? "DOI" : "URL"
|
480
|
+
id = doi_from_url(id) || id
|
481
|
+
|
482
|
+
{ "id" => id,
|
483
|
+
"relation_type" => relation_type,
|
484
|
+
"related_identifier_type" => related_identifier_type,
|
485
|
+
"resource_type_general" => Metadata::SO_TO_DC_TRANSLATIONS[idx["@type"]],
|
486
|
+
"title" => idx["title"] || idx["name"] }.compact
|
479
487
|
end.unwrap
|
480
488
|
end
|
481
489
|
|
@@ -525,13 +533,10 @@ module Bolognese
|
|
525
533
|
def to_schema_org_container(element, options={})
|
526
534
|
return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))
|
527
535
|
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
element["title"] ||= options[:container_title]
|
533
|
-
|
534
|
-
map_hash_keys(element: element, mapping: mapping)
|
536
|
+
{
|
537
|
+
"@id" => element["id"],
|
538
|
+
"@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
|
539
|
+
"name" => element["title"] || options[:container_title] }
|
535
540
|
end
|
536
541
|
|
537
542
|
def to_schema_org_identifier(element, options={})
|
@@ -540,8 +545,8 @@ module Bolognese
|
|
540
545
|
"propertyID" => normalize_doi(element) ? "doi" : "url",
|
541
546
|
"value" => element }
|
542
547
|
|
543
|
-
if options[:
|
544
|
-
[ident] + Array.wrap(options[:
|
548
|
+
if options[:alternate_identifiers].present?
|
549
|
+
[ident] + Array.wrap(options[:alternate_identifiers]).map do |ai|
|
545
550
|
if ai["type"].to_s.downcase == "url"
|
546
551
|
ai["name"]
|
547
552
|
else
|
@@ -556,6 +561,67 @@ module Bolognese
|
|
556
561
|
end
|
557
562
|
end
|
558
563
|
|
564
|
+
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
|
565
|
+
return nil unless related_identifiers.present? && relation_type.present?
|
566
|
+
|
567
|
+
relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
|
568
|
+
|
569
|
+
Array.wrap(related_identifiers).select { |ri| relation_type.include?(ri["relation_type"]) }.map do |r|
|
570
|
+
if r["related_identifier_type"] == "ISSN" && r["relation_type"] == "IsPartOf"
|
571
|
+
{
|
572
|
+
"@type" => "Periodical",
|
573
|
+
"issn" => r["id"],
|
574
|
+
"name" => r["title"] }.compact
|
575
|
+
else
|
576
|
+
{
|
577
|
+
"@id" => normalize_id(r["id"]),
|
578
|
+
"@type" => DC_TO_SO_TRANSLATIONS[r["resource_type_general"]] || "CreativeWork",
|
579
|
+
"name" => r["title"] }.compact
|
580
|
+
end
|
581
|
+
end.unwrap
|
582
|
+
end
|
583
|
+
|
584
|
+
def to_schema_org_funder(funding_references)
|
585
|
+
return nil unless funding_references.present?
|
586
|
+
|
587
|
+
Array.wrap(funding_references).map do |fr|
|
588
|
+
{
|
589
|
+
"@id" => fr["funder_identifier"],
|
590
|
+
"@type" => "Organization",
|
591
|
+
"name" => fr["funder_name"] }.compact
|
592
|
+
end.unwrap
|
593
|
+
end
|
594
|
+
|
595
|
+
def to_schema_org_spatial_coverage(geo_location)
|
596
|
+
return nil unless geo_location.present?
|
597
|
+
|
598
|
+
Array.wrap(geo_location).map do |gl|
|
599
|
+
if gl.fetch("geo_location_point", nil)
|
600
|
+
{
|
601
|
+
"@type" => "Place",
|
602
|
+
"geo" => {
|
603
|
+
"@type" => "GeoCoordinates",
|
604
|
+
"address" => gl["geo_location_place"],
|
605
|
+
"latitude" => gl.dig("geo_location_point", "point_latitude"),
|
606
|
+
"longitude" => gl.dig("geo_location_point", "point_longitude")
|
607
|
+
}.compact
|
608
|
+
}
|
609
|
+
elsif gl.fetch("geo_location_box", nil)
|
610
|
+
{
|
611
|
+
"@type" => "Place",
|
612
|
+
"geo" => {
|
613
|
+
"@type" => "GeoShape",
|
614
|
+
"address" => gl["geo_location_place"],
|
615
|
+
"box" => [gl.dig("geo_location_box", "south_bound_latitude"),
|
616
|
+
gl.dig("geo_location_box", "west_bound_longitude"),
|
617
|
+
gl.dig("geo_location_box", "north_bound_latitude"),
|
618
|
+
gl.dig("geo_location_box", "east_bound_longitude")].join(" ")
|
619
|
+
}.compact
|
620
|
+
}
|
621
|
+
end
|
622
|
+
end.compact.unwrap
|
623
|
+
end
|
624
|
+
|
559
625
|
def from_schema_org(element)
|
560
626
|
mapping = { "@type" => "type", "@id" => "id" }
|
561
627
|
|
@@ -576,6 +642,13 @@ module Bolognese
|
|
576
642
|
end.unwrap
|
577
643
|
end
|
578
644
|
|
645
|
+
def to_identifier(identifier)
|
646
|
+
{
|
647
|
+
"@type" => "PropertyValue",
|
648
|
+
"propertyID" => identifier["related_identifier_type"],
|
649
|
+
"value" => identifier["id"] }
|
650
|
+
end
|
651
|
+
|
579
652
|
def from_citeproc(element)
|
580
653
|
Array.wrap(element).map do |a|
|
581
654
|
if a["literal"].present?
|
data/lib/bolognese/version.rb
CHANGED
@@ -11,11 +11,11 @@ module Bolognese
|
|
11
11
|
bibtex_key: identifier,
|
12
12
|
doi: doi,
|
13
13
|
url: b_url,
|
14
|
-
author: authors_as_string(
|
14
|
+
author: authors_as_string(creator),
|
15
15
|
keywords: keywords.present? ? Array.wrap(keywords).map { |k| parse_attributes(k, content: "text", first: true) }.join(", ") : nil,
|
16
16
|
language: language,
|
17
17
|
title: parse_attributes(title, content: "text", first: true),
|
18
|
-
journal:
|
18
|
+
journal: periodical && periodical["title"],
|
19
19
|
volume: volume,
|
20
20
|
issue: issue,
|
21
21
|
pages: [first_page, last_page].compact.join("-").presence,
|
@@ -13,11 +13,10 @@ module Bolognese
|
|
13
13
|
"identifier" => identifier,
|
14
14
|
"codeRepository" => b_url,
|
15
15
|
"title" => title,
|
16
|
-
"agents" =>
|
16
|
+
"agents" => creator,
|
17
17
|
"description" => parse_attributes(description, content: "text", first: true),
|
18
18
|
"version" => b_version,
|
19
19
|
"tags" => keywords.to_s.split(", ").presence,
|
20
|
-
"dateCreated" => date_created,
|
21
20
|
"datePublished" => date_published,
|
22
21
|
"dateModified" => date_modified,
|
23
22
|
"publisher" => publisher
|
@@ -15,48 +15,29 @@ module Bolognese
|
|
15
15
|
"ris_type" => ris_type,
|
16
16
|
"resource_type_general" => resource_type_general,
|
17
17
|
"resource_type" => additional_type,
|
18
|
-
"
|
18
|
+
"creator" => creator,
|
19
19
|
"title" => title,
|
20
20
|
"publisher" => publisher,
|
21
|
-
"container_title" =>
|
21
|
+
"container_title" => periodical && periodical["title"],
|
22
22
|
"keywords" => keywords,
|
23
23
|
"contributor" => contributor,
|
24
|
-
"
|
25
|
-
"date_available" => date_available,
|
26
|
-
"date_copyrighted" => date_copyrighted,
|
27
|
-
"date_collected" => date_collected,
|
28
|
-
"date_created" => date_created,
|
24
|
+
"dates" => dates,
|
29
25
|
"date_published" => date_published,
|
30
26
|
"date_modified" => date_modified,
|
31
|
-
"date_submitted" => date_submitted,
|
32
|
-
"date_registered" => date_registered,
|
33
|
-
"date_updated" => date_updated,
|
34
|
-
"date_valid" => date_valid,
|
35
27
|
"language" => language,
|
36
|
-
"
|
37
|
-
"
|
28
|
+
"alternate_identifiers" => alternate_identifiers,
|
29
|
+
"size" => size,
|
30
|
+
"format" => b_format,
|
38
31
|
"version" => b_version,
|
39
|
-
"
|
32
|
+
"rights" => rights,
|
40
33
|
"description" => description,
|
41
34
|
"volume" => volume,
|
42
35
|
"issue" => issue,
|
43
36
|
"first_page" => first_page,
|
44
37
|
"last_page" => last_page,
|
45
|
-
"
|
46
|
-
"
|
47
|
-
"
|
48
|
-
"is_part_of" => is_part_of,
|
49
|
-
"has_part" => has_part,
|
50
|
-
"is_previous_version_of" => is_previous_version_of,
|
51
|
-
"is_new_version_of" => is_new_version_of,
|
52
|
-
"is_variant_form_of" => is_variant_form_of,
|
53
|
-
"is_original_form_of" => is_original_form_of,
|
54
|
-
"references" => references,
|
55
|
-
"is_referenced_by" => is_referenced_by,
|
56
|
-
"is_supplement_to" => is_supplement_to,
|
57
|
-
"is_supplemented_by" => is_supplemented_by,
|
58
|
-
"reviews" => reviews,
|
59
|
-
"is_reviewed_by" => is_reviewed_by,
|
38
|
+
"geo_location" => geo_location,
|
39
|
+
"funding_references" => funding_references,
|
40
|
+
"related_identifiers" => related_identifiers,
|
60
41
|
"schema_version" => schema_version,
|
61
42
|
"provider_id" => provider_id,
|
62
43
|
"client_id" => client_id,
|