commonmeta-ruby 3.9.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +12 -11
- data/lib/commonmeta/author_utils.rb +12 -5
- data/lib/commonmeta/readers/commonmeta_reader.rb +1 -1
- data/lib/commonmeta/readers/datacite_reader.rb +120 -108
- data/lib/commonmeta/schema_utils.rb +1 -1
- data/lib/commonmeta/utils.rb +47 -2
- data/lib/commonmeta/version.rb +1 -1
- data/lib/commonmeta/writers/commonmeta_writer.rb +1 -1
- data/resources/{commonmeta_v0.10.5.json → commonmeta_v0.10.7.json} +21 -5
- data/resources/{datacite-v4.json → datacite-v45.json} +26 -5
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
- data/resources/kernel-4/metadata.xsd +11 -7
- data/spec/author_utils_spec.rb +10 -0
- data/spec/fixtures/commonmeta.json +1 -1
- data/spec/fixtures/datacite-dataset_v4.5.json +736 -0
- data/spec/fixtures/datacite-instrument.json +135 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/SoftwareSourceCode.yml +8 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/dissertation.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/funding_references.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/subject_scheme.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml +317 -0
- data/spec/readers/commonmeta_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +68 -14
- data/spec/readers/json_feed_reader_spec.rb +25 -0
- data/spec/utils_spec.rb +30 -4
- data/spec/writers/commonmeta_writer_spec.rb +30 -3
- data/spec/writers/csl_writer_spec.rb +1 -0
- data/spec/writers/csv_writer_spec.rb +1 -0
- data/spec/writers/datacite_writer_spec.rb +0 -1
- metadata +7 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5acc4ac253ffc536724d14bf4d6cc58710978da3587c21035d946889b454a959
|
|
4
|
+
data.tar.gz: c1eac95196a7e2f01b52c5f6f94e0055299554e59824dfc88bda9de661c34193
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 95b8264ab1e837f26971d12df81ec4b3fc156d21d63387b0053646aacf80f8be6b03d77837759d2876ecd35c05400114a8c559df45219c74160422e58b73d868
|
|
7
|
+
data.tar.gz: 1cca6f5bfa1bd30d966744931a88054cea207f4ed50f1c94b5a428e656be4f7216b71055f9e66d2ac03cb60b2140e3610f1a8c60c5bd26d4644daa0d4ece0b13
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
commonmeta-ruby (3.
|
|
4
|
+
commonmeta-ruby (3.12.0)
|
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
|
6
6
|
addressable (~> 2.8.1, < 2.8.2)
|
|
7
7
|
base32-url (>= 0.7.0, < 1)
|
|
@@ -27,7 +27,7 @@ PATH
|
|
|
27
27
|
GEM
|
|
28
28
|
remote: https://rubygems.org/
|
|
29
29
|
specs:
|
|
30
|
-
activesupport (7.1.
|
|
30
|
+
activesupport (7.1.3)
|
|
31
31
|
base64
|
|
32
32
|
bigdecimal
|
|
33
33
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
@@ -47,7 +47,7 @@ GEM
|
|
|
47
47
|
bibtex-ruby (6.1.0)
|
|
48
48
|
latex-decode (~> 0.0)
|
|
49
49
|
racc (~> 1.7)
|
|
50
|
-
bigdecimal (3.1.
|
|
50
|
+
bigdecimal (3.1.6)
|
|
51
51
|
builder (3.2.4)
|
|
52
52
|
citeproc (1.0.10)
|
|
53
53
|
namae (~> 1.0)
|
|
@@ -56,9 +56,10 @@ GEM
|
|
|
56
56
|
csl (~> 2.0)
|
|
57
57
|
code-scanning-rubocop (0.6.1)
|
|
58
58
|
rubocop (~> 1.0)
|
|
59
|
-
concurrent-ruby (1.2.
|
|
59
|
+
concurrent-ruby (1.2.3)
|
|
60
60
|
connection_pool (2.4.1)
|
|
61
|
-
crack (0.4.
|
|
61
|
+
crack (0.4.6)
|
|
62
|
+
bigdecimal
|
|
62
63
|
rexml
|
|
63
64
|
crass (1.0.6)
|
|
64
65
|
csl (2.0.0)
|
|
@@ -66,7 +67,7 @@ GEM
|
|
|
66
67
|
rexml
|
|
67
68
|
csl-styles (2.0.1)
|
|
68
69
|
csl (~> 2.0)
|
|
69
|
-
diff-lcs (1.5.
|
|
70
|
+
diff-lcs (1.5.1)
|
|
70
71
|
docile (1.4.0)
|
|
71
72
|
domain_name (0.6.20240107)
|
|
72
73
|
drb (2.2.0)
|
|
@@ -132,7 +133,7 @@ GEM
|
|
|
132
133
|
crass (~> 1.0.2)
|
|
133
134
|
nokogiri (>= 1.12.0)
|
|
134
135
|
matrix (0.4.2)
|
|
135
|
-
minitest (5.21.
|
|
136
|
+
minitest (5.21.2)
|
|
136
137
|
multi_json (1.15.0)
|
|
137
138
|
mutex_m (0.2.0)
|
|
138
139
|
namae (1.2.0)
|
|
@@ -143,7 +144,7 @@ GEM
|
|
|
143
144
|
bigdecimal (>= 3.0)
|
|
144
145
|
optimist (3.1.0)
|
|
145
146
|
parallel (1.24.0)
|
|
146
|
-
parser (3.3.0.
|
|
147
|
+
parser (3.3.0.5)
|
|
147
148
|
ast (~> 2.4.1)
|
|
148
149
|
racc
|
|
149
150
|
postrank-uri (1.1)
|
|
@@ -154,7 +155,7 @@ GEM
|
|
|
154
155
|
iniparser (>= 0.1.0)
|
|
155
156
|
public_suffix (4.0.7)
|
|
156
157
|
racc (1.7.3)
|
|
157
|
-
rack (3.0.
|
|
158
|
+
rack (3.0.9)
|
|
158
159
|
rack-test (2.1.0)
|
|
159
160
|
rack (>= 1.3)
|
|
160
161
|
rainbow (3.1.1)
|
|
@@ -191,11 +192,11 @@ GEM
|
|
|
191
192
|
rspec-xsd (0.1.0)
|
|
192
193
|
nokogiri (~> 1.6)
|
|
193
194
|
rspec (~> 3)
|
|
194
|
-
rubocop (1.
|
|
195
|
+
rubocop (1.60.2)
|
|
195
196
|
json (~> 2.3)
|
|
196
197
|
language_server-protocol (>= 3.17.0)
|
|
197
198
|
parallel (~> 1.10)
|
|
198
|
-
parser (>= 3.
|
|
199
|
+
parser (>= 3.3.0.2)
|
|
199
200
|
rainbow (>= 2.2.2, < 4.0)
|
|
200
201
|
regexp_parser (>= 1.8, < 3.0)
|
|
201
202
|
rexml (>= 3.2.5, < 4.0)
|
|
@@ -25,8 +25,8 @@ module Commonmeta
|
|
|
25
25
|
"Researcher" => "Other",
|
|
26
26
|
"Sponsor" => "Other",
|
|
27
27
|
"Supervisor" => "Supervision",
|
|
28
|
-
"WorkPackageLeader" => "Other"
|
|
29
|
-
}
|
|
28
|
+
"WorkPackageLeader" => "Other",
|
|
29
|
+
}
|
|
30
30
|
|
|
31
31
|
def get_one_author(author)
|
|
32
32
|
# basic sanity checks
|
|
@@ -54,18 +54,22 @@ module Commonmeta
|
|
|
54
54
|
id = parse_attributes(author.fetch("id", nil), first: true) ||
|
|
55
55
|
parse_attributes(author.fetch("identifier", nil), first: true) ||
|
|
56
56
|
parse_attributes(author.fetch("sameAs", nil), first: true)
|
|
57
|
+
id = normalize_orcid(id) || normalize_ror(id) if id.present?
|
|
57
58
|
|
|
58
59
|
# DataCite metadata
|
|
59
60
|
if id.nil? && author["nameIdentifiers"].present?
|
|
60
61
|
id = Array.wrap(author.dig("nameIdentifiers")).find do |ni|
|
|
61
|
-
ni
|
|
62
|
+
normalize_name_identifier(ni).present?
|
|
62
63
|
end
|
|
63
|
-
id = id
|
|
64
|
+
id = normalize_name_identifier(id) if id.present?
|
|
64
65
|
# Crossref metadata
|
|
65
66
|
elsif id.nil? && author["ORCID"].present?
|
|
66
67
|
id = author.fetch("ORCID")
|
|
68
|
+
id = normalize_orcid(id)
|
|
69
|
+
# JSON Feed metadata
|
|
70
|
+
elsif id.nil? && author["url"].present?
|
|
71
|
+
id = author.fetch("url")
|
|
67
72
|
end
|
|
68
|
-
id = normalize_orcid(id) || normalize_ror(id)
|
|
69
73
|
|
|
70
74
|
# parse author type, i.e. "Person", "Organization" or not specified
|
|
71
75
|
type = author.fetch("type", nil)
|
|
@@ -164,6 +168,9 @@ module Commonmeta
|
|
|
164
168
|
# check if a name has only one word, e.g. "FamousOrganization", not including commas
|
|
165
169
|
return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",")
|
|
166
170
|
|
|
171
|
+
# check if name contains words known to be used in organization names
|
|
172
|
+
return false if %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science].any? { |word| name.to_s.include?(word) }
|
|
173
|
+
|
|
167
174
|
# check for suffixes, e.g. "John Smith, MD"
|
|
168
175
|
return true if name && %w[MD PhD].include?(name.split(", ").last)
|
|
169
176
|
|
|
@@ -12,7 +12,7 @@ module Commonmeta
|
|
|
12
12
|
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
|
13
13
|
:sandbox, :validate, :ra))
|
|
14
14
|
meta = string.present? ? JSON.parse(string) : {}
|
|
15
|
-
meta["schema_version"] = "https://commonmeta.org/commonmeta_v0.10
|
|
15
|
+
meta["schema_version"] = "https://commonmeta.org/commonmeta_v0.10"
|
|
16
16
|
meta.compact.merge(read_options)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
@@ -4,29 +4,29 @@ module Commonmeta
|
|
|
4
4
|
module Readers
|
|
5
5
|
module DataciteReader
|
|
6
6
|
def get_datacite(id: nil, **options)
|
|
7
|
-
return {
|
|
7
|
+
return { "string" => nil, "state" => "not_found" } unless id.present?
|
|
8
8
|
|
|
9
9
|
api_url = datacite_api_url(id, options)
|
|
10
10
|
response = HTTP.get(api_url)
|
|
11
|
-
return {
|
|
11
|
+
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
|
12
12
|
|
|
13
13
|
body = JSON.parse(response.body)
|
|
14
|
-
client = Array.wrap(body.fetch(
|
|
15
|
-
m[
|
|
14
|
+
client = Array.wrap(body.fetch("included", nil)).find do |m|
|
|
15
|
+
m["type"] == "clients"
|
|
16
16
|
end
|
|
17
|
-
client_id = client.to_h.fetch(
|
|
18
|
-
provider_id = Array.wrap(client.to_h.fetch(
|
|
19
|
-
m[
|
|
20
|
-
end.to_h.dig(
|
|
21
|
-
|
|
22
|
-
{
|
|
23
|
-
|
|
24
|
-
|
|
17
|
+
client_id = client.to_h.fetch("id", nil)
|
|
18
|
+
provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find do |m|
|
|
19
|
+
m["provider"].present?
|
|
20
|
+
end.to_h.dig("provider", "data", "id")
|
|
21
|
+
|
|
22
|
+
{ "string" => response.body.to_s,
|
|
23
|
+
"provider_id" => provider_id,
|
|
24
|
+
"client_id" => client_id }
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
def read_datacite(string: nil, **_options)
|
|
28
28
|
errors = jsonlint(string)
|
|
29
|
-
return {
|
|
29
|
+
return { "errors" => errors } if errors.present?
|
|
30
30
|
|
|
31
31
|
read_options = ActiveSupport::HashWithIndifferentAccess.new(_options.except(:doi, :id, :url,
|
|
32
32
|
:sandbox, :validate, :ra))
|
|
@@ -34,134 +34,146 @@ module Commonmeta
|
|
|
34
34
|
meta = string.present? ? JSON.parse(string) : {}
|
|
35
35
|
|
|
36
36
|
# optionally strip out the message wrapper from API
|
|
37
|
-
meta = meta.dig(
|
|
37
|
+
meta = meta.dig("data", "attributes") if meta.dig("data").present?
|
|
38
38
|
|
|
39
39
|
meta.transform_keys!(&:underscore)
|
|
40
40
|
|
|
41
|
-
id = normalize_doi(meta.fetch(
|
|
41
|
+
id = normalize_doi(meta.fetch("doi", nil))
|
|
42
42
|
|
|
43
|
-
resource_type_general = meta.dig(
|
|
44
|
-
resource_type = meta.dig(
|
|
43
|
+
resource_type_general = meta.dig("types", "resourceTypeGeneral")
|
|
44
|
+
resource_type = meta.dig("types", "resourceType")
|
|
45
45
|
# if resource_type is one of the new resource_type_general types introduced in schema 4.3, use it
|
|
46
46
|
type = Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type, nil) ||
|
|
47
|
-
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general,
|
|
47
|
+
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, "Other")
|
|
48
48
|
|
|
49
|
-
alternate_identifiers = Array.wrap(meta.fetch(
|
|
49
|
+
alternate_identifiers = Array.wrap(meta.fetch("alternate_identifiers", nil)).map do |i|
|
|
50
50
|
i.transform_keys! { |k| k.camelize(:lower) }
|
|
51
51
|
end
|
|
52
|
-
url = meta.fetch(
|
|
53
|
-
titles = Array.wrap(meta.fetch(
|
|
54
|
-
title.
|
|
52
|
+
url = meta.fetch("url", nil)
|
|
53
|
+
titles = Array.wrap(meta.fetch("titles", nil)).map do |title|
|
|
54
|
+
{ "title" => title.fetch("title", nil),
|
|
55
|
+
"type" => title.fetch("titleType", nil),
|
|
56
|
+
"language" => title.fetch("lang", nil) }.compact
|
|
57
|
+
end
|
|
58
|
+
contributors = get_authors(from_datacite(meta.fetch("creators", nil)))
|
|
59
|
+
contributors += get_authors(from_datacite(meta.fetch("contributors", nil)))
|
|
60
|
+
if meta.fetch("publisher", nil).is_a?(Hash)
|
|
61
|
+
publisher = { "name" => meta.fetch("publisher", nil).fetch("name", nil) }
|
|
62
|
+
elsif meta.fetch("publisher", nil).is_a?(String)
|
|
63
|
+
publisher = { "name" => meta.fetch("publisher", nil) }
|
|
64
|
+
else
|
|
65
|
+
publisher = nil
|
|
55
66
|
end
|
|
56
|
-
contributors = get_authors(from_datacite(meta.fetch('creators', nil)))
|
|
57
|
-
contributors += get_authors(from_datacite(meta.fetch('contributors', nil)))
|
|
58
|
-
publisher = { 'name' => meta.fetch('publisher', nil) }
|
|
59
67
|
|
|
60
|
-
container = meta.fetch(
|
|
61
|
-
funding_references = meta.fetch(
|
|
68
|
+
container = meta.fetch("container", nil)
|
|
69
|
+
funding_references = meta.fetch("funding_references", nil)
|
|
62
70
|
|
|
63
71
|
date = {}
|
|
64
|
-
date[
|
|
65
|
-
get_iso8601_date(meta.dig(
|
|
66
|
-
date[
|
|
67
|
-
get_iso8601_date(meta.dig(
|
|
68
|
-
|
|
69
|
-
date[
|
|
70
|
-
date[
|
|
71
|
-
get_iso8601_date(meta.dig(
|
|
72
|
-
|
|
73
|
-
descriptions = Array.wrap(meta.fetch(
|
|
74
|
-
description.
|
|
72
|
+
date["created"] =
|
|
73
|
+
get_iso8601_date(meta.dig("created")) || get_date(meta.dig("dates"), "Created")
|
|
74
|
+
date["published"] =
|
|
75
|
+
get_iso8601_date(meta.dig("published")) || get_date(meta.dig("dates"),
|
|
76
|
+
"Issued") || get_iso8601_date(meta.dig("publication_year"))
|
|
77
|
+
date["registered"] = get_iso8601_date(meta.dig("registered"))
|
|
78
|
+
date["updated"] =
|
|
79
|
+
get_iso8601_date(meta.dig("updated")) || get_date(meta.dig("dates"), "Updated")
|
|
80
|
+
|
|
81
|
+
descriptions = Array.wrap(meta.fetch("descriptions", nil)).map do |description|
|
|
82
|
+
description_type = description.fetch("descriptionType", nil)
|
|
83
|
+
description_type = "Other" unless %w[Abstract Methods TechnicalInfo].include?(description_type)
|
|
84
|
+
{ "description" => description.fetch("description", nil),
|
|
85
|
+
"type" => description_type,
|
|
86
|
+
"language" => description.fetch("lang", nil) }.compact
|
|
75
87
|
end
|
|
76
|
-
license = Array.wrap(meta.fetch(
|
|
77
|
-
r[
|
|
88
|
+
license = Array.wrap(meta.fetch("rights_list", nil)).find do |r|
|
|
89
|
+
r["rightsUri"].present?
|
|
78
90
|
end
|
|
79
|
-
license = hsh_to_spdx(
|
|
80
|
-
version = meta.fetch(
|
|
81
|
-
subjects = meta.fetch(
|
|
82
|
-
language = meta.fetch(
|
|
83
|
-
geo_locations = meta.fetch(
|
|
84
|
-
references = (Array.wrap(meta.fetch(
|
|
85
|
-
nil)) + Array.wrap(meta.fetch(
|
|
91
|
+
license = hsh_to_spdx("rightsURI" => license["rightsUri"]) if license.present?
|
|
92
|
+
version = meta.fetch("version", nil)
|
|
93
|
+
subjects = meta.fetch("subjects", nil)
|
|
94
|
+
language = meta.fetch("language", nil)
|
|
95
|
+
geo_locations = meta.fetch("geo_locations", nil)
|
|
96
|
+
references = (Array.wrap(meta.fetch("related_identifiers",
|
|
97
|
+
nil)) + Array.wrap(meta.fetch("related_items",
|
|
86
98
|
nil))).select do |r|
|
|
87
|
-
|
|
88
|
-
|
|
99
|
+
%w[References Cites IsSupplementedBy].include?(r["relationType"])
|
|
100
|
+
end.map do |reference|
|
|
89
101
|
get_datacite_reference(reference)
|
|
90
102
|
end
|
|
91
|
-
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
|
|
92
|
-
formats = meta.fetch(
|
|
93
|
-
sizes = meta.fetch(
|
|
94
|
-
schema_version = meta.fetch(
|
|
95
|
-
state = id.present? || read_options.present? ?
|
|
96
|
-
|
|
97
|
-
{
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
103
|
+
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
|
|
104
|
+
formats = meta.fetch("formats", nil)
|
|
105
|
+
sizes = meta.fetch("sizes", nil)
|
|
106
|
+
schema_version = meta.fetch("schema_version", nil) || "http://datacite.org/schema/kernel-4"
|
|
107
|
+
state = id.present? || read_options.present? ? "findable" : "not_found"
|
|
108
|
+
|
|
109
|
+
{ "id" => id,
|
|
110
|
+
"type" => type,
|
|
111
|
+
"additional_type" => resource_type == type ? nil : resource_type,
|
|
112
|
+
"url" => url,
|
|
113
|
+
"titles" => titles,
|
|
114
|
+
"contributors" => contributors,
|
|
115
|
+
"container" => container,
|
|
116
|
+
"publisher" => publisher,
|
|
117
|
+
"provider" => "DataCite",
|
|
118
|
+
"alternate_identifiers" => alternate_identifiers.presence,
|
|
119
|
+
"references" => references,
|
|
120
|
+
"funding_references" => funding_references,
|
|
121
|
+
"files" => files.presence,
|
|
122
|
+
"date" => date.compact,
|
|
123
|
+
"descriptions" => descriptions,
|
|
124
|
+
"license" => license,
|
|
125
|
+
"version" => version,
|
|
126
|
+
"subjects" => subjects,
|
|
127
|
+
"language" => language,
|
|
128
|
+
"geo_locations" => geo_locations,
|
|
129
|
+
"formats" => formats,
|
|
130
|
+
"sizes" => sizes,
|
|
131
|
+
"state" => state }.compact # .merge(read_options)
|
|
120
132
|
end
|
|
121
133
|
|
|
122
134
|
def format_contributor(contributor)
|
|
123
|
-
type = contributor.fetch(
|
|
124
|
-
|
|
125
|
-
{
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
135
|
+
type = contributor.fetch("nameType", nil)
|
|
136
|
+
|
|
137
|
+
{ "name" => type == "Person" ? nil : contributor.fetch("name", nil),
|
|
138
|
+
"type" => type,
|
|
139
|
+
"givenName" => contributor.fetch("givenName", nil),
|
|
140
|
+
"familyName" => contributor.fetch("familyName", nil),
|
|
141
|
+
"nameIdentifiers" => contributor.fetch("nameIdentifiers", nil).presence,
|
|
142
|
+
"affiliations" => contributor.fetch("affiliations", nil).presence,
|
|
143
|
+
"contributorType" => contributor.fetch("contributorType", nil) }.compact
|
|
132
144
|
end
|
|
133
145
|
|
|
134
146
|
def get_datacite_reference(reference)
|
|
135
147
|
return nil unless reference.present? || !reference.is_a?(Hash)
|
|
136
148
|
|
|
137
|
-
key = reference[
|
|
149
|
+
key = reference["relatedIdentifier"]
|
|
138
150
|
doi = nil
|
|
139
151
|
url = nil
|
|
140
152
|
|
|
141
|
-
case reference[
|
|
142
|
-
when
|
|
143
|
-
doi = normalize_doi(reference[
|
|
144
|
-
when
|
|
145
|
-
url = reference[
|
|
153
|
+
case reference["relatedIdentifierType"]
|
|
154
|
+
when "DOI"
|
|
155
|
+
doi = normalize_doi(reference["relatedIdentifier"])
|
|
156
|
+
when "URL"
|
|
157
|
+
url = reference["relatedIdentifier"]
|
|
146
158
|
else
|
|
147
|
-
url = reference[
|
|
159
|
+
url = reference["relatedIdentifier"]
|
|
148
160
|
end
|
|
149
161
|
|
|
150
162
|
{
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
163
|
+
"key" => key,
|
|
164
|
+
"doi" => doi,
|
|
165
|
+
"url" => url,
|
|
166
|
+
"contributor" => reference.dig("author"),
|
|
167
|
+
"title" => reference.dig("article-title"),
|
|
168
|
+
"publisher" => reference.dig("publisher"),
|
|
169
|
+
"publicationYear" => reference.dig("year"),
|
|
170
|
+
"volume" => reference.dig("volume"),
|
|
171
|
+
"issue" => reference.dig("issue"),
|
|
172
|
+
"firstPage" => reference.dig("first-page"),
|
|
173
|
+
"lastPage" => reference.dig("last-page"),
|
|
174
|
+
"containerTitle" => reference.dig("journal-title"),
|
|
175
|
+
"edition" => nil,
|
|
176
|
+
"unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
|
|
165
177
|
}.compact
|
|
166
178
|
end
|
|
167
179
|
end
|
|
@@ -5,7 +5,7 @@ require "pathname"
|
|
|
5
5
|
|
|
6
6
|
module Commonmeta
|
|
7
7
|
module SchemaUtils
|
|
8
|
-
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.
|
|
8
|
+
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.7.json",
|
|
9
9
|
__dir__))
|
|
10
10
|
|
|
11
11
|
def json_schema_errors
|
data/lib/commonmeta/utils.rb
CHANGED
|
@@ -223,6 +223,7 @@ module Commonmeta
|
|
|
223
223
|
"Dissertation" => "Dissertation",
|
|
224
224
|
"Event" => "Event",
|
|
225
225
|
"Image" => "Image",
|
|
226
|
+
"Instrument" => "Instrument",
|
|
226
227
|
"InteractiveResource" => "InteractiveResource",
|
|
227
228
|
"Journal" => "Journal",
|
|
228
229
|
"JournalArticle" => "JournalArticle",
|
|
@@ -237,6 +238,7 @@ module Commonmeta
|
|
|
237
238
|
"Software" => "Software",
|
|
238
239
|
"Sound" => "Sound",
|
|
239
240
|
"Standard" => "Standard",
|
|
241
|
+
"StudyRegistration" => "StudyRegistration",
|
|
240
242
|
"Text" => "Document",
|
|
241
243
|
"Thesis" => "Dissertation",
|
|
242
244
|
"Workflow" => "Workflow",
|
|
@@ -256,6 +258,7 @@ module Commonmeta
|
|
|
256
258
|
"Event" => "Event",
|
|
257
259
|
"Figure" => "Image",
|
|
258
260
|
"Image" => "Image",
|
|
261
|
+
"Instrument" => "Instrument",
|
|
259
262
|
"JournalArticle" => "JournalArticle",
|
|
260
263
|
"LegalDocument" => "Text",
|
|
261
264
|
"Manuscript" => "Text",
|
|
@@ -271,6 +274,7 @@ module Commonmeta
|
|
|
271
274
|
"Software" => "Software",
|
|
272
275
|
"Sound" => "Sound",
|
|
273
276
|
"Standard" => "Standard",
|
|
277
|
+
"StudyRegistration" => "StudyRegistration",
|
|
274
278
|
"WebPage" => "Text",
|
|
275
279
|
}
|
|
276
280
|
|
|
@@ -370,6 +374,7 @@ module Commonmeta
|
|
|
370
374
|
"CreativeWork" => "Other",
|
|
371
375
|
"Dataset" => "Dataset",
|
|
372
376
|
"Dissertation" => "Dissertation",
|
|
377
|
+
"Instrument" => "Instrument",
|
|
373
378
|
"NewsArticle" => "Article",
|
|
374
379
|
"Legislation" => "LegalDocument",
|
|
375
380
|
"ScholarlyArticle" => "JournalArticle",
|
|
@@ -389,6 +394,7 @@ module Commonmeta
|
|
|
389
394
|
"Event" => "CreativeWork",
|
|
390
395
|
"Figure" => "CreativeWork",
|
|
391
396
|
"Image" => "CreativeWork",
|
|
397
|
+
"Instrument" => "Instrument",
|
|
392
398
|
"JournalArticle" => "ScholarlyArticle",
|
|
393
399
|
"LegalDocument" => "Legislation",
|
|
394
400
|
"Software" => "SoftwareSourceCode",
|
|
@@ -497,7 +503,9 @@ module Commonmeta
|
|
|
497
503
|
def find_from_format_by_string(string)
|
|
498
504
|
begin # try to parse as JSON
|
|
499
505
|
hsh = MultiJson.load(string).to_h
|
|
500
|
-
if hsh.dig("
|
|
506
|
+
if hsh.dig("blog", "version") == "https://jsonfeed.org/version/1.1"
|
|
507
|
+
return "json_feed_item"
|
|
508
|
+
elsif hsh.dig("schema_version").to_s.start_with?("https://commonmeta.org")
|
|
501
509
|
return "commonmeta"
|
|
502
510
|
elsif hsh.dig("@context") && URI.parse(hsh.dig("@context")).host == "schema.org"
|
|
503
511
|
return "schema_org"
|
|
@@ -507,7 +515,7 @@ module Commonmeta
|
|
|
507
515
|
return "crossref"
|
|
508
516
|
elsif hsh.dig("issued", "date-parts").present?
|
|
509
517
|
return "csl"
|
|
510
|
-
elsif URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
|
518
|
+
elsif hsh.dig("@context") && URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
|
511
519
|
return "codemeta"
|
|
512
520
|
end
|
|
513
521
|
rescue MultiJson::ParseError
|
|
@@ -649,6 +657,43 @@ module Commonmeta
|
|
|
649
657
|
"https://ror.org/" + Addressable::URI.encode(ror)
|
|
650
658
|
end
|
|
651
659
|
|
|
660
|
+
def normalize_name_identifier(hsh)
|
|
661
|
+
return nil unless hsh.present? && hsh.is_a?(Hash)
|
|
662
|
+
|
|
663
|
+
name_identifier = hsh["nameIdentifier"]
|
|
664
|
+
name_identifier_scheme = hsh["nameIdentifierScheme"]
|
|
665
|
+
scheme_uri = hsh["schemeURI"] || hsh["schemeUri"]
|
|
666
|
+
return nil unless name_identifier.present?
|
|
667
|
+
|
|
668
|
+
if name_identifier_scheme == "ORCID" || scheme_uri == "https://orcid.org"
|
|
669
|
+
return normalize_orcid(name_identifier)
|
|
670
|
+
elsif name_identifier_scheme == "ROR" || scheme_uri == "https://ror.org"
|
|
671
|
+
return normalize_ror(name_identifier)
|
|
672
|
+
elsif name_identifier_scheme == "ISNI" || scheme_uri == "https://isni.org"
|
|
673
|
+
return normalize_isni(name_identifier)
|
|
674
|
+
elsif validate_url(name_identifier) == "URL"
|
|
675
|
+
return name_identifier
|
|
676
|
+
elsif scheme_uri.present?
|
|
677
|
+
return scheme_uri + Addressable::URI.encode(name_identifier)
|
|
678
|
+
end
|
|
679
|
+
|
|
680
|
+
return nil
|
|
681
|
+
end
|
|
682
|
+
|
|
683
|
+
def validate_isni(isni)
|
|
684
|
+
isni = Array(%r{\A(?:(?:http|https)://)?(isni\.org/isni/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(isni)).last
|
|
685
|
+
isni.gsub(/[[:space:]]/, "-") if isni.present?
|
|
686
|
+
end
|
|
687
|
+
|
|
688
|
+
def normalize_isni(isni)
|
|
689
|
+
# TODO fix validation
|
|
690
|
+
# isni = validate_isni(isni)
|
|
691
|
+
return nil unless isni.present?
|
|
692
|
+
|
|
693
|
+
# turn ISNI ID into URL
|
|
694
|
+
"https://isni.org/isni/" + Addressable::URI.encode(isni)
|
|
695
|
+
end
|
|
696
|
+
|
|
652
697
|
# pick electronic issn if there are multiple
|
|
653
698
|
# format issn as xxxx-xxxx
|
|
654
699
|
def normalize_issn(input, options = {})
|
data/lib/commonmeta/version.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"$id": "https://commonmeta.org/commonmeta_v0.10.
|
|
4
|
-
"title": "Commonmeta v0.10.
|
|
3
|
+
"$id": "https://commonmeta.org/commonmeta_v0.10.7.json",
|
|
4
|
+
"title": "Commonmeta v0.10.7",
|
|
5
5
|
"description": "JSON representation of the Commonmeta schema.",
|
|
6
6
|
"additionalProperties": false,
|
|
7
7
|
"definitions": {
|
|
@@ -125,6 +125,7 @@
|
|
|
125
125
|
"ReportSeries",
|
|
126
126
|
"Report",
|
|
127
127
|
"Software",
|
|
128
|
+
"StudyRegistration",
|
|
128
129
|
"Other"
|
|
129
130
|
]
|
|
130
131
|
},
|
|
@@ -252,6 +253,10 @@
|
|
|
252
253
|
"description": "The type of the title.",
|
|
253
254
|
"type": "string",
|
|
254
255
|
"enum": ["AlternativeTitle", "Subtitle", "TranslatedTitle"]
|
|
256
|
+
},
|
|
257
|
+
"language": {
|
|
258
|
+
"description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
|
|
259
|
+
"type": "string"
|
|
255
260
|
}
|
|
256
261
|
},
|
|
257
262
|
"required": ["title"]
|
|
@@ -423,7 +428,11 @@
|
|
|
423
428
|
"type": {
|
|
424
429
|
"description": "The type of the description.",
|
|
425
430
|
"type": "string",
|
|
426
|
-
"enum": ["Abstract", "
|
|
431
|
+
"enum": ["Abstract", "Summary", "Methods", "TechnicalInfo", "Other"]
|
|
432
|
+
},
|
|
433
|
+
"language": {
|
|
434
|
+
"description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
|
|
435
|
+
"type": "string"
|
|
427
436
|
}
|
|
428
437
|
},
|
|
429
438
|
"required": ["description"]
|
|
@@ -508,7 +517,7 @@
|
|
|
508
517
|
"description": "The schema version of the resource.",
|
|
509
518
|
"type": "string",
|
|
510
519
|
"enum": [
|
|
511
|
-
"https://commonmeta.org/commonmeta_v0.10
|
|
520
|
+
"https://commonmeta.org/commonmeta_v0.10",
|
|
512
521
|
"http://datacite.org/schema/kernel-3",
|
|
513
522
|
"http://datacite.org/schema/kernel-4"
|
|
514
523
|
]
|
|
@@ -523,7 +532,14 @@
|
|
|
523
532
|
"type": "array",
|
|
524
533
|
"items": {
|
|
525
534
|
"type": "string",
|
|
526
|
-
"enum": [
|
|
535
|
+
"enum": [
|
|
536
|
+
"CLOCKSS",
|
|
537
|
+
"LOCKSS",
|
|
538
|
+
"Portico",
|
|
539
|
+
"KB",
|
|
540
|
+
"Internet Archive",
|
|
541
|
+
"DWT"
|
|
542
|
+
]
|
|
527
543
|
},
|
|
528
544
|
"uniqueItems": true
|
|
529
545
|
}
|