commonmeta-ruby 3.9.0 → 3.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +12 -11
- data/lib/commonmeta/author_utils.rb +12 -5
- data/lib/commonmeta/readers/commonmeta_reader.rb +1 -1
- data/lib/commonmeta/readers/datacite_reader.rb +120 -108
- data/lib/commonmeta/schema_utils.rb +1 -1
- data/lib/commonmeta/utils.rb +47 -2
- data/lib/commonmeta/version.rb +1 -1
- data/lib/commonmeta/writers/commonmeta_writer.rb +1 -1
- data/resources/{commonmeta_v0.10.5.json → commonmeta_v0.10.7.json} +21 -5
- data/resources/{datacite-v4.json → datacite-v45.json} +26 -5
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
- data/resources/kernel-4/metadata.xsd +11 -7
- data/spec/author_utils_spec.rb +10 -0
- data/spec/fixtures/commonmeta.json +1 -1
- data/spec/fixtures/datacite-dataset_v4.5.json +736 -0
- data/spec/fixtures/datacite-instrument.json +135 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/SoftwareSourceCode.yml +8 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/dissertation.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/funding_references.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/subject_scheme.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml +317 -0
- data/spec/readers/commonmeta_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +68 -14
- data/spec/readers/json_feed_reader_spec.rb +25 -0
- data/spec/utils_spec.rb +30 -4
- data/spec/writers/commonmeta_writer_spec.rb +30 -3
- data/spec/writers/csl_writer_spec.rb +1 -0
- data/spec/writers/csv_writer_spec.rb +1 -0
- data/spec/writers/datacite_writer_spec.rb +0 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5acc4ac253ffc536724d14bf4d6cc58710978da3587c21035d946889b454a959
|
4
|
+
data.tar.gz: c1eac95196a7e2f01b52c5f6f94e0055299554e59824dfc88bda9de661c34193
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95b8264ab1e837f26971d12df81ec4b3fc156d21d63387b0053646aacf80f8be6b03d77837759d2876ecd35c05400114a8c559df45219c74160422e58b73d868
|
7
|
+
data.tar.gz: 1cca6f5bfa1bd30d966744931a88054cea207f4ed50f1c94b5a428e656be4f7216b71055f9e66d2ac03cb60b2140e3610f1a8c60c5bd26d4644daa0d4ece0b13
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
commonmeta-ruby (3.9.0)
|
4
|
+
commonmeta-ruby (3.12.0)
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
6
6
|
addressable (~> 2.8.1, < 2.8.2)
|
7
7
|
base32-url (>= 0.7.0, < 1)
|
@@ -27,7 +27,7 @@ PATH
|
|
27
27
|
GEM
|
28
28
|
remote: https://rubygems.org/
|
29
29
|
specs:
|
30
|
-
activesupport (7.1.
|
30
|
+
activesupport (7.1.3)
|
31
31
|
base64
|
32
32
|
bigdecimal
|
33
33
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
@@ -47,7 +47,7 @@ GEM
|
|
47
47
|
bibtex-ruby (6.1.0)
|
48
48
|
latex-decode (~> 0.0)
|
49
49
|
racc (~> 1.7)
|
50
|
-
bigdecimal (3.1.
|
50
|
+
bigdecimal (3.1.6)
|
51
51
|
builder (3.2.4)
|
52
52
|
citeproc (1.0.10)
|
53
53
|
namae (~> 1.0)
|
@@ -56,9 +56,10 @@ GEM
|
|
56
56
|
csl (~> 2.0)
|
57
57
|
code-scanning-rubocop (0.6.1)
|
58
58
|
rubocop (~> 1.0)
|
59
|
-
concurrent-ruby (1.2.
|
59
|
+
concurrent-ruby (1.2.3)
|
60
60
|
connection_pool (2.4.1)
|
61
|
-
crack (0.4.
|
61
|
+
crack (0.4.6)
|
62
|
+
bigdecimal
|
62
63
|
rexml
|
63
64
|
crass (1.0.6)
|
64
65
|
csl (2.0.0)
|
@@ -66,7 +67,7 @@ GEM
|
|
66
67
|
rexml
|
67
68
|
csl-styles (2.0.1)
|
68
69
|
csl (~> 2.0)
|
69
|
-
diff-lcs (1.5.
|
70
|
+
diff-lcs (1.5.1)
|
70
71
|
docile (1.4.0)
|
71
72
|
domain_name (0.6.20240107)
|
72
73
|
drb (2.2.0)
|
@@ -132,7 +133,7 @@ GEM
|
|
132
133
|
crass (~> 1.0.2)
|
133
134
|
nokogiri (>= 1.12.0)
|
134
135
|
matrix (0.4.2)
|
135
|
-
minitest (5.21.
|
136
|
+
minitest (5.21.2)
|
136
137
|
multi_json (1.15.0)
|
137
138
|
mutex_m (0.2.0)
|
138
139
|
namae (1.2.0)
|
@@ -143,7 +144,7 @@ GEM
|
|
143
144
|
bigdecimal (>= 3.0)
|
144
145
|
optimist (3.1.0)
|
145
146
|
parallel (1.24.0)
|
146
|
-
parser (3.3.0.
|
147
|
+
parser (3.3.0.5)
|
147
148
|
ast (~> 2.4.1)
|
148
149
|
racc
|
149
150
|
postrank-uri (1.1)
|
@@ -154,7 +155,7 @@ GEM
|
|
154
155
|
iniparser (>= 0.1.0)
|
155
156
|
public_suffix (4.0.7)
|
156
157
|
racc (1.7.3)
|
157
|
-
rack (3.0.
|
158
|
+
rack (3.0.9)
|
158
159
|
rack-test (2.1.0)
|
159
160
|
rack (>= 1.3)
|
160
161
|
rainbow (3.1.1)
|
@@ -191,11 +192,11 @@ GEM
|
|
191
192
|
rspec-xsd (0.1.0)
|
192
193
|
nokogiri (~> 1.6)
|
193
194
|
rspec (~> 3)
|
194
|
-
rubocop (1.
|
195
|
+
rubocop (1.60.2)
|
195
196
|
json (~> 2.3)
|
196
197
|
language_server-protocol (>= 3.17.0)
|
197
198
|
parallel (~> 1.10)
|
198
|
-
parser (>= 3.
|
199
|
+
parser (>= 3.3.0.2)
|
199
200
|
rainbow (>= 2.2.2, < 4.0)
|
200
201
|
regexp_parser (>= 1.8, < 3.0)
|
201
202
|
rexml (>= 3.2.5, < 4.0)
|
data/lib/commonmeta/author_utils.rb
CHANGED
@@ -25,8 +25,8 @@ module Commonmeta
|
|
25
25
|
"Researcher" => "Other",
|
26
26
|
"Sponsor" => "Other",
|
27
27
|
"Supervisor" => "Supervision",
|
28
|
-
"WorkPackageLeader" => "Other"
|
29
|
-
}
|
28
|
+
"WorkPackageLeader" => "Other",
|
29
|
+
}
|
30
30
|
|
31
31
|
def get_one_author(author)
|
32
32
|
# basic sanity checks
|
@@ -54,18 +54,22 @@ module Commonmeta
|
|
54
54
|
id = parse_attributes(author.fetch("id", nil), first: true) ||
|
55
55
|
parse_attributes(author.fetch("identifier", nil), first: true) ||
|
56
56
|
parse_attributes(author.fetch("sameAs", nil), first: true)
|
57
|
+
id = normalize_orcid(id) || normalize_ror(id) if id.present?
|
57
58
|
|
58
59
|
# DataCite metadata
|
59
60
|
if id.nil? && author["nameIdentifiers"].present?
|
60
61
|
id = Array.wrap(author.dig("nameIdentifiers")).find do |ni|
|
61
|
-
ni
|
62
|
+
normalize_name_identifier(ni).present?
|
62
63
|
end
|
63
|
-
id = id
|
64
|
+
id = normalize_name_identifier(id) if id.present?
|
64
65
|
# Crossref metadata
|
65
66
|
elsif id.nil? && author["ORCID"].present?
|
66
67
|
id = author.fetch("ORCID")
|
68
|
+
id = normalize_orcid(id)
|
69
|
+
# JSON Feed metadata
|
70
|
+
elsif id.nil? && author["url"].present?
|
71
|
+
id = author.fetch("url")
|
67
72
|
end
|
68
|
-
id = normalize_orcid(id) || normalize_ror(id)
|
69
73
|
|
70
74
|
# parse author type, i.e. "Person", "Organization" or not specified
|
71
75
|
type = author.fetch("type", nil)
|
@@ -164,6 +168,9 @@ module Commonmeta
|
|
164
168
|
# check if a name has only one word, e.g. "FamousOrganization", not including commas
|
165
169
|
return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",")
|
166
170
|
|
171
|
+
# check if name contains words known to be used in organization names
|
172
|
+
return false if %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science].any? { |word| name.to_s.include?(word) }
|
173
|
+
|
167
174
|
# check for suffixes, e.g. "John Smith, MD"
|
168
175
|
return true if name && %w[MD PhD].include?(name.split(", ").last)
|
169
176
|
|
data/lib/commonmeta/readers/commonmeta_reader.rb
CHANGED
@@ -12,7 +12,7 @@ module Commonmeta
|
|
12
12
|
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
13
13
|
:sandbox, :validate, :ra))
|
14
14
|
meta = string.present? ? JSON.parse(string) : {}
|
15
|
-
meta["schema_version"] = "https://commonmeta.org/commonmeta_v0.10
|
15
|
+
meta["schema_version"] = "https://commonmeta.org/commonmeta_v0.10"
|
16
16
|
meta.compact.merge(read_options)
|
17
17
|
end
|
18
18
|
end
|
data/lib/commonmeta/readers/datacite_reader.rb
CHANGED
@@ -4,29 +4,29 @@ module Commonmeta
|
|
4
4
|
module Readers
|
5
5
|
module DataciteReader
|
6
6
|
def get_datacite(id: nil, **options)
|
7
|
-
return {
|
7
|
+
return { "string" => nil, "state" => "not_found" } unless id.present?
|
8
8
|
|
9
9
|
api_url = datacite_api_url(id, options)
|
10
10
|
response = HTTP.get(api_url)
|
11
|
-
return {
|
11
|
+
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
12
12
|
|
13
13
|
body = JSON.parse(response.body)
|
14
|
-
client = Array.wrap(body.fetch(
|
15
|
-
m[
|
14
|
+
client = Array.wrap(body.fetch("included", nil)).find do |m|
|
15
|
+
m["type"] == "clients"
|
16
16
|
end
|
17
|
-
client_id = client.to_h.fetch(
|
18
|
-
provider_id = Array.wrap(client.to_h.fetch(
|
19
|
-
m[
|
20
|
-
end.to_h.dig(
|
21
|
-
|
22
|
-
{
|
23
|
-
|
24
|
-
|
17
|
+
client_id = client.to_h.fetch("id", nil)
|
18
|
+
provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find do |m|
|
19
|
+
m["provider"].present?
|
20
|
+
end.to_h.dig("provider", "data", "id")
|
21
|
+
|
22
|
+
{ "string" => response.body.to_s,
|
23
|
+
"provider_id" => provider_id,
|
24
|
+
"client_id" => client_id }
|
25
25
|
end
|
26
26
|
|
27
27
|
def read_datacite(string: nil, **_options)
|
28
28
|
errors = jsonlint(string)
|
29
|
-
return {
|
29
|
+
return { "errors" => errors } if errors.present?
|
30
30
|
|
31
31
|
read_options = ActiveSupport::HashWithIndifferentAccess.new(_options.except(:doi, :id, :url,
|
32
32
|
:sandbox, :validate, :ra))
|
@@ -34,134 +34,146 @@ module Commonmeta
|
|
34
34
|
meta = string.present? ? JSON.parse(string) : {}
|
35
35
|
|
36
36
|
# optionally strip out the message wrapper from API
|
37
|
-
meta = meta.dig(
|
37
|
+
meta = meta.dig("data", "attributes") if meta.dig("data").present?
|
38
38
|
|
39
39
|
meta.transform_keys!(&:underscore)
|
40
40
|
|
41
|
-
id = normalize_doi(meta.fetch(
|
41
|
+
id = normalize_doi(meta.fetch("doi", nil))
|
42
42
|
|
43
|
-
resource_type_general = meta.dig(
|
44
|
-
resource_type = meta.dig(
|
43
|
+
resource_type_general = meta.dig("types", "resourceTypeGeneral")
|
44
|
+
resource_type = meta.dig("types", "resourceType")
|
45
45
|
# if resource_type is one of the new resource_type_general types introduced in schema 4.3, use it
|
46
46
|
type = Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type, nil) ||
|
47
|
-
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general,
|
47
|
+
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, "Other")
|
48
48
|
|
49
|
-
alternate_identifiers = Array.wrap(meta.fetch(
|
49
|
+
alternate_identifiers = Array.wrap(meta.fetch("alternate_identifiers", nil)).map do |i|
|
50
50
|
i.transform_keys! { |k| k.camelize(:lower) }
|
51
51
|
end
|
52
|
-
url = meta.fetch(
|
53
|
-
titles = Array.wrap(meta.fetch(
|
54
|
-
title.
|
52
|
+
url = meta.fetch("url", nil)
|
53
|
+
titles = Array.wrap(meta.fetch("titles", nil)).map do |title|
|
54
|
+
{ "title" => title.fetch("title", nil),
|
55
|
+
"type" => title.fetch("titleType", nil),
|
56
|
+
"language" => title.fetch("lang", nil) }.compact
|
57
|
+
end
|
58
|
+
contributors = get_authors(from_datacite(meta.fetch("creators", nil)))
|
59
|
+
contributors += get_authors(from_datacite(meta.fetch("contributors", nil)))
|
60
|
+
if meta.fetch("publisher", nil).is_a?(Hash)
|
61
|
+
publisher = { "name" => meta.fetch("publisher", nil).fetch("name", nil) }
|
62
|
+
elsif meta.fetch("publisher", nil).is_a?(String)
|
63
|
+
publisher = { "name" => meta.fetch("publisher", nil) }
|
64
|
+
else
|
65
|
+
publisher = nil
|
55
66
|
end
|
56
|
-
contributors = get_authors(from_datacite(meta.fetch('creators', nil)))
|
57
|
-
contributors += get_authors(from_datacite(meta.fetch('contributors', nil)))
|
58
|
-
publisher = { 'name' => meta.fetch('publisher', nil) }
|
59
67
|
|
60
|
-
container = meta.fetch(
|
61
|
-
funding_references = meta.fetch(
|
68
|
+
container = meta.fetch("container", nil)
|
69
|
+
funding_references = meta.fetch("funding_references", nil)
|
62
70
|
|
63
71
|
date = {}
|
64
|
-
date[
|
65
|
-
get_iso8601_date(meta.dig(
|
66
|
-
date[
|
67
|
-
get_iso8601_date(meta.dig(
|
68
|
-
|
69
|
-
date[
|
70
|
-
date[
|
71
|
-
get_iso8601_date(meta.dig(
|
72
|
-
|
73
|
-
descriptions = Array.wrap(meta.fetch(
|
74
|
-
description.
|
72
|
+
date["created"] =
|
73
|
+
get_iso8601_date(meta.dig("created")) || get_date(meta.dig("dates"), "Created")
|
74
|
+
date["published"] =
|
75
|
+
get_iso8601_date(meta.dig("published")) || get_date(meta.dig("dates"),
|
76
|
+
"Issued") || get_iso8601_date(meta.dig("publication_year"))
|
77
|
+
date["registered"] = get_iso8601_date(meta.dig("registered"))
|
78
|
+
date["updated"] =
|
79
|
+
get_iso8601_date(meta.dig("updated")) || get_date(meta.dig("dates"), "Updated")
|
80
|
+
|
81
|
+
descriptions = Array.wrap(meta.fetch("descriptions", nil)).map do |description|
|
82
|
+
description_type = description.fetch("descriptionType", nil)
|
83
|
+
description_type = "Other" unless %w[Abstract Methods TechnicalInfo].include?(description_type)
|
84
|
+
{ "description" => description.fetch("description", nil),
|
85
|
+
"type" => description_type,
|
86
|
+
"language" => description.fetch("lang", nil) }.compact
|
75
87
|
end
|
76
|
-
license = Array.wrap(meta.fetch(
|
77
|
-
r[
|
88
|
+
license = Array.wrap(meta.fetch("rights_list", nil)).find do |r|
|
89
|
+
r["rightsUri"].present?
|
78
90
|
end
|
79
|
-
license = hsh_to_spdx(
|
80
|
-
version = meta.fetch(
|
81
|
-
subjects = meta.fetch(
|
82
|
-
language = meta.fetch(
|
83
|
-
geo_locations = meta.fetch(
|
84
|
-
references = (Array.wrap(meta.fetch(
|
85
|
-
nil)) + Array.wrap(meta.fetch(
|
91
|
+
license = hsh_to_spdx("rightsURI" => license["rightsUri"]) if license.present?
|
92
|
+
version = meta.fetch("version", nil)
|
93
|
+
subjects = meta.fetch("subjects", nil)
|
94
|
+
language = meta.fetch("language", nil)
|
95
|
+
geo_locations = meta.fetch("geo_locations", nil)
|
96
|
+
references = (Array.wrap(meta.fetch("related_identifiers",
|
97
|
+
nil)) + Array.wrap(meta.fetch("related_items",
|
86
98
|
nil))).select do |r|
|
87
|
-
|
88
|
-
|
99
|
+
%w[References Cites IsSupplementedBy].include?(r["relationType"])
|
100
|
+
end.map do |reference|
|
89
101
|
get_datacite_reference(reference)
|
90
102
|
end
|
91
|
-
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
|
92
|
-
formats = meta.fetch(
|
93
|
-
sizes = meta.fetch(
|
94
|
-
schema_version = meta.fetch(
|
95
|
-
state = id.present? || read_options.present? ?
|
96
|
-
|
97
|
-
{
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
103
|
+
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
|
104
|
+
formats = meta.fetch("formats", nil)
|
105
|
+
sizes = meta.fetch("sizes", nil)
|
106
|
+
schema_version = meta.fetch("schema_version", nil) || "http://datacite.org/schema/kernel-4"
|
107
|
+
state = id.present? || read_options.present? ? "findable" : "not_found"
|
108
|
+
|
109
|
+
{ "id" => id,
|
110
|
+
"type" => type,
|
111
|
+
"additional_type" => resource_type == type ? nil : resource_type,
|
112
|
+
"url" => url,
|
113
|
+
"titles" => titles,
|
114
|
+
"contributors" => contributors,
|
115
|
+
"container" => container,
|
116
|
+
"publisher" => publisher,
|
117
|
+
"provider" => "DataCite",
|
118
|
+
"alternate_identifiers" => alternate_identifiers.presence,
|
119
|
+
"references" => references,
|
120
|
+
"funding_references" => funding_references,
|
121
|
+
"files" => files.presence,
|
122
|
+
"date" => date.compact,
|
123
|
+
"descriptions" => descriptions,
|
124
|
+
"license" => license,
|
125
|
+
"version" => version,
|
126
|
+
"subjects" => subjects,
|
127
|
+
"language" => language,
|
128
|
+
"geo_locations" => geo_locations,
|
129
|
+
"formats" => formats,
|
130
|
+
"sizes" => sizes,
|
131
|
+
"state" => state }.compact # .merge(read_options)
|
120
132
|
end
|
121
133
|
|
122
134
|
def format_contributor(contributor)
|
123
|
-
type = contributor.fetch(
|
124
|
-
|
125
|
-
{
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
135
|
+
type = contributor.fetch("nameType", nil)
|
136
|
+
|
137
|
+
{ "name" => type == "Person" ? nil : contributor.fetch("name", nil),
|
138
|
+
"type" => type,
|
139
|
+
"givenName" => contributor.fetch("givenName", nil),
|
140
|
+
"familyName" => contributor.fetch("familyName", nil),
|
141
|
+
"nameIdentifiers" => contributor.fetch("nameIdentifiers", nil).presence,
|
142
|
+
"affiliations" => contributor.fetch("affiliations", nil).presence,
|
143
|
+
"contributorType" => contributor.fetch("contributorType", nil) }.compact
|
132
144
|
end
|
133
145
|
|
134
146
|
def get_datacite_reference(reference)
|
135
147
|
return nil unless reference.present? || !reference.is_a?(Hash)
|
136
148
|
|
137
|
-
key = reference[
|
149
|
+
key = reference["relatedIdentifier"]
|
138
150
|
doi = nil
|
139
151
|
url = nil
|
140
152
|
|
141
|
-
case reference[
|
142
|
-
when
|
143
|
-
doi = normalize_doi(reference[
|
144
|
-
when
|
145
|
-
url = reference[
|
153
|
+
case reference["relatedIdentifierType"]
|
154
|
+
when "DOI"
|
155
|
+
doi = normalize_doi(reference["relatedIdentifier"])
|
156
|
+
when "URL"
|
157
|
+
url = reference["relatedIdentifier"]
|
146
158
|
else
|
147
|
-
url = reference[
|
159
|
+
url = reference["relatedIdentifier"]
|
148
160
|
end
|
149
161
|
|
150
162
|
{
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
163
|
+
"key" => key,
|
164
|
+
"doi" => doi,
|
165
|
+
"url" => url,
|
166
|
+
"contributor" => reference.dig("author"),
|
167
|
+
"title" => reference.dig("article-title"),
|
168
|
+
"publisher" => reference.dig("publisher"),
|
169
|
+
"publicationYear" => reference.dig("year"),
|
170
|
+
"volume" => reference.dig("volume"),
|
171
|
+
"issue" => reference.dig("issue"),
|
172
|
+
"firstPage" => reference.dig("first-page"),
|
173
|
+
"lastPage" => reference.dig("last-page"),
|
174
|
+
"containerTitle" => reference.dig("journal-title"),
|
175
|
+
"edition" => nil,
|
176
|
+
"unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
|
165
177
|
}.compact
|
166
178
|
end
|
167
179
|
end
|
data/lib/commonmeta/schema_utils.rb
CHANGED
@@ -5,7 +5,7 @@ require "pathname"
|
|
5
5
|
|
6
6
|
module Commonmeta
|
7
7
|
module SchemaUtils
|
8
|
-
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.5.json",
|
8
|
+
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.7.json",
|
9
9
|
__dir__))
|
10
10
|
|
11
11
|
def json_schema_errors
|
data/lib/commonmeta/utils.rb
CHANGED
@@ -223,6 +223,7 @@ module Commonmeta
|
|
223
223
|
"Dissertation" => "Dissertation",
|
224
224
|
"Event" => "Event",
|
225
225
|
"Image" => "Image",
|
226
|
+
"Instrument" => "Instrument",
|
226
227
|
"InteractiveResource" => "InteractiveResource",
|
227
228
|
"Journal" => "Journal",
|
228
229
|
"JournalArticle" => "JournalArticle",
|
@@ -237,6 +238,7 @@ module Commonmeta
|
|
237
238
|
"Software" => "Software",
|
238
239
|
"Sound" => "Sound",
|
239
240
|
"Standard" => "Standard",
|
241
|
+
"StudyRegistration" => "StudyRegistration",
|
240
242
|
"Text" => "Document",
|
241
243
|
"Thesis" => "Dissertation",
|
242
244
|
"Workflow" => "Workflow",
|
@@ -256,6 +258,7 @@ module Commonmeta
|
|
256
258
|
"Event" => "Event",
|
257
259
|
"Figure" => "Image",
|
258
260
|
"Image" => "Image",
|
261
|
+
"Instrument" => "Instrument",
|
259
262
|
"JournalArticle" => "JournalArticle",
|
260
263
|
"LegalDocument" => "Text",
|
261
264
|
"Manuscript" => "Text",
|
@@ -271,6 +274,7 @@ module Commonmeta
|
|
271
274
|
"Software" => "Software",
|
272
275
|
"Sound" => "Sound",
|
273
276
|
"Standard" => "Standard",
|
277
|
+
"StudyRegistration" => "StudyRegistration",
|
274
278
|
"WebPage" => "Text",
|
275
279
|
}
|
276
280
|
|
@@ -370,6 +374,7 @@ module Commonmeta
|
|
370
374
|
"CreativeWork" => "Other",
|
371
375
|
"Dataset" => "Dataset",
|
372
376
|
"Dissertation" => "Dissertation",
|
377
|
+
"Instrument" => "Instrument",
|
373
378
|
"NewsArticle" => "Article",
|
374
379
|
"Legislation" => "LegalDocument",
|
375
380
|
"ScholarlyArticle" => "JournalArticle",
|
@@ -389,6 +394,7 @@ module Commonmeta
|
|
389
394
|
"Event" => "CreativeWork",
|
390
395
|
"Figure" => "CreativeWork",
|
391
396
|
"Image" => "CreativeWork",
|
397
|
+
"Instrument" => "Instrument",
|
392
398
|
"JournalArticle" => "ScholarlyArticle",
|
393
399
|
"LegalDocument" => "Legislation",
|
394
400
|
"Software" => "SoftwareSourceCode",
|
@@ -497,7 +503,9 @@ module Commonmeta
|
|
497
503
|
def find_from_format_by_string(string)
|
498
504
|
begin # try to parse as JSON
|
499
505
|
hsh = MultiJson.load(string).to_h
|
500
|
-
if hsh.dig("
|
506
|
+
if hsh.dig("blog", "version") == "https://jsonfeed.org/version/1.1"
|
507
|
+
return "json_feed_item"
|
508
|
+
elsif hsh.dig("schema_version").to_s.start_with?("https://commonmeta.org")
|
501
509
|
return "commonmeta"
|
502
510
|
elsif hsh.dig("@context") && URI.parse(hsh.dig("@context")).host == "schema.org"
|
503
511
|
return "schema_org"
|
@@ -507,7 +515,7 @@ module Commonmeta
|
|
507
515
|
return "crossref"
|
508
516
|
elsif hsh.dig("issued", "date-parts").present?
|
509
517
|
return "csl"
|
510
|
-
elsif URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
518
|
+
elsif hsh.dig("@context") && URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
|
511
519
|
return "codemeta"
|
512
520
|
end
|
513
521
|
rescue MultiJson::ParseError
|
@@ -649,6 +657,43 @@ module Commonmeta
|
|
649
657
|
"https://ror.org/" + Addressable::URI.encode(ror)
|
650
658
|
end
|
651
659
|
|
660
|
+
def normalize_name_identifier(hsh)
|
661
|
+
return nil unless hsh.present? && hsh.is_a?(Hash)
|
662
|
+
|
663
|
+
name_identifier = hsh["nameIdentifier"]
|
664
|
+
name_identifier_scheme = hsh["nameIdentifierScheme"]
|
665
|
+
scheme_uri = hsh["schemeURI"] || hsh["schemeUri"]
|
666
|
+
return nil unless name_identifier.present?
|
667
|
+
|
668
|
+
if name_identifier_scheme == "ORCID" || scheme_uri == "https://orcid.org"
|
669
|
+
return normalize_orcid(name_identifier)
|
670
|
+
elsif name_identifier_scheme == "ROR" || scheme_uri == "https://ror.org"
|
671
|
+
return normalize_ror(name_identifier)
|
672
|
+
elsif name_identifier_scheme == "ISNI" || scheme_uri == "https://isni.org"
|
673
|
+
return normalize_isni(name_identifier)
|
674
|
+
elsif validate_url(name_identifier) == "URL"
|
675
|
+
return name_identifier
|
676
|
+
elsif scheme_uri.present?
|
677
|
+
return scheme_uri + Addressable::URI.encode(name_identifier)
|
678
|
+
end
|
679
|
+
|
680
|
+
return nil
|
681
|
+
end
|
682
|
+
|
683
|
+
def validate_isni(isni)
|
684
|
+
isni = Array(%r{\A(?:(?:http|https)://)?(isni\.org/isni/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(isni)).last
|
685
|
+
isni.gsub(/[[:space:]]/, "-") if isni.present?
|
686
|
+
end
|
687
|
+
|
688
|
+
def normalize_isni(isni)
|
689
|
+
# TODO fix validation
|
690
|
+
# isni = validate_isni(isni)
|
691
|
+
return nil unless isni.present?
|
692
|
+
|
693
|
+
# turn ISNI ID into URL
|
694
|
+
"https://isni.org/isni/" + Addressable::URI.encode(isni)
|
695
|
+
end
|
696
|
+
|
652
697
|
# pick electronic issn if there are multiple
|
653
698
|
# format issn as xxxx-xxxx
|
654
699
|
def normalize_issn(input, options = {})
|
data/lib/commonmeta/version.rb
CHANGED
data/resources/{commonmeta_v0.10.5.json → commonmeta_v0.10.7.json}
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
3
|
-
"$id": "https://commonmeta.org/commonmeta_v0.10.5.json",
|
4
|
-
"title": "Commonmeta v0.10.5",
|
3
|
+
"$id": "https://commonmeta.org/commonmeta_v0.10.7.json",
|
4
|
+
"title": "Commonmeta v0.10.7",
|
5
5
|
"description": "JSON representation of the Commonmeta schema.",
|
6
6
|
"additionalProperties": false,
|
7
7
|
"definitions": {
|
@@ -125,6 +125,7 @@
|
|
125
125
|
"ReportSeries",
|
126
126
|
"Report",
|
127
127
|
"Software",
|
128
|
+
"StudyRegistration",
|
128
129
|
"Other"
|
129
130
|
]
|
130
131
|
},
|
@@ -252,6 +253,10 @@
|
|
252
253
|
"description": "The type of the title.",
|
253
254
|
"type": "string",
|
254
255
|
"enum": ["AlternativeTitle", "Subtitle", "TranslatedTitle"]
|
256
|
+
},
|
257
|
+
"language": {
|
258
|
+
"description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
|
259
|
+
"type": "string"
|
255
260
|
}
|
256
261
|
},
|
257
262
|
"required": ["title"]
|
@@ -423,7 +428,11 @@
|
|
423
428
|
"type": {
|
424
429
|
"description": "The type of the description.",
|
425
430
|
"type": "string",
|
426
|
-
"enum": ["Abstract", "
|
431
|
+
"enum": ["Abstract", "Summary", "Methods", "TechnicalInfo", "Other"]
|
432
|
+
},
|
433
|
+
"language": {
|
434
|
+
"description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
|
435
|
+
"type": "string"
|
427
436
|
}
|
428
437
|
},
|
429
438
|
"required": ["description"]
|
@@ -508,7 +517,7 @@
|
|
508
517
|
"description": "The schema version of the resource.",
|
509
518
|
"type": "string",
|
510
519
|
"enum": [
|
511
|
-
"https://commonmeta.org/commonmeta_v0.10
|
520
|
+
"https://commonmeta.org/commonmeta_v0.10",
|
512
521
|
"http://datacite.org/schema/kernel-3",
|
513
522
|
"http://datacite.org/schema/kernel-4"
|
514
523
|
]
|
@@ -523,7 +532,14 @@
|
|
523
532
|
"type": "array",
|
524
533
|
"items": {
|
525
534
|
"type": "string",
|
526
|
-
"enum": [
|
535
|
+
"enum": [
|
536
|
+
"CLOCKSS",
|
537
|
+
"LOCKSS",
|
538
|
+
"Portico",
|
539
|
+
"KB",
|
540
|
+
"Internet Archive",
|
541
|
+
"DWT"
|
542
|
+
]
|
527
543
|
},
|
528
544
|
"uniqueItems": true
|
529
545
|
}
|