uc3-dmp-id 0.0.139 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/asserter.rb +162 -78
- data/lib/uc3-dmp-id/creator.rb +20 -16
- data/lib/uc3-dmp-id/deleter.rb +17 -18
- data/lib/uc3-dmp-id/finder.rb +49 -31
- data/lib/uc3-dmp-id/helper.rb +38 -22
- data/lib/uc3-dmp-id/schemas/amend.rb +269 -267
- data/lib/uc3-dmp-id/schemas/author.rb +1413 -1142
- data/lib/uc3-dmp-id/updater.rb +74 -39
- data/lib/uc3-dmp-id/validator.rb +10 -4
- data/lib/uc3-dmp-id/version.rb +1 -1
- data/lib/uc3-dmp-id/versioner.rb +16 -7
- data/lib/uc3-dmp-id.rb +1 -11
- metadata +2 -58
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -9,30 +9,37 @@ module Uc3DmpId
|
|
9
9
|
class Finder
|
10
10
|
# Message for by_json when the payload has neither a :PK nor a :dmp_id.
# Built from two adjacent literals so the line break is not part of the message
# (a backslash inside a single-quoted string is kept literally, along with the newline).
MSG_INVALID_ARGS = 'Expected JSON to be structured as `{ "dmp": { "PK": "value" } }` OR ' \
                   '`{ "dmp": { "dmp_id": { "identifier": "value", "type": "value" } } }`'
|
12
|
-
MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected
|
13
|
-
MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected
|
12
|
+
# Message for an :owner_orcid that fails the ORCID format check (stray backtick removed).
MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected a valid ORCID id (excluding the domain).'
|
13
|
+
# Message for an :owner_org_ror that fails the ROR format check (stray backtick removed).
MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected a valid ROR id (excluding the domain).'
|
14
14
|
MSG_INVALID_MOD_DATE = 'Invalid :modification_day. Expected value to be in the `YYYY-MM-DD` format.'
|
15
15
|
MSG_MISSING_PK = 'No PK was provided'
|
16
16
|
# Message for by_provenance_identifier when no :dmp_id identifier is supplied.
# Built from two adjacent literals so no backslash/newline ends up inside the message.
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. ' \
                      'Expected: `{ "dmp_id": { "identifier": "value", "type": "value" } }`'
|
18
18
|
|
19
|
-
|
20
19
|
class << self
|
21
20
|
# TODO: Replace this with ElasticSearch
|
22
21
|
def search_dmps(args:, logger: nil)
|
23
|
-
|
24
|
-
return
|
25
|
-
|
22
|
+
client = Uc3DmpDynamo::Client.new
|
23
|
+
# _by_owner declares the required keyword :owner_id, so the ORCID must be passed under that name.
return _by_owner(owner_id: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
|
24
|
+
|
25
|
+
unless args['owner_org_ror'].nil?
|
26
|
+
return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
|
27
|
+
logger: logger)
|
28
|
+
end
|
29
|
+
unless args['modification_day'].nil?
|
30
|
+
return _by_mod_day(day: args['modification_day'], client: client,
|
31
|
+
logger: logger)
|
32
|
+
end
|
26
33
|
|
27
34
|
[]
|
28
35
|
end
|
29
|
-
# rubocop:enable Metrics/MethodLength
|
30
36
|
|
31
37
|
# Find a DMP based on the contents of the incoming JSON
|
32
38
|
# -------------------------------------------------------------------------
|
33
|
-
|
39
|
+
# rubocop:disable Metrics/AbcSize
|
40
|
+
def by_json(json:, client: nil, cleanse: true, logger: nil)
|
34
41
|
json = Helper.parse_json(json: json)&.fetch('dmp', {})
|
35
|
-
raise FinderError, MSG_INVALID_ARGS if json.
|
42
|
+
raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
|
36
43
|
|
37
44
|
p_key = json['PK']
|
38
45
|
# Translate the incoming :dmp_id into a PK
|
@@ -46,9 +53,11 @@ module Uc3DmpId
|
|
46
53
|
# find_by_PK
|
47
54
|
p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
|
48
55
|
end
|
56
|
+
# rubocop:enable Metrics/AbcSize
|
49
57
|
|
50
58
|
# Find the DMP by its PK and SK
|
51
59
|
# -------------------------------------------------------------------------
|
60
|
+
# rubocop:disable Metrics/AbcSize
|
52
61
|
def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, cleanse: true, logger: nil)
|
53
62
|
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
54
63
|
|
@@ -57,7 +66,7 @@ module Uc3DmpId
|
|
57
66
|
resp = client.get_item(
|
58
67
|
key: {
|
59
68
|
PK: Helper.append_pk_prefix(p_key: p_key),
|
60
|
-
SK: s_key
|
69
|
+
SK: Helper.append_sk_prefix(s_key: s_key)
|
61
70
|
},
|
62
71
|
logger: logger
|
63
72
|
)
|
@@ -66,9 +75,10 @@ module Uc3DmpId
|
|
66
75
|
dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
|
67
76
|
return nil if dmp['dmp']['PK'].nil?
|
68
77
|
|
69
|
-
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger)
|
78
|
+
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
|
70
79
|
cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
|
71
80
|
end
|
81
|
+
# rubocop:enable Metrics/AbcSize
|
72
82
|
|
73
83
|
# Fetch just the PK to see if a record exists
|
74
84
|
# -------------------------------------------------------------------------
|
@@ -79,7 +89,7 @@ module Uc3DmpId
|
|
79
89
|
client.pk_exists?(
|
80
90
|
key: {
|
81
91
|
PK: Helper.append_pk_prefix(p_key: p_key),
|
82
|
-
SK: s_key
|
92
|
+
SK: Helper.append_sk_prefix(s_key: s_key)
|
83
93
|
},
|
84
94
|
logger: logger
|
85
95
|
)
|
@@ -89,7 +99,10 @@ module Uc3DmpId
|
|
89
99
|
# -------------------------------------------------------------------------
|
90
100
|
# rubocop:disable Metrics/AbcSize
|
91
101
|
def by_provenance_identifier(json:, client: nil, cleanse: true, logger: nil)
|
92
|
-
raise FinderError, MSG_MISSING_PROV_ID
|
102
|
+
raise FinderError, MSG_MISSING_PROV_ID unless json.is_a?(Hash)
|
103
|
+
|
104
|
+
json = json['dmp'] unless json['dmp'].nil?
|
105
|
+
raise FinderError, MSG_MISSING_PROV_ID if json.fetch('dmp_id', {})['identifier'].nil?
|
93
106
|
|
94
107
|
args = {
|
95
108
|
index_name: 'dmphub_provenance_identifier_gsi',
|
@@ -117,17 +130,17 @@ module Uc3DmpId
|
|
117
130
|
private
|
118
131
|
|
119
132
|
# Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
|
120
|
-
def _by_owner(owner_id:, logger: nil)
|
121
|
-
regex =
|
122
|
-
raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s
|
133
|
+
def _by_owner(owner_id:, client: nil, logger: nil)
|
134
|
+
regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
|
135
|
+
raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
|
123
136
|
|
124
137
|
args = {
|
125
138
|
index_name: 'dmphub_owner_id_gsi',
|
126
139
|
key_conditions: {
|
127
140
|
dmphub_owner_id: {
|
128
141
|
attribute_value_list: [
|
129
|
-
"http://orcid.org/#{owner_id
|
130
|
-
"https://orcid.org/#{owner_id
|
142
|
+
"http://orcid.org/#{owner_id}",
|
143
|
+
"https://orcid.org/#{owner_id}"
|
131
144
|
],
|
132
145
|
comparison_operator: 'IN'
|
133
146
|
}
|
@@ -135,35 +148,39 @@ module Uc3DmpId
|
|
135
148
|
filter_expression: 'SK = :version',
|
136
149
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
137
150
|
}
|
138
|
-
logger.info(message: "Querying _by_owner with #{args}")
|
151
|
+
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
139
152
|
client = client.nil? ? Uc3DmpDynamo::Client.new : client
|
140
153
|
_process_search_response(response: client.query(args: args, logger: logger))
|
141
154
|
end
|
142
155
|
|
143
|
-
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
|
144
|
-
|
145
|
-
|
146
|
-
|
156
|
+
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
|
157
|
+
# on the DMP ID record)
|
158
|
+
def _by_owner_org(owner_org:, client: nil, logger: nil)
|
159
|
+
regex = /^[a-zA-Z0-9]+$/
|
160
|
+
# An invalid ROR is reported with the :owner_org_ror message, not the ORCID one.
raise FinderError, MSG_INVALID_OWNER_ORG if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
|
147
161
|
|
148
162
|
args = {
|
149
163
|
index_name: 'dmphub_owner_org_gsi',
|
150
164
|
key_conditions: {
|
151
165
|
dmphub_owner_org: {
|
152
|
-
attribute_value_list: [
|
153
|
-
|
166
|
+
attribute_value_list: [
|
167
|
+
"https://ror.org/#{owner_org.to_s.downcase}",
|
168
|
+
"http://ror.org/#{owner_org.to_s.downcase}"
|
169
|
+
],
|
170
|
+
comparison_operator: 'IN'
|
154
171
|
}
|
155
172
|
},
|
156
173
|
filter_expression: 'SK = :version',
|
157
174
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
158
175
|
}
|
159
|
-
logger.info(message: "Querying _by_owner_org with #{args}")
|
176
|
+
logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
|
160
177
|
client = client.nil? ? Uc3DmpDynamo::Client.new : client
|
161
178
|
_process_search_response(response: client.query(args: args, logger: logger))
|
162
179
|
end
|
163
180
|
|
164
181
|
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
|
165
|
-
def _by_mod_day(day:, logger: nil)
|
166
|
-
regex =
|
182
|
+
def _by_mod_day(day:, client: nil, logger: nil)
|
183
|
+
# Anchor both ends of the whole string so only a bare YYYY-MM-DD value passes validation.
regex = /\A[0-9]{4}(-[0-9]{2}){2}\z/
|
167
184
|
# A malformed :modification_day is reported with the date-format message, not the ORCID one.
raise FinderError, MSG_INVALID_MOD_DATE if day.nil? || (day.to_s =~ regex).nil?
|
168
185
|
|
169
186
|
args = {
|
@@ -177,17 +194,18 @@ module Uc3DmpId
|
|
177
194
|
filter_expression: 'SK = :version',
|
178
195
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
179
196
|
}
|
180
|
-
logger.info(message: "Querying _by_mod_day with #{args}")
|
197
|
+
logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
|
181
198
|
client = client.nil? ? Uc3DmpDynamo::Client.new : client
|
182
199
|
_process_search_response(response: client.query(args: args, logger: logger))
|
183
200
|
end
|
184
201
|
|
185
|
-
|
186
202
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
187
203
|
def _process_search_response(response:)
|
188
204
|
return [] unless response.is_a?(Array) && response.any?
|
189
205
|
|
190
|
-
results = response.
|
206
|
+
results = response.map do |item|
|
207
|
+
next if item.nil?
|
208
|
+
|
191
209
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
192
210
|
Helper.cleanse_dmp_json(json: dmp)
|
193
211
|
end
|
data/lib/uc3-dmp-id/helper.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
3
|
# TODO: Be sure to update the API functions so that they call cleanse_dmp_json before
|
5
4
|
# calling Uc3DmpApiCore::Responder.respond !!!!!!!!!!
|
6
5
|
|
7
|
-
|
8
6
|
module Uc3DmpId
|
9
7
|
# Helper functions for working with DMP IDs
|
10
8
|
class Helper
|
@@ -21,6 +19,24 @@ module Uc3DmpId
|
|
21
19
|
DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
|
22
20
|
DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
|
23
21
|
|
22
|
+
DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
|
23
|
+
DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
|
24
|
+
|
25
|
+
MSG_DMP_EXISTS = 'DMP already exists. Try :update instead.'
|
26
|
+
MSG_DMP_FORBIDDEN = 'You do not have permission.'
|
27
|
+
MSG_DMP_INVALID_DMP_ID = 'Invalid DMP ID format.'
|
28
|
+
MSG_DMP_NO_DMP_ID = 'A DMP ID could not be registered at this time.'
|
29
|
+
MSG_DMP_NO_HISTORICALS = 'You cannot modify a historical version of the DMP.'
|
30
|
+
MSG_DMP_NO_TOMBSTONE = 'Unable to tombstone the DMP ID at this time.'
|
31
|
+
MSG_DMP_NO_UPDATE = 'Unable to update the DMP ID at this time.'
|
32
|
+
MSG_DMP_NOT_FOUND = 'DMP does not exist.'
|
33
|
+
MSG_DMP_UNABLE_TO_VERSION = 'Unable to version this DMP.'
|
34
|
+
MSG_DMP_UNKNOWN = 'DMP does not exist. Try :create instead.'
|
35
|
+
MSG_NO_CHANGE = 'The updated record has no changes.'
|
36
|
+
MSG_NO_OWNER_ORG = 'Could not determine ownership of the DMP ID.'
|
37
|
+
MSG_NO_PROVENANCE_OWNER = 'Unable to determine the provenance of the DMP ID.'
|
38
|
+
MSG_SERVER_ERROR = 'Something went wrong.'
|
39
|
+
|
24
40
|
class << self
|
25
41
|
# Append the PK prefix for the object
|
26
42
|
# -------------------------------------------------------------------------------------
|
@@ -48,19 +64,13 @@ module Uc3DmpId
|
|
48
64
|
|
49
65
|
# Return the base URL for a DMP ID
|
50
66
|
def dmp_id_base_url
|
51
|
-
url = ENV.fetch('DMP_ID_BASE_URL',
|
67
|
+
url = ENV.fetch('DMP_ID_BASE_URL', DEFAULT_LANDING_PAGE_URL)
|
52
68
|
url&.end_with?('/') ? url : "#{url}/"
|
53
69
|
end
|
54
70
|
|
55
71
|
# The landing page URL (NOT the DOI URL)
|
56
72
|
def landing_page_url
|
57
|
-
url = ENV.fetch('DMP_ID_LANDING_URL',
|
58
|
-
url&.end_with?('/') ? url : "#{url}/"
|
59
|
-
end
|
60
|
-
|
61
|
-
# Return the base URL for the API
|
62
|
-
def api_base_url
|
63
|
-
url = ENV.fetch('DMP_ID_BASE_URL', 'https://api.dmphub.uc3dev.cdlib.net/dmps/')
|
73
|
+
url = ENV.fetch('DMP_ID_LANDING_URL', DEFAULT_LANDING_PAGE_URL)
|
64
74
|
url&.end_with?('/') ? url : "#{url}/"
|
65
75
|
end
|
66
76
|
|
@@ -69,7 +79,7 @@ module Uc3DmpId
|
|
69
79
|
dmp_id = value.match(DOI_REGEX).to_s
|
70
80
|
return nil if dmp_id.nil? || dmp_id == ''
|
71
81
|
# If it's already a URL, return it as is
|
72
|
-
return value if value.start_with?('http')
|
82
|
+
return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
|
73
83
|
|
74
84
|
dmp_id = dmp_id.gsub('doi:', '')
|
75
85
|
dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
|
@@ -90,7 +100,7 @@ module Uc3DmpId
|
|
90
100
|
|
91
101
|
# Append the :PK prefix to the :dmp_id
|
92
102
|
def dmp_id_to_pk(json:)
|
93
|
-
return nil if json.
|
103
|
+
return nil if !json.is_a?(Hash) || json['identifier'].nil?
|
94
104
|
|
95
105
|
# If it's a DOI format it correctly
|
96
106
|
dmp_id = format_dmp_id(value: json['identifier'].to_s)
|
@@ -117,6 +127,7 @@ module Uc3DmpId
|
|
117
127
|
end
|
118
128
|
|
119
129
|
# Compare the DMP IDs to see if they are the same
|
130
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
120
131
|
def eql?(dmp_a:, dmp_b:)
|
121
132
|
return dmp_a == dmp_b unless dmp_a.is_a?(Hash) && !dmp_a['dmp'].nil? && dmp_b.is_a?(Hash) && !dmp_b['dmp'].nil?
|
122
133
|
|
@@ -132,9 +143,9 @@ module Uc3DmpId
|
|
132
143
|
b = deep_copy_dmp(obj: dmp_b)
|
133
144
|
|
134
145
|
# ignore some of the attributes before comparing
|
135
|
-
%w[SK dmphub_modification_day modified created
|
136
|
-
|
137
|
-
|
146
|
+
%w[SK dmphub_modification_day modified created dmphub_versions].each do |key|
|
147
|
+
a['dmp'].delete(key) unless a['dmp'][key].nil?
|
148
|
+
b['dmp'].delete(key) unless b['dmp'][key].nil?
|
138
149
|
end
|
139
150
|
a == b
|
140
151
|
end
|
@@ -147,7 +158,7 @@ module Uc3DmpId
|
|
147
158
|
id = dmp.fetch('contact', {}).fetch('contact_id', {})['identifier']
|
148
159
|
return id unless id.nil?
|
149
160
|
|
150
|
-
dmp.fetch('contributor', []).map { |contributor| contributor.fetch('
|
161
|
+
# Fall back to the first contributor that actually has an identifier; contributors without one are skipped.
dmp.fetch('contributor', []).map { |contributor| contributor.fetch('contributor_id', {})['identifier'] }.compact.first
|
151
162
|
end
|
152
163
|
|
153
164
|
# Extract the Contact's affiliation ROR ID
|
@@ -155,7 +166,8 @@ module Uc3DmpId
|
|
155
166
|
return nil unless json.is_a?(Hash)
|
156
167
|
|
157
168
|
dmp = json['dmp'].nil? ? json : json['dmp']
|
158
|
-
owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id',
|
169
|
+
owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id',
|
170
|
+
{})['identifier']
|
159
171
|
return owner_org unless owner_org.nil?
|
160
172
|
|
161
173
|
orgs = dmp.fetch('contributor', []).map do |contributor|
|
@@ -163,8 +175,10 @@ module Uc3DmpId
|
|
163
175
|
end
|
164
176
|
orgs.compact.max_by { |i| orgs.count(i) }
|
165
177
|
end
|
178
|
+
# rubocop:enable Metrics/AbcSize
|
166
179
|
|
167
180
|
# Add DMPHub specific fields to the DMP ID JSON
|
181
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
168
182
|
def annotate_dmp_json(provenance:, p_key:, json:)
|
169
183
|
json = parse_json(json: json)
|
170
184
|
bool_vals = [1, '1', true, 'true', 'yes']
|
@@ -179,7 +193,7 @@ module Uc3DmpId
|
|
179
193
|
annotated['SK'] = DMP_LATEST_VERSION
|
180
194
|
|
181
195
|
# Ensure that the :dmp_id matches the :PK
|
182
|
-
annotated['dmp_id'] = pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK']))
|
196
|
+
annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
|
183
197
|
|
184
198
|
owner_id = extract_owner_id(json: json)
|
185
199
|
owner_org = extract_owner_org(json: json)
|
@@ -189,7 +203,7 @@ module Uc3DmpId
|
|
189
203
|
annotated['dmproadmap_featured'] = bool_vals.include?(featured.to_s.downcase) ? '1' : '0'
|
190
204
|
|
191
205
|
# Update the modification timestamps
|
192
|
-
annotated['dmphub_modification_day'] = Time.now.strftime('%Y-%m-%d')
|
206
|
+
annotated['dmphub_modification_day'] = Time.now.utc.strftime('%Y-%m-%d')
|
193
207
|
annotated['dmphub_owner_id'] = owner_id unless owner_id.nil?
|
194
208
|
annotated['dmphub_owner_org'] = owner_org unless owner_org.nil?
|
195
209
|
return annotated unless json['dmphub_provenance_id'].nil?
|
@@ -210,6 +224,8 @@ module Uc3DmpId
|
|
210
224
|
end
|
211
225
|
annotated
|
212
226
|
end
|
227
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
228
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
213
229
|
|
214
230
|
# Recursive method that strips out any DMPHub related metadata from a DMP record before sending
|
215
231
|
# it to the caller
|
@@ -220,7 +236,7 @@ module Uc3DmpId
|
|
220
236
|
return json.map { |obj| cleanse_dmp_json(json: obj) }.compact if json.is_a?(Array)
|
221
237
|
|
222
238
|
cleansed = {}
|
223
|
-
allowable = %w[dmphub_versions]
|
239
|
+
allowable = %w[dmphub_modifications dmphub_versions]
|
224
240
|
json.each_key do |key|
|
225
241
|
next if (key.to_s.start_with?('dmphub') && !allowable.include?(key)) || %w[PK SK].include?(key.to_s)
|
226
242
|
|
@@ -289,8 +305,8 @@ module Uc3DmpId
|
|
289
305
|
|
290
306
|
# Remove the homepage or callbackUri because we will add this when needed. We just want the id
|
291
307
|
val = value.downcase
|
292
|
-
|
293
|
-
|
308
|
+
.gsub(provenance.fetch('callbackUri', '').downcase, '')
|
309
|
+
.gsub(provenance.fetch('homepage', '').downcase, '')
|
294
310
|
val = val.gsub(%r{https?://}, '')
|
295
311
|
val = val[1..val.length] if val.start_with?('/')
|
296
312
|
id = provenance['PK']&.gsub('PROVENANCE#', '')
|