uc3-dmp-id 0.0.140 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,30 +9,37 @@ module Uc3DmpId
9
9
  class Finder
10
10
  MSG_INVALID_ARGS = 'Expected JSON to be structured as `{ "dmp": { "PK": "value"} } OR \
11
11
  { "dmp": { "dmp_id": { "identifier": "value", "type": "value" } }`'
12
- MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected value to start with `https://orcid.org/`.'
13
- MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected value to start with `https://ror.org/`.'
12
+ MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected a valid ORCID id (excluding the domain)`.'
13
+ MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected a valid ROR id (excluding the domain)`.'
14
14
  MSG_INVALID_MOD_DATE = 'Invalid :modification_day. Expected value to be in the `YYYY-MM-DD` format.'
15
15
  MSG_MISSING_PK = 'No PK was provided'
16
16
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
17
17
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
18
18
 
19
-
20
19
  class << self
21
20
  # TODO: Replace this with ElasticSearch
22
21
  def search_dmps(args:, logger: nil)
23
- return _by_owner(owner_org: args['owner_orcid'], logger: logger) unless args['owner_orcid'].nil?
24
- return _by_owner_org(owner_org: args['owner_org_ror'], logger: logger) unless args['owner_org_ror'].nil?
25
- return _by_mod_day(day: args['modification_day'], logger: logger) unless args['modification_day'].nil?
22
+ client = Uc3DmpDynamo::Client.new
23
+ return _by_owner(owner_org: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
24
+
25
+ unless args['owner_org_ror'].nil?
26
+ return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
27
+ logger: logger)
28
+ end
29
+ unless args['modification_day'].nil?
30
+ return _by_mod_day(day: args['modification_day'], client: client,
31
+ logger: logger)
32
+ end
26
33
 
27
34
  []
28
35
  end
29
- # rubocop:enable Metrics/MethodLength
30
36
 
31
37
  # Find a DMP based on the contents of the incoming JSON
32
38
  # -------------------------------------------------------------------------
33
- def by_json(json:, cleanse: true, logger: nil)
39
+ # rubocop:disable Metrics/AbcSize
40
+ def by_json(json:, client: nil, cleanse: true, logger: nil)
34
41
  json = Helper.parse_json(json: json)&.fetch('dmp', {})
35
- raise FinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
42
+ raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
36
43
 
37
44
  p_key = json['PK']
38
45
  # Translate the incoming :dmp_id into a PK
@@ -46,9 +53,11 @@ module Uc3DmpId
46
53
  # find_by_PK
47
54
  p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
48
55
  end
56
+ # rubocop:enable Metrics/AbcSize
49
57
 
50
58
  # Find the DMP by its PK and SK
51
59
  # -------------------------------------------------------------------------
60
+ # rubocop:disable Metrics/AbcSize
52
61
  def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, cleanse: true, logger: nil)
53
62
  raise FinderError, MSG_MISSING_PK if p_key.nil?
54
63
 
@@ -57,7 +66,7 @@ module Uc3DmpId
57
66
  resp = client.get_item(
58
67
  key: {
59
68
  PK: Helper.append_pk_prefix(p_key: p_key),
60
- SK: s_key
69
+ SK: Helper.append_sk_prefix(s_key: s_key)
61
70
  },
62
71
  logger: logger
63
72
  )
@@ -66,9 +75,10 @@ module Uc3DmpId
66
75
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
67
76
  return nil if dmp['dmp']['PK'].nil?
68
77
 
69
- dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger)
78
+ dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
70
79
  cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
71
80
  end
81
+ # rubocop:enable Metrics/AbcSize
72
82
 
73
83
  # Fetch just the PK to see if a record exists
74
84
  # -------------------------------------------------------------------------
@@ -79,7 +89,7 @@ module Uc3DmpId
79
89
  client.pk_exists?(
80
90
  key: {
81
91
  PK: Helper.append_pk_prefix(p_key: p_key),
82
- SK: s_key
92
+ SK: Helper.append_sk_prefix(s_key: s_key)
83
93
  },
84
94
  logger: logger
85
95
  )
@@ -89,7 +99,10 @@ module Uc3DmpId
89
99
  # -------------------------------------------------------------------------
90
100
  # rubocop:disable Metrics/AbcSize
91
101
  def by_provenance_identifier(json:, client: nil, cleanse: true, logger: nil)
92
- raise FinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
102
+ raise FinderError, MSG_MISSING_PROV_ID unless json.is_a?(Hash)
103
+
104
+ json = json['dmp'] unless json['dmp'].nil?
105
+ raise FinderError, MSG_MISSING_PROV_ID if json.fetch('dmp_id', {})['identifier'].nil?
93
106
 
94
107
  args = {
95
108
  index_name: 'dmphub_provenance_identifier_gsi',
@@ -117,17 +130,17 @@ module Uc3DmpId
117
130
  private
118
131
 
119
132
  # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
120
- def _by_owner(owner_id:, logger: nil)
121
- regex = %r{^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$}
122
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s.downcase =~ regex).nil?
133
+ def _by_owner(owner_id:, client: nil, logger: nil)
134
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
135
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
123
136
 
124
137
  args = {
125
138
  index_name: 'dmphub_owner_id_gsi',
126
139
  key_conditions: {
127
140
  dmphub_owner_id: {
128
141
  attribute_value_list: [
129
- "http://orcid.org/#{owner_id.to_s.downcase}",
130
- "https://orcid.org/#{owner_id.to_s.downcase}"
142
+ "http://orcid.org/#{owner_id}",
143
+ "https://orcid.org/#{owner_id}"
131
144
  ],
132
145
  comparison_operator: 'IN'
133
146
  }
@@ -135,35 +148,39 @@ module Uc3DmpId
135
148
  filter_expression: 'SK = :version',
136
149
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
137
150
  }
138
- logger.info(message: "Querying _by_owner with #{args}") unless logger.nil?
151
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
139
152
  client = client.nil? ? Uc3DmpDynamo::Client.new : client
140
153
  _process_search_response(response: client.query(args: args, logger: logger))
141
154
  end
142
155
 
143
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org on the DMP ID record)
144
- def _by_owner_org(owner_org:, logger: nil)
145
- regex = %r{^[a-zA-Z0-9]+$}
146
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? ||(owner_org.to_s.downcase =~ regex).nil?
156
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
157
+ # on the DMP ID record)
158
+ def _by_owner_org(owner_org:, client: nil, logger: nil)
159
+ regex = /^[a-zA-Z0-9]+$/
160
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
147
161
 
148
162
  args = {
149
163
  index_name: 'dmphub_owner_org_gsi',
150
164
  key_conditions: {
151
165
  dmphub_owner_org: {
152
- attribute_value_list: ["https://ror.org/#{owner_org.to_s.downcase}"],
153
- comparison_operator: 'EQ'
166
+ attribute_value_list: [
167
+ "https://ror.org/#{owner_org.to_s.downcase}",
168
+ "http://ror.org/#{owner_org.to_s.downcase}"
169
+ ],
170
+ comparison_operator: 'IN'
154
171
  }
155
172
  },
156
173
  filter_expression: 'SK = :version',
157
174
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
158
175
  }
159
- logger.info(message: "Querying _by_owner_org with #{args}") unless logger.nil?
176
+ logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
160
177
  client = client.nil? ? Uc3DmpDynamo::Client.new : client
161
178
  _process_search_response(response: client.query(args: args, logger: logger))
162
179
  end
163
180
 
164
181
  # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
165
- def _by_mod_day(day:, logger: nil)
166
- regex = %r{^[0-9]{4}(-[0-9]{2}){2}}
182
+ def _by_mod_day(day:, client: nil, logger: nil)
183
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
167
184
  raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
168
185
 
169
186
  args = {
@@ -177,17 +194,18 @@ module Uc3DmpId
177
194
  filter_expression: 'SK = :version',
178
195
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
179
196
  }
180
- logger.info(message: "Querying _by_mod_day with #{args}") unless logger.nil?
197
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
181
198
  client = client.nil? ? Uc3DmpDynamo::Client.new : client
182
199
  _process_search_response(response: client.query(args: args, logger: logger))
183
200
  end
184
201
 
185
-
186
202
  # Transform the search results so that we do not include any of the DMPHub specific metadata
187
203
  def _process_search_response(response:)
188
204
  return [] unless response.is_a?(Array) && response.any?
189
205
 
190
- results = response.each do |item|
206
+ results = response.map do |item|
207
+ next if item.nil?
208
+
191
209
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
192
210
  Helper.cleanse_dmp_json(json: dmp)
193
211
  end
@@ -1,10 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
-
4
3
  # TODO: Be sure to update the API functions so that they call cleanse_dmp_json before
5
4
  # calling Uc3DmpApiCore::Responder.respond !!!!!!!!!!
6
5
 
7
-
8
6
  module Uc3DmpId
9
7
  # Helper functions for working with DMP IDs
10
8
  class Helper
@@ -21,6 +19,24 @@ module Uc3DmpId
21
19
  DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
22
20
  DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
23
21
 
22
+ DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
23
+ DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
24
+
25
+ MSG_DMP_EXISTS = 'DMP already exists. Try :update instead.'
26
+ MSG_DMP_FORBIDDEN = 'You do not have permission.'
27
+ MSG_DMP_INVALID_DMP_ID = 'Invalid DMP ID format.'
28
+ MSG_DMP_NO_DMP_ID = 'A DMP ID could not be registered at this time.'
29
+ MSG_DMP_NO_HISTORICALS = 'You cannot modify a historical version of the DMP.'
30
+ MSG_DMP_NO_TOMBSTONE = 'Unable to tombstone the DMP ID at this time.'
31
+ MSG_DMP_NO_UPDATE = 'Unable to update the DMP ID at this time.'
32
+ MSG_DMP_NOT_FOUND = 'DMP does not exist.'
33
+ MSG_DMP_UNABLE_TO_VERSION = 'Unable to version this DMP.'
34
+ MSG_DMP_UNKNOWN = 'DMP does not exist. Try :create instead.'
35
+ MSG_NO_CHANGE = 'The updated record has no changes.'
36
+ MSG_NO_OWNER_ORG = 'Could not determine ownership of the DMP ID.'
37
+ MSG_NO_PROVENANCE_OWNER = 'Unable to determine the provenance of the DMP ID.'
38
+ MSG_SERVER_ERROR = 'Something went wrong.'
39
+
24
40
  class << self
25
41
  # Append the PK prefix for the object
26
42
  # -------------------------------------------------------------------------------------
@@ -48,19 +64,13 @@ module Uc3DmpId
48
64
 
49
65
  # Return the base URL for a DMP ID
50
66
  def dmp_id_base_url
51
- url = ENV.fetch('DMP_ID_BASE_URL', 'https://dmphub.uc3dev.cdlib.net/dmps/')
67
+ url = ENV.fetch('DMP_ID_BASE_URL', DEFAULT_LANDING_PAGE_URL)
52
68
  url&.end_with?('/') ? url : "#{url}/"
53
69
  end
54
70
 
55
71
  # The landing page URL (NOT the DOI URL)
56
72
  def landing_page_url
57
- url = ENV.fetch('DMP_ID_LANDING_URL', 'https://dmphub.uc3dev.cdlib.net/dmps/')
58
- url&.end_with?('/') ? url : "#{url}/"
59
- end
60
-
61
- # Return the base URL for the API
62
- def api_base_url
63
- url = ENV.fetch('DMP_ID_BASE_URL', 'https://api.dmphub.uc3dev.cdlib.net/dmps/')
73
+ url = ENV.fetch('DMP_ID_LANDING_URL', DEFAULT_LANDING_PAGE_URL)
64
74
  url&.end_with?('/') ? url : "#{url}/"
65
75
  end
66
76
 
@@ -69,7 +79,7 @@ module Uc3DmpId
69
79
  dmp_id = value.match(DOI_REGEX).to_s
70
80
  return nil if dmp_id.nil? || dmp_id == ''
71
81
  # If it's already a URL, return it as is
72
- return value if value.start_with?('http')
82
+ return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
73
83
 
74
84
  dmp_id = dmp_id.gsub('doi:', '')
75
85
  dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
@@ -90,7 +100,7 @@ module Uc3DmpId
90
100
 
91
101
  # Append the :PK prefix to the :dmp_id
92
102
  def dmp_id_to_pk(json:)
93
- return nil if json.nil? || json['identifier'].nil?
103
+ return nil if !json.is_a?(Hash) || json['identifier'].nil?
94
104
 
95
105
  # If it's a DOI format it correctly
96
106
  dmp_id = format_dmp_id(value: json['identifier'].to_s)
@@ -117,6 +127,7 @@ module Uc3DmpId
117
127
  end
118
128
 
119
129
  # Compare the DMP IDs to see if they are the same
130
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
120
131
  def eql?(dmp_a:, dmp_b:)
121
132
  return dmp_a == dmp_b unless dmp_a.is_a?(Hash) && !dmp_a['dmp'].nil? && dmp_b.is_a?(Hash) && !dmp_b['dmp'].nil?
122
133
 
@@ -132,9 +143,9 @@ module Uc3DmpId
132
143
  b = deep_copy_dmp(obj: dmp_b)
133
144
 
134
145
  # ignore some of the attributes before comparing
135
- %w[SK dmphub_modification_day modified created dmphub_assertions].each do |key|
136
- a['dmp'].delete(key) unless a['dmp'][key].nil?
137
- b['dmp'].delete(key) unless b['dmp'][key].nil?
146
+ %w[SK dmphub_modification_day modified created dmphub_versions].each do |key|
147
+ a['dmp'].delete(key) unless a['dmp'][key].nil?
148
+ b['dmp'].delete(key) unless b['dmp'][key].nil?
138
149
  end
139
150
  a == b
140
151
  end
@@ -147,7 +158,7 @@ module Uc3DmpId
147
158
  id = dmp.fetch('contact', {}).fetch('contact_id', {})['identifier']
148
159
  return id unless id.nil?
149
160
 
150
- dmp.fetch('contributor', []).map { |contributor| contributor.fetch('contact_id', {})['identifier'] }.first
161
+ dmp.fetch('contributor', []).map { |contributor| contributor.fetch('contributor_id', {})['identifier'] }.first
151
162
  end
152
163
 
153
164
  # Extract the Contact's affiliation ROR ID
@@ -155,7 +166,8 @@ module Uc3DmpId
155
166
  return nil unless json.is_a?(Hash)
156
167
 
157
168
  dmp = json['dmp'].nil? ? json : json['dmp']
158
- owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id', {})['identifier']
169
+ owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id',
170
+ {})['identifier']
159
171
  return owner_org unless owner_org.nil?
160
172
 
161
173
  orgs = dmp.fetch('contributor', []).map do |contributor|
@@ -163,8 +175,10 @@ module Uc3DmpId
163
175
  end
164
176
  orgs.compact.max_by { |i| orgs.count(i) }
165
177
  end
178
+ # rubocop:enable Metrics/AbcSize
166
179
 
167
180
  # Add DMPHub specific fields to the DMP ID JSON
181
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
168
182
  def annotate_dmp_json(provenance:, p_key:, json:)
169
183
  json = parse_json(json: json)
170
184
  bool_vals = [1, '1', true, 'true', 'yes']
@@ -179,7 +193,7 @@ module Uc3DmpId
179
193
  annotated['SK'] = DMP_LATEST_VERSION
180
194
 
181
195
  # Ensure that the :dmp_id matches the :PK
182
- annotated['dmp_id'] = pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK']))
196
+ annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
183
197
 
184
198
  owner_id = extract_owner_id(json: json)
185
199
  owner_org = extract_owner_org(json: json)
@@ -189,7 +203,7 @@ module Uc3DmpId
189
203
  annotated['dmproadmap_featured'] = bool_vals.include?(featured.to_s.downcase) ? '1' : '0'
190
204
 
191
205
  # Update the modification timestamps
192
- annotated['dmphub_modification_day'] = Time.now.strftime('%Y-%m-%d')
206
+ annotated['dmphub_modification_day'] = Time.now.utc.strftime('%Y-%m-%d')
193
207
  annotated['dmphub_owner_id'] = owner_id unless owner_id.nil?
194
208
  annotated['dmphub_owner_org'] = owner_org unless owner_org.nil?
195
209
  return annotated unless json['dmphub_provenance_id'].nil?
@@ -210,6 +224,8 @@ module Uc3DmpId
210
224
  end
211
225
  annotated
212
226
  end
227
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
228
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
213
229
 
214
230
  # Recursive method that strips out any DMPHub related metadata from a DMP record before sending
215
231
  # it to the caller
@@ -289,8 +305,8 @@ module Uc3DmpId
289
305
 
290
306
  # Remove the homepage or callbackUri because we will add this when needed. We just want the id
291
307
  val = value.downcase
292
- .gsub(provenance.fetch('callbackUri', '').downcase, '')
293
- .gsub(provenance.fetch('homepage', '').downcase, '')
308
+ .gsub(provenance.fetch('callbackUri', '').downcase, '')
309
+ .gsub(provenance.fetch('homepage', '').downcase, '')
294
310
  val = val.gsub(%r{https?://}, '')
295
311
  val = val[1..val.length] if val.start_with?('/')
296
312
  id = provenance['PK']&.gsub('PROVENANCE#', '')