uc3-dmp-id 0.0.140 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,30 +9,37 @@ module Uc3DmpId
9
9
  class Finder
10
10
  MSG_INVALID_ARGS = 'Expected JSON to be structured as `{ "dmp": { "PK": "value"} } OR \
11
11
  { "dmp": { "dmp_id": { "identifier": "value", "type": "value" } }`'
12
- MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected value to start with `https://orcid.org/`.'
13
- MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected value to start with `https://ror.org/`.'
12
+ MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected a valid ORCID id (excluding the domain)`.'
13
+ MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected a valid ROR id (excluding the domain)`.'
14
14
  MSG_INVALID_MOD_DATE = 'Invalid :modification_day. Expected value to be in the `YYYY-MM-DD` format.'
15
15
  MSG_MISSING_PK = 'No PK was provided'
16
16
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
17
17
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
18
18
 
19
-
20
19
  class << self
21
20
  # TODO: Replace this with ElasticSearch
22
21
  def search_dmps(args:, logger: nil)
23
- return _by_owner(owner_org: args['owner_orcid'], logger: logger) unless args['owner_orcid'].nil?
24
- return _by_owner_org(owner_org: args['owner_org_ror'], logger: logger) unless args['owner_org_ror'].nil?
25
- return _by_mod_day(day: args['modification_day'], logger: logger) unless args['modification_day'].nil?
22
+ client = Uc3DmpDynamo::Client.new
23
+ return _by_owner(owner_org: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
24
+
25
+ unless args['owner_org_ror'].nil?
26
+ return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
27
+ logger: logger)
28
+ end
29
+ unless args['modification_day'].nil?
30
+ return _by_mod_day(day: args['modification_day'], client: client,
31
+ logger: logger)
32
+ end
26
33
 
27
34
  []
28
35
  end
29
- # rubocop:enable Metrics/MethodLength
30
36
 
31
37
  # Find a DMP based on the contents of the incoming JSON
32
38
  # -------------------------------------------------------------------------
33
- def by_json(json:, cleanse: true, logger: nil)
39
+ # rubocop:disable Metrics/AbcSize
40
+ def by_json(json:, client: nil, cleanse: true, logger: nil)
34
41
  json = Helper.parse_json(json: json)&.fetch('dmp', {})
35
- raise FinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
42
+ raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
36
43
 
37
44
  p_key = json['PK']
38
45
  # Translate the incoming :dmp_id into a PK
@@ -46,9 +53,11 @@ module Uc3DmpId
46
53
  # find_by_PK
47
54
  p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
48
55
  end
56
+ # rubocop:enable Metrics/AbcSize
49
57
 
50
58
  # Find the DMP by its PK and SK
51
59
  # -------------------------------------------------------------------------
60
+ # rubocop:disable Metrics/AbcSize
52
61
  def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, cleanse: true, logger: nil)
53
62
  raise FinderError, MSG_MISSING_PK if p_key.nil?
54
63
 
@@ -57,7 +66,7 @@ module Uc3DmpId
57
66
  resp = client.get_item(
58
67
  key: {
59
68
  PK: Helper.append_pk_prefix(p_key: p_key),
60
- SK: s_key
69
+ SK: Helper.append_sk_prefix(s_key: s_key)
61
70
  },
62
71
  logger: logger
63
72
  )
@@ -66,9 +75,10 @@ module Uc3DmpId
66
75
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
67
76
  return nil if dmp['dmp']['PK'].nil?
68
77
 
69
- dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger)
78
+ dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
70
79
  cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
71
80
  end
81
+ # rubocop:enable Metrics/AbcSize
72
82
 
73
83
  # Fetch just the PK to see if a record exists
74
84
  # -------------------------------------------------------------------------
@@ -79,7 +89,7 @@ module Uc3DmpId
79
89
  client.pk_exists?(
80
90
  key: {
81
91
  PK: Helper.append_pk_prefix(p_key: p_key),
82
- SK: s_key
92
+ SK: Helper.append_sk_prefix(s_key: s_key)
83
93
  },
84
94
  logger: logger
85
95
  )
@@ -89,7 +99,10 @@ module Uc3DmpId
89
99
  # -------------------------------------------------------------------------
90
100
  # rubocop:disable Metrics/AbcSize
91
101
  def by_provenance_identifier(json:, client: nil, cleanse: true, logger: nil)
92
- raise FinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
102
+ raise FinderError, MSG_MISSING_PROV_ID unless json.is_a?(Hash)
103
+
104
+ json = json['dmp'] unless json['dmp'].nil?
105
+ raise FinderError, MSG_MISSING_PROV_ID if json.fetch('dmp_id', {})['identifier'].nil?
93
106
 
94
107
  args = {
95
108
  index_name: 'dmphub_provenance_identifier_gsi',
@@ -117,17 +130,17 @@ module Uc3DmpId
117
130
  private
118
131
 
119
132
  # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
120
- def _by_owner(owner_id:, logger: nil)
121
- regex = %r{^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$}
122
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s.downcase =~ regex).nil?
133
+ def _by_owner(owner_id:, client: nil, logger: nil)
134
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
135
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
123
136
 
124
137
  args = {
125
138
  index_name: 'dmphub_owner_id_gsi',
126
139
  key_conditions: {
127
140
  dmphub_owner_id: {
128
141
  attribute_value_list: [
129
- "http://orcid.org/#{owner_id.to_s.downcase}",
130
- "https://orcid.org/#{owner_id.to_s.downcase}"
142
+ "http://orcid.org/#{owner_id}",
143
+ "https://orcid.org/#{owner_id}"
131
144
  ],
132
145
  comparison_operator: 'IN'
133
146
  }
@@ -135,35 +148,39 @@ module Uc3DmpId
135
148
  filter_expression: 'SK = :version',
136
149
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
137
150
  }
138
- logger.info(message: "Querying _by_owner with #{args}") unless logger.nil?
151
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
139
152
  client = client.nil? ? Uc3DmpDynamo::Client.new : client
140
153
  _process_search_response(response: client.query(args: args, logger: logger))
141
154
  end
142
155
 
143
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
# on the DMP ID record)
#
# @param owner_org [#to_s] the ROR id without the domain; it is lower-cased before use
# @param client [Object, nil] object responding to #query(args:, logger:); a new
#   Uc3DmpDynamo::Client is built when nil
# @param logger [Object, nil] only used when it responds to #info
# @return the value produced by _process_search_response for the query response
# @raise [FinderError] when :owner_org is nil, empty or not purely alphanumeric
def _by_owner_org(owner_org:, client: nil, logger: nil)
  ror_id = owner_org.to_s.downcase
  # \A and \z anchor the entire value. ^ and $ anchor each line, which would let a multi-line
  # value through as long as one of its lines looked valid. nil becomes '' and is rejected too.
  raise FinderError, MSG_INVALID_OWNER_ORG unless ror_id.match?(/\A[a-z0-9]+\z/)

  # NOTE(review): DynamoDB documents only EQ for a partition key in Query key conditions.
  # Confirm that Uc3DmpDynamo::Client#query accepts 'IN' here (or translates it).
  args = {
    index_name: 'dmphub_owner_org_gsi',
    key_conditions: {
      dmphub_owner_org: {
        # Match the ROR URL under either protocol
        attribute_value_list: [
          "https://ror.org/#{ror_id}",
          "http://ror.org/#{ror_id}"
        ],
        comparison_operator: 'IN'
      }
    },
    # Restrict the results to the latest version of each DMP ID
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args: args, logger: logger))
end
163
180
 
164
181
  # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
165
- def _by_mod_day(day:, logger: nil)
166
- regex = %r{^[0-9]{4}(-[0-9]{2}){2}}
182
+ def _by_mod_day(day:, client: nil, logger: nil)
183
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
167
184
  raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
168
185
 
169
186
  args = {
@@ -177,17 +194,18 @@ module Uc3DmpId
177
194
  filter_expression: 'SK = :version',
178
195
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
179
196
  }
180
- logger.info(message: "Querying _by_mod_day with #{args}") unless logger.nil?
197
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
181
198
  client = client.nil? ? Uc3DmpDynamo::Client.new : client
182
199
  _process_search_response(response: client.query(args: args, logger: logger))
183
200
  end
184
201
 
185
-
186
202
  # Transform the search results so that we do not include any of the DMPHub specific metadata
187
203
  def _process_search_response(response:)
188
204
  return [] unless response.is_a?(Array) && response.any?
189
205
 
190
- results = response.each do |item|
206
+ results = response.map do |item|
207
+ next if item.nil?
208
+
191
209
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
192
210
  Helper.cleanse_dmp_json(json: dmp)
193
211
  end
@@ -1,10 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
-
4
3
  # TODO: Be sure to update the API functions so that they call cleanse_dmp_json before
5
4
  # calling Uc3DmpApiCore::Responder.respond !!!!!!!!!!
6
5
 
7
-
8
6
  module Uc3DmpId
9
7
  # Helper functions for working with DMP IDs
10
8
  class Helper
@@ -21,6 +19,24 @@ module Uc3DmpId
21
19
  DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
22
20
  DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
23
21
 
22
+ DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
23
+ DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
24
+
25
+ MSG_DMP_EXISTS = 'DMP already exists. Try :update instead.'
26
+ MSG_DMP_FORBIDDEN = 'You do not have permission.'
27
+ MSG_DMP_INVALID_DMP_ID = 'Invalid DMP ID format.'
28
+ MSG_DMP_NO_DMP_ID = 'A DMP ID could not be registered at this time.'
29
+ MSG_DMP_NO_HISTORICALS = 'You cannot modify a historical version of the DMP.'
30
+ MSG_DMP_NO_TOMBSTONE = 'Unable to tombstone the DMP ID at this time.'
31
+ MSG_DMP_NO_UPDATE = 'Unable to update the DMP ID at this time.'
32
+ MSG_DMP_NOT_FOUND = 'DMP does not exist.'
33
+ MSG_DMP_UNABLE_TO_VERSION = 'Unable to version this DMP.'
34
+ MSG_DMP_UNKNOWN = 'DMP does not exist. Try :create instead.'
35
+ MSG_NO_CHANGE = 'The updated record has no changes.'
36
+ MSG_NO_OWNER_ORG = 'Could not determine ownership of the DMP ID.'
37
+ MSG_NO_PROVENANCE_OWNER = 'Unable to determine the provenance of the DMP ID.'
38
+ MSG_SERVER_ERROR = 'Something went wrong.'
39
+
24
40
  class << self
25
41
  # Append the PK prefix for the object
26
42
  # -------------------------------------------------------------------------------------
@@ -48,19 +64,13 @@ module Uc3DmpId
48
64
 
49
65
  # Return the base URL for a DMP ID
50
66
  def dmp_id_base_url
51
- url = ENV.fetch('DMP_ID_BASE_URL', 'https://dmphub.uc3dev.cdlib.net/dmps/')
67
+ url = ENV.fetch('DMP_ID_BASE_URL', DEFAULT_LANDING_PAGE_URL)
52
68
  url&.end_with?('/') ? url : "#{url}/"
53
69
  end
54
70
 
55
71
  # The landing page URL (NOT the DOI URL)
56
72
  def landing_page_url
57
- url = ENV.fetch('DMP_ID_LANDING_URL', 'https://dmphub.uc3dev.cdlib.net/dmps/')
58
- url&.end_with?('/') ? url : "#{url}/"
59
- end
60
-
61
- # Return the base URL for the API
62
- def api_base_url
63
- url = ENV.fetch('DMP_ID_BASE_URL', 'https://api.dmphub.uc3dev.cdlib.net/dmps/')
73
+ url = ENV.fetch('DMP_ID_LANDING_URL', DEFAULT_LANDING_PAGE_URL)
64
74
  url&.end_with?('/') ? url : "#{url}/"
65
75
  end
66
76
 
@@ -69,7 +79,7 @@ module Uc3DmpId
69
79
  dmp_id = value.match(DOI_REGEX).to_s
70
80
  return nil if dmp_id.nil? || dmp_id == ''
71
81
  # If it's already a URL, return it as is
72
- return value if value.start_with?('http')
82
+ return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
73
83
 
74
84
  dmp_id = dmp_id.gsub('doi:', '')
75
85
  dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
@@ -90,7 +100,7 @@ module Uc3DmpId
90
100
 
91
101
  # Append the :PK prefix to the :dmp_id
92
102
  def dmp_id_to_pk(json:)
93
- return nil if json.nil? || json['identifier'].nil?
103
+ return nil if !json.is_a?(Hash) || json['identifier'].nil?
94
104
 
95
105
  # If it's a DOI format it correctly
96
106
  dmp_id = format_dmp_id(value: json['identifier'].to_s)
@@ -117,6 +127,7 @@ module Uc3DmpId
117
127
  end
118
128
 
119
129
  # Compare the DMP IDs to see if they are the same
130
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
120
131
  def eql?(dmp_a:, dmp_b:)
121
132
  return dmp_a == dmp_b unless dmp_a.is_a?(Hash) && !dmp_a['dmp'].nil? && dmp_b.is_a?(Hash) && !dmp_b['dmp'].nil?
122
133
 
@@ -132,9 +143,9 @@ module Uc3DmpId
132
143
  b = deep_copy_dmp(obj: dmp_b)
133
144
 
134
145
  # ignore some of the attributes before comparing
135
- %w[SK dmphub_modification_day modified created dmphub_assertions].each do |key|
136
- a['dmp'].delete(key) unless a['dmp'][key].nil?
137
- b['dmp'].delete(key) unless b['dmp'][key].nil?
146
+ %w[SK dmphub_modification_day modified created dmphub_versions].each do |key|
147
+ a['dmp'].delete(key) unless a['dmp'][key].nil?
148
+ b['dmp'].delete(key) unless b['dmp'][key].nil?
138
149
  end
139
150
  a == b
140
151
  end
@@ -147,7 +158,7 @@ module Uc3DmpId
147
158
  id = dmp.fetch('contact', {}).fetch('contact_id', {})['identifier']
148
159
  return id unless id.nil?
149
160
 
150
- dmp.fetch('contributor', []).map { |contributor| contributor.fetch('contact_id', {})['identifier'] }.first
161
+ dmp.fetch('contributor', []).map { |contributor| contributor.fetch('contributor_id', {})['identifier'] }.first
151
162
  end
152
163
 
153
164
  # Extract the Contact's affiliation ROR ID
@@ -155,7 +166,8 @@ module Uc3DmpId
155
166
  return nil unless json.is_a?(Hash)
156
167
 
157
168
  dmp = json['dmp'].nil? ? json : json['dmp']
158
- owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id', {})['identifier']
169
+ owner_org = dmp.fetch('contact', {}).fetch('dmproadmap_affiliation', {}).fetch('affiliation_id',
170
+ {})['identifier']
159
171
  return owner_org unless owner_org.nil?
160
172
 
161
173
  orgs = dmp.fetch('contributor', []).map do |contributor|
@@ -163,8 +175,10 @@ module Uc3DmpId
163
175
  end
164
176
  orgs.compact.max_by { |i| orgs.count(i) }
165
177
  end
178
+ # rubocop:enable Metrics/AbcSize
166
179
 
167
180
  # Add DMPHub specific fields to the DMP ID JSON
181
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
168
182
  def annotate_dmp_json(provenance:, p_key:, json:)
169
183
  json = parse_json(json: json)
170
184
  bool_vals = [1, '1', true, 'true', 'yes']
@@ -179,7 +193,7 @@ module Uc3DmpId
179
193
  annotated['SK'] = DMP_LATEST_VERSION
180
194
 
181
195
  # Ensure that the :dmp_id matches the :PK
182
- annotated['dmp_id'] = pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK']))
196
+ annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
183
197
 
184
198
  owner_id = extract_owner_id(json: json)
185
199
  owner_org = extract_owner_org(json: json)
@@ -189,7 +203,7 @@ module Uc3DmpId
189
203
  annotated['dmproadmap_featured'] = bool_vals.include?(featured.to_s.downcase) ? '1' : '0'
190
204
 
191
205
  # Update the modification timestamps
192
- annotated['dmphub_modification_day'] = Time.now.strftime('%Y-%m-%d')
206
+ annotated['dmphub_modification_day'] = Time.now.utc.strftime('%Y-%m-%d')
193
207
  annotated['dmphub_owner_id'] = owner_id unless owner_id.nil?
194
208
  annotated['dmphub_owner_org'] = owner_org unless owner_org.nil?
195
209
  return annotated unless json['dmphub_provenance_id'].nil?
@@ -210,6 +224,8 @@ module Uc3DmpId
210
224
  end
211
225
  annotated
212
226
  end
227
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
228
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
213
229
 
214
230
  # Recursive method that strips out any DMPHub related metadata from a DMP record before sending
215
231
  # it to the caller
@@ -289,8 +305,8 @@ module Uc3DmpId
289
305
 
290
306
  # Remove the homepage or callbackUri because we will add this when needed. we just want the id
291
307
  val = value.downcase
292
- .gsub(provenance.fetch('callbackUri', '').downcase, '')
293
- .gsub(provenance.fetch('homepage', '').downcase, '')
308
+ .gsub(provenance.fetch('callbackUri', '').downcase, '')
309
+ .gsub(provenance.fetch('homepage', '').downcase, '')
294
310
  val = val.gsub(%r{https?://}, '')
295
311
  val = val[1..val.length] if val.start_with?('/')
296
312
  id = provenance['PK']&.gsub('PROVENANCE#', '')