uc3-dmp-id 0.1.81 → 0.1.82

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3de2c7edd72200592a63d6bad11a19ed289c9f3dfa35272d82ff98ec666b89ab
4
- data.tar.gz: 7b887212a1babb8f7928ba8bc3062c4c6a282d29af346c36d5e0f61083e67356
3
+ metadata.gz: 9c9845166f4bdaebc57491464cf46037645a6fccd2cc3274aa988ee6c7a735ad
4
+ data.tar.gz: 39d2e885ac1b4a8fd95796464d117c6a86b060e4842b99814dc30972160dc9ce
5
5
  SHA512:
6
- metadata.gz: 8685a865bb59f270cced7d0ffab66cd12a87a03a2d078accb2676a4d28ba73f13cf3d728b829e7e207aecddaa4cc10a53257414cb53c8e13f8cd0559b34e1210
7
- data.tar.gz: 13886474742a157b539d70b7e77961ab2a5272818ab0bd6df519ea11b7a6d18720b1f4268ec43ee532f3e4493bd92b9a5b31485d0530b38e6450972a821ef429
6
+ metadata.gz: d850ae3e53fb18bae12856a45f2fda95d66664f70052ed6f9c33e047de47728a5936012529a8727c59ccd671d687c6834d6880a97f915c9e4538d282ec3ea320
7
+ data.tar.gz: 2fd983b7d683a5da85f21a1f7001d5f8b1f7a47e798f5fffcb9db98e3486890d6982746d7a65b8d30377aa7eab5c8fcde377ba9b4d31fdc2cfd633d4bcb9fcb1
@@ -17,43 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- DOI_DOMAIN = 'https://doi.org/'
23
- SORT_OPTIONS = %w[title modified]
24
- SORT_DIRECTIONS = %w[asc desc]
25
- MAX_PAGE_SIZE = 100
26
- DEFAULT_PAGE_SIZE = 25
27
- DEFAULT_SORT_OPTION = 'modified'
28
- DEFAULT_SORT_DIR = 'desc'
29
-
30
20
  class << self
31
21
  # TODO: Replace this with ElasticSearch
32
22
  def search_dmps(args:, logger: nil)
33
- # Fetch the DMPs for each of the possible filter options
34
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
- owner = args['owner']
36
- org = args['org']
37
- funder = args['funder']
38
-
39
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
- # There may be multiple Org ids, so query them all
41
- org_pks = org.nil? ? [] : org.split('|').map { |o| _by_org(org: o, client:, logger:) }
42
- org_pks = org_pks.flatten.uniq
43
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
44
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
45
- logger&.debug(
46
- message: 'PKs found',
47
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
48
- )
49
- # return [] if pks.nil? || pks.empty?
50
-
51
- # Only use the DMPs that fit all of the filter criteria
52
- # dmps = pks.reduce(:&).flatten.uniq
53
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
54
25
 
55
- [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
56
34
 
35
+ []
57
36
  end
58
37
 
59
38
  # Find a DMP based on the contents of the incoming JSON
@@ -155,68 +134,74 @@ module Uc3DmpId
155
134
 
156
135
  private
157
136
 
158
- # Fetch the DMP IDs for the specified person's ORCID (or email)
159
- def _by_owner(owner:, client: nil, logger: nil)
160
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
161
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
162
- orcid = owner.to_s.strip
163
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
+ # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
+ def _by_owner(owner_id:, client: nil, logger: nil)
139
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
164
141
 
165
- orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
166
- resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
167
- return [] unless resp.is_a?(Hash)
168
-
169
- logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
170
- resp.fetch('dmps', [])
171
- end
172
-
173
- # Fetch the DMP IDs for the specified organization/institution
174
- def _by_org(org:, client: nil, logger: nil)
175
- regex = /^[a-zA-Z0-9]+$/
176
- id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
177
- return [] if id.nil?
178
-
179
- resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
180
- return [] unless resp.is_a?(Hash)
181
-
182
- logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
183
- resp.fetch('dmps', [])
142
+ args = {
143
+ index_name: 'dmphub_owner_id_gsi',
144
+ key_conditions: {
145
+ dmphub_owner_id: {
146
+ attribute_value_list: [
147
+ "http://orcid.org/#{owner_id}",
148
+ "https://orcid.org/#{owner_id}"
149
+ ],
150
+ comparison_operator: 'IN'
151
+ }
152
+ },
153
+ filter_expression: 'SK = :version',
154
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
+ }
156
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
+ client = Uc3DmpDynamo::Client.new if client.nil?
158
+ _process_search_response(response: client.query(args:, logger:))
184
159
  end
185
160
 
186
- # Fetch the DMP IDs for the specified funder
187
- def _by_funder(funder:, client: nil, logger: nil)
161
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
+ # on the DMP ID record)
163
+ def _by_owner_org(owner_org:, client: nil, logger: nil)
188
164
  regex = /^[a-zA-Z0-9]+$/
189
- id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
190
- id = "#{DOI_DOMAIN}#{funder.strip}" if id.nil? && !(funder.to_s =~ Helper::DOI_REGEX).nil?
191
- return [] if id.nil?
192
-
193
- resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
194
- return [] unless resp.is_a?(Hash)
195
-
196
- logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
197
- resp.fetch('dmps', [])
198
- end
165
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
199
166
 
200
- # Fetch the DMP IDs that are marked as featured
201
- def _by_featured(client: nil, logger: nil)
202
167
  args = {
203
- filter_expression: 'featured = :featured AND SK = :sk',
204
- expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
168
+ index_name: 'dmphub_owner_org_gsi',
169
+ key_conditions: {
170
+ dmphub_owner_org: {
171
+ attribute_value_list: [
172
+ "https://ror.org/#{owner_org.to_s.downcase}",
173
+ "http://ror.org/#{owner_org.to_s.downcase}"
174
+ ],
175
+ comparison_operator: 'IN'
176
+ }
177
+ },
178
+ filter_expression: 'SK = :version',
179
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
205
180
  }
206
- logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
181
+ logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
207
182
  client = Uc3DmpDynamo::Client.new if client.nil?
208
- _process_search_response(response: client.scan(args:))
183
+ _process_search_response(response: client.query(args:, logger:))
209
184
  end
210
185
 
211
- # Return all of the publicly visible DMPs
212
- def _publicly_visible(client: nil, logger: nil)
186
+ # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
+ def _by_mod_day(day:, client: nil, logger: nil)
188
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
+ raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
+
213
191
  args = {
214
- filter_expression: 'visibility = :visibility AND SK = :sk',
215
- expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
192
+ index_name: 'dmphub_modification_day_gsi',
193
+ key_conditions: {
194
+ dmphub_modification_day: {
195
+ attribute_value_list: [day.to_s],
196
+ comparison_operator: 'IN'
197
+ }
198
+ },
199
+ filter_expression: 'SK = :version',
200
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
216
201
  }
217
- logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
202
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
218
203
  client = Uc3DmpDynamo::Client.new if client.nil?
219
- _process_search_response(response: client.scan(args:))
204
+ _process_search_response(response: client.query(args:, logger:))
220
205
  end
221
206
 
222
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -227,8 +212,8 @@ module Uc3DmpId
227
212
  next if item.nil?
228
213
 
229
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
230
- # dmp = _remove_narrative_if_private(json: dmp)
231
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
232
217
  end
233
218
  results.compact.uniq
234
219
  end
@@ -49,27 +49,21 @@ module Uc3DmpId
49
49
  logger&.debug(message: 'Version after process_harvester_mods', details: version)
50
50
  raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
51
51
 
52
- # Remove the version info any any lingering modification blocks
53
- version.delete('dmphub_versions')
54
- version.delete('dmphub_modifications')
55
-
56
- # Set the :modified timestamps
57
- now = Time.now.utc
58
- version['modified'] = now.iso8601
59
- version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
52
+ # Process the incoming payload
53
+ payload = _process_modifications(owner:, updater:, version:, payload:, logger:)
60
54
 
61
55
  # Save the changes
62
- resp = client.put_item(json: version, logger:)
56
+ resp = client.put_item(json: payload, logger:)
63
57
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
64
58
 
65
59
  # Send the updates to EZID
66
- _post_process(provenance:, json: version, logger:)
60
+ _post_process(provenance:, json: payload, logger:)
67
61
 
68
62
  # Return the new version record
69
63
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
70
64
 
71
65
  # Append the :dmphub_versions Array
72
- out = JSON.parse({ dmp: version }.to_json)
66
+ out = JSON.parse({ dmp: payload }.to_json)
73
67
  out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
74
68
  Helper.cleanse_dmp_json(json: out)
75
69
  end
@@ -235,6 +229,40 @@ module Uc3DmpId
235
229
  logger&.debug(message: 'Returning updated VERSION:', details: version)
236
230
  version
237
231
  end
232
+
233
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
234
+ def _process_modifications(owner:, updater:, version:, payload:, logger: nil)
235
+ return version unless payload.is_a?(Hash) && !updater.nil?
236
+ return payload unless version.is_a?(Hash) && !owner.nil?
237
+
238
+ logger.debug(message: 'Modifications before processing.', details: payload) if logger.respond_to?(:debug)
239
+
240
+ excluded_keys = %w[modified dmphub_modifications dmphub_versions]
241
+
242
+ # Always remove the dmphub_versions and dmphub_modifications
243
+ excluded_keys.each { |key| payload.delete(key) }
244
+
245
+ # Always include the PK and SK!
246
+ payload['PK'] = version['PK']
247
+ payload['SK'] = Helper::DMP_LATEST_VERSION
248
+
249
+ # Set the :modified timestamps
250
+ now = Time.now.utc
251
+ version['modified'] = now.iso8601
252
+ version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
253
+
254
+ # Retain all the other attributes on the original version unless they are
255
+ # in the list of exclusions OR the incoming payload already has a value for it
256
+ version.keys.each do |key|
257
+ next if excluded_keys.include?(key) || !payload[key].nil?
258
+
259
+ payload[key] = version[key]
260
+ end
261
+ logger.debug(message: 'Modifications after processing.', details: payload) if logger.respond_to?(:debug)
262
+ payload
263
+ end
264
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
265
+
238
266
  end
239
267
  end
240
268
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.81'
4
+ VERSION = '0.1.82'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.81
4
+ version: 0.1.82
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley