uc3-dmp-id 0.1.80 → 0.1.82

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e69e96c9dc20009c9a8ae00a332cff2348e529ffbfce306418a781977bf13b0
4
- data.tar.gz: c3d4e37d7fefef7d98aa6606e5063a6539270c31d7916af1f964b99065b80724
3
+ metadata.gz: 9c9845166f4bdaebc57491464cf46037645a6fccd2cc3274aa988ee6c7a735ad
4
+ data.tar.gz: 39d2e885ac1b4a8fd95796464d117c6a86b060e4842b99814dc30972160dc9ce
5
5
  SHA512:
6
- metadata.gz: 92b6adb8619e99a00dcd61876c429d55757a6bb79841ffb5ddffbf04c0cc2fe10003464a8a3274b0886c639c2f527e241ca3b714c1b9e31c16d2fc125938d559
7
- data.tar.gz: 6f6519e59f7c708be73fe721a7b68013e17dd523ebd42e33d5b6aedc7755a5364d3dafbf4df4534a10d3d34c320aa79b17e122e15eea0c3a8477032b3c454e89
6
+ metadata.gz: d850ae3e53fb18bae12856a45f2fda95d66664f70052ed6f9c33e047de47728a5936012529a8727c59ccd671d687c6834d6880a97f915c9e4538d282ec3ea320
7
+ data.tar.gz: 2fd983b7d683a5da85f21a1f7001d5f8b1f7a47e798f5fffcb9db98e3486890d6982746d7a65b8d30377aa7eab5c8fcde377ba9b4d31fdc2cfd633d4bcb9fcb1
@@ -17,41 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- DOI_DOMAIN = 'https://doi.org/'
23
- SORT_OPTIONS = %w[title modified]
24
- SORT_DIRECTIONS = %w[asc desc]
25
- MAX_PAGE_SIZE = 100
26
- DEFAULT_PAGE_SIZE = 25
27
- DEFAULT_SORT_OPTION = 'modified'
28
- DEFAULT_SORT_DIR = 'desc'
29
-
30
20
  class << self
31
21
  # TODO: Replace this with ElasticSearch
32
22
  def search_dmps(args:, logger: nil)
33
- # Fetch the DMPs for each of the possible filter options
34
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
- owner = args['owner']
36
- org = args['org']
37
- funder = args['funder']
38
-
39
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
- org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
41
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
42
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
43
- logger&.debug(
44
- message: 'PKs found',
45
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
46
- )
47
- # return [] if pks.nil? || pks.empty?
48
-
49
- # Only use the DMPs that fit all of the filter criteria
50
- # dmps = pks.reduce(:&).flatten.uniq
51
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
52
25
 
53
- [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
54
34
 
35
+ []
55
36
  end
56
37
 
57
38
  # Find a DMP based on the contents of the incoming JSON
@@ -153,68 +134,74 @@ module Uc3DmpId
153
134
 
154
135
  private
155
136
 
156
- # Fetch the DMP IDs for the specified person's ORCID (or email)
157
- def _by_owner(owner:, client: nil, logger: nil)
158
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
159
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
160
- orcid = owner.to_s.strip
161
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
# Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
#
# @param owner_id [String] the bare ORCID (no domain), e.g. 0000-0002-1825-0097
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return [Object] whatever _process_search_response returns for the query response
# @raise [FinderError] when owner_id is nil or is not exactly one ORCID
def _by_owner(owner_id:, client: nil, logger: nil)
  orcid = owner_id.to_s
  # \A and \z anchor the whole string. ^ and $ only anchor a single line, which would let a
  # multi-line value containing one valid ORCID line through into the query values below.
  raise FinderError, MSG_INVALID_OWNER_ID unless orcid.match?(/\A([0-9A-Z]{4}-){3}[0-9A-Z]{4}\z/)

  # NOTE(review): DynamoDB documents only EQ, LE, LT, GE, GT, BEGINS_WITH and BETWEEN for the legacy
  # KeyConditions parameter, and legacy parameters cannot be combined with expression parameters
  # such as FilterExpression. Confirm that Uc3DmpDynamo::Client#query rewrites these args.
  args = {
    index_name: 'dmphub_owner_id_gsi',
    key_conditions: {
      dmphub_owner_id: {
        # Records may carry either the http or the https form of the ORCID URL
        attribute_value_list: [
          "http://orcid.org/#{orcid}",
          "https://orcid.org/#{orcid}"
        ],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
183
160
 
184
- # Fetch the DMP IDs for the specified funder
185
- def _by_funder(funder:, client: nil, logger: nil)
161
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
# on the DMP ID record)
#
# @param owner_org [String] the bare ROR identifier (no domain); it is downcased before use
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return [Object] whatever _process_search_response returns for the query response
# @raise [FinderError] when owner_org is nil or contains anything other than letters and digits
def _by_owner_org(owner_org:, client: nil, logger: nil)
  ror = owner_org.to_s.downcase
  # \A and \z anchor the whole string. ^ and $ only anchor a single line, which would let a
  # multi-line value with one alphanumeric line through into the query values below.
  # NOTE(review): the owner-id message is reused here; confirm whether an org-specific one exists.
  raise FinderError, MSG_INVALID_OWNER_ID unless ror.match?(/\A[a-zA-Z0-9]+\z/)

  # NOTE(review): DynamoDB documents only EQ, LE, LT, GE, GT, BEGINS_WITH and BETWEEN for the legacy
  # KeyConditions parameter, and legacy parameters cannot be combined with expression parameters
  # such as FilterExpression. Confirm that Uc3DmpDynamo::Client#query rewrites these args.
  args = {
    index_name: 'dmphub_owner_org_gsi',
    key_conditions: {
      dmphub_owner_org: {
        # Records may carry either the https or the http form of the ROR URL
        attribute_value_list: [
          "https://ror.org/#{ror}",
          "http://ror.org/#{ror}"
        ],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
208
185
 
209
- # Return all of the publicly visible DMPs
210
- def _publicly_visible(client: nil, logger: nil)
186
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
#
# @param day [String, #to_s] the modification day formatted as YYYY-MM-DD
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return [Object] whatever _process_search_response returns for the query response
# @raise [FinderError] when day is nil or is not exactly a YYYY-MM-DD value
def _by_mod_day(day:, client: nil, logger: nil)
  mod_day = day.to_s
  # Anchor both ends of the whole string: a leading ^ with no end anchor accepts trailing text
  # (and any multi-line value with one date-like line), which is then sent to the index as-is.
  # NOTE(review): the owner-id message is reused here; confirm whether a date-specific one exists.
  raise FinderError, MSG_INVALID_OWNER_ID unless mod_day.match?(/\A[0-9]{4}(-[0-9]{2}){2}\z/)

  # NOTE(review): DynamoDB documents only EQ, LE, LT, GE, GT, BEGINS_WITH and BETWEEN for the legacy
  # KeyConditions parameter, and legacy parameters cannot be combined with expression parameters
  # such as FilterExpression. Confirm that Uc3DmpDynamo::Client#query rewrites these args.
  args = {
    index_name: 'dmphub_modification_day_gsi',
    key_conditions: {
      dmphub_modification_day: {
        attribute_value_list: [mod_day],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
219
206
 
220
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -225,8 +212,8 @@ module Uc3DmpId
225
212
  next if item.nil?
226
213
 
227
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
228
- # dmp = _remove_narrative_if_private(json: dmp)
229
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
230
217
  end
231
218
  results.compact.uniq
232
219
  end
@@ -49,27 +49,21 @@ module Uc3DmpId
49
49
  logger&.debug(message: 'Version after process_harvester_mods', details: version)
50
50
  raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
51
51
 
52
- # Remove the version info and any lingering modification blocks
53
- version.delete('dmphub_versions')
54
- version.delete('dmphub_modifications')
55
-
56
- # Set the :modified timestamps
57
- now = Time.now.utc
58
- version['modified'] = now.iso8601
59
- version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
52
+ # Process the incoming payload
53
+ payload = _process_modifications(owner:, updater:, version:, payload:, logger:)
60
54
 
61
55
  # Save the changes
62
- resp = client.put_item(json: version, logger:)
56
+ resp = client.put_item(json: payload, logger:)
63
57
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
64
58
 
65
59
  # Send the updates to EZID
66
- _post_process(provenance:, json: version, logger:)
60
+ _post_process(provenance:, json: payload, logger:)
67
61
 
68
62
  # Return the new version record
69
63
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
70
64
 
71
65
  # Append the :dmphub_versions Array
72
- out = JSON.parse({ dmp: version }.to_json)
66
+ out = JSON.parse({ dmp: payload }.to_json)
73
67
  out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
74
68
  Helper.cleanse_dmp_json(json: out)
75
69
  end
@@ -235,6 +229,40 @@ module Uc3DmpId
235
229
  logger&.debug(message: 'Returning updated VERSION:', details: version)
236
230
  version
237
231
  end
232
+
233
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
234
# Merge the incoming payload with the current latest version so the result can be saved as the
# new latest version record.
#
# The payload wins wherever it supplies a non-nil value. Every other attribute is carried over
# from +version+, except 'modified', 'dmphub_modifications' and 'dmphub_versions'. The PK/SK and
# the modification timestamps are always set here and cannot be supplied by the caller.
#
# @param owner [Object, nil] owner of the DMP ID; when nil the payload is returned untouched
# @param updater [Object, nil] the party making the change; when nil +version+ is returned untouched
# @param version [Hash] the current latest version of the DMP ID (not modified by this method)
# @param payload [Hash] the incoming changes (modified in place and returned)
# @param logger [Object, nil] optional logger; only used when it responds to :debug
# @return [Hash] the merged payload, or +version+ / +payload+ unchanged when a guard applies
def _process_modifications(owner:, updater:, version:, payload:, logger: nil)
  return version unless payload.is_a?(Hash) && !updater.nil?
  return payload unless version.is_a?(Hash) && !owner.nil?

  logger.debug(message: 'Modifications before processing.', details: payload) if logger.respond_to?(:debug)

  excluded_keys = %w[modified dmphub_modifications dmphub_versions]

  # Never trust the incoming timestamp, modification blocks or version list
  excluded_keys.each { |key| payload.delete(key) }

  # Always include the PK and SK!
  payload['PK'] = version['PK']
  payload['SK'] = Helper::DMP_LATEST_VERSION

  # Retain all the other attributes on the original version unless they are
  # in the list of exclusions OR the incoming payload already has a value for it
  version.each do |key, value|
    next if excluded_keys.include?(key) || !payload[key].nil?

    payload[key] = value
  end

  # Set the :modified timestamps on the payload itself. 'modified' is an excluded key, so a value
  # stamped on +version+ would never be copied across and the saved record would lack it.
  now = Time.now.utc
  payload['modified'] = now.iso8601
  payload['dmphub_modification_day'] = now.strftime('%Y-%m-%d')

  logger.debug(message: 'Modifications after processing.', details: payload) if logger.respond_to?(:debug)
  payload
end
264
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
265
+
238
266
  end
239
267
  end
240
268
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # Gem version string; this hunk bumps the release from 0.1.80 to 0.1.82.
  module Uc3DmpId
4
- VERSION = '0.1.80'
4
+ VERSION = '0.1.82'
5
5
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.80
4
+ version: 0.1.82
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley