uc3-dmp-id 0.1.81 → 0.1.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3de2c7edd72200592a63d6bad11a19ed289c9f3dfa35272d82ff98ec666b89ab
4
- data.tar.gz: 7b887212a1babb8f7928ba8bc3062c4c6a282d29af346c36d5e0f61083e67356
3
+ metadata.gz: 9c9845166f4bdaebc57491464cf46037645a6fccd2cc3274aa988ee6c7a735ad
4
+ data.tar.gz: 39d2e885ac1b4a8fd95796464d117c6a86b060e4842b99814dc30972160dc9ce
5
5
  SHA512:
6
- metadata.gz: 8685a865bb59f270cced7d0ffab66cd12a87a03a2d078accb2676a4d28ba73f13cf3d728b829e7e207aecddaa4cc10a53257414cb53c8e13f8cd0559b34e1210
7
- data.tar.gz: 13886474742a157b539d70b7e77961ab2a5272818ab0bd6df519ea11b7a6d18720b1f4268ec43ee532f3e4493bd92b9a5b31485d0530b38e6450972a821ef429
6
+ metadata.gz: d850ae3e53fb18bae12856a45f2fda95d66664f70052ed6f9c33e047de47728a5936012529a8727c59ccd671d687c6834d6880a97f915c9e4538d282ec3ea320
7
+ data.tar.gz: 2fd983b7d683a5da85f21a1f7001d5f8b1f7a47e798f5fffcb9db98e3486890d6982746d7a65b8d30377aa7eab5c8fcde377ba9b4d31fdc2cfd633d4bcb9fcb1
@@ -17,43 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- DOI_DOMAIN = 'https://doi.org/'
23
- SORT_OPTIONS = %w[title modified]
24
- SORT_DIRECTIONS = %w[asc desc]
25
- MAX_PAGE_SIZE = 100
26
- DEFAULT_PAGE_SIZE = 25
27
- DEFAULT_SORT_OPTION = 'modified'
28
- DEFAULT_SORT_DIR = 'desc'
29
-
30
20
  class << self
31
21
  # TODO: Replace this with ElasticSearch
32
22
  def search_dmps(args:, logger: nil)
33
- # Fetch the DMPs for each of the possible filter options
34
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
- owner = args['owner']
36
- org = args['org']
37
- funder = args['funder']
38
-
39
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
- # There may be multiple Org ids, so query them all
41
- org_pks = org.nil? ? [] : org.split('|').map { |o| _by_org(org: o, client:, logger:) }
42
- org_pks = org_pks.flatten.uniq
43
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
44
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
45
- logger&.debug(
46
- message: 'PKs found',
47
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
48
- )
49
- # return [] if pks.nil? || pks.empty?
50
-
51
- # Only use the DMPs that fit all of the filter criteria
52
- # dmps = pks.reduce(:&).flatten.uniq
53
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
54
25
 
55
- [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
56
34
 
35
+ []
57
36
  end
58
37
 
59
38
  # Find a DMP based on the contents of the incoming JSON
@@ -155,68 +134,74 @@ module Uc3DmpId
155
134
 
156
135
  private
157
136
 
158
- # Fetch the DMP IDs for the specified person's ORCID (or email)
159
- def _by_owner(owner:, client: nil, logger: nil)
160
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
161
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
162
- orcid = owner.to_s.strip
163
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
+ # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
# Query the 'dmphub_owner_id_gsi' index for the latest version of every DMP ID owned by an ORCID.
# Both the http:// and https:// forms of the ORCID URL are searched.
#
# @param owner_id [#to_s] bare ORCID without the domain, e.g. '0000-0002-1825-0097'
# @param client [#query, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to #info
# @return [Object] the result of _process_search_response for the query response
# @raise [FinderError] when owner_id is nil or is not a well-formed ORCID
def _by_owner(owner_id:, client: nil, logger: nil)
  orcid = owner_id.to_s
  # \A and \z anchor the whole string. ^ and $ only anchor a line, which let multi-line
  # input through whenever one of its lines happened to look like an ORCID.
  raise FinderError, MSG_INVALID_OWNER_ID unless orcid.match?(/\A([0-9A-Z]{4}-){3}[0-9A-Z]{4}\z/)

  # NOTE(review): this combines a legacy key condition ('IN') with a filter_expression;
  # confirm the client's #query turns that into a request DynamoDB accepts.
  args = {
    index_name: 'dmphub_owner_id_gsi',
    key_conditions: {
      dmphub_owner_id: {
        attribute_value_list: ["http://orcid.org/#{orcid}", "https://orcid.org/#{orcid}"],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
185
160
 
186
- # Fetch the DMP IDs for the specified funder
187
- def _by_funder(funder:, client: nil, logger: nil)
161
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
+ # on the DMP ID record)
163
# Query the 'dmphub_owner_org_gsi' index for the latest version of every DMP ID owned by an organization.
# The identifier is lower-cased and both the https:// and http:// forms of the ROR URL are searched.
#
# @param owner_org [#to_s] bare ROR identifier without the domain (letters and digits only)
# @param client [#query, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to #info
# @return [Object] the result of _process_search_response for the query response
# @raise [FinderError] when owner_org is nil or contains anything other than letters and digits
def _by_owner_org(owner_org:, client: nil, logger: nil)
  ror = owner_org.to_s.downcase
  # \A and \z anchor the whole string. ^ and $ only anchor a line, which let multi-line
  # input through whenever one of its lines was purely alphanumeric.
  # NOTE(review): the error reuses MSG_INVALID_OWNER_ID; an org-specific message may be intended.
  raise FinderError, MSG_INVALID_OWNER_ID unless ror.match?(/\A[a-zA-Z0-9]+\z/)

  # NOTE(review): this combines a legacy key condition ('IN') with a filter_expression;
  # confirm the client's #query turns that into a request DynamoDB accepts.
  args = {
    index_name: 'dmphub_owner_org_gsi',
    key_conditions: {
      dmphub_owner_org: {
        attribute_value_list: ["https://ror.org/#{ror}", "http://ror.org/#{ror}"],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
210
185
 
211
- # Return all of the publicly visible DMPs
212
- def _publicly_visible(client: nil, logger: nil)
186
+ # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
# Query the 'dmphub_modification_day_gsi' index for the latest version of every DMP ID
# whose modification day equals the given date.
#
# @param day [#to_s] date in the form 'YYYY-MM-DD'
# @param client [#query, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger; only used when it responds to #info
# @return [Object] the result of _process_search_response for the query response
# @raise [FinderError] when day is nil or is not exactly 'YYYY-MM-DD'
def _by_mod_day(day:, client: nil, logger: nil)
  date = day.to_s
  # The whole string must be the date. Without an end anchor any trailing text was
  # accepted and sent to the index as part of the key value.
  # NOTE(review): the error reuses MSG_INVALID_OWNER_ID; a date-specific message may be intended.
  raise FinderError, MSG_INVALID_OWNER_ID unless date.match?(/\A[0-9]{4}(-[0-9]{2}){2}\z/)

  # NOTE(review): this combines a legacy key condition ('IN') with a filter_expression;
  # confirm the client's #query turns that into a request DynamoDB accepts.
  args = {
    index_name: 'dmphub_modification_day_gsi',
    key_conditions: {
      dmphub_modification_day: {
        attribute_value_list: [date],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
221
206
 
222
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -227,8 +212,8 @@ module Uc3DmpId
227
212
  next if item.nil?
228
213
 
229
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
230
- # dmp = _remove_narrative_if_private(json: dmp)
231
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
232
217
  end
233
218
  results.compact.uniq
234
219
  end
@@ -49,27 +49,21 @@ module Uc3DmpId
49
49
  logger&.debug(message: 'Version after process_harvester_mods', details: version)
50
50
  raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
51
51
 
52
- # Remove the version info any any lingering modification blocks
53
- version.delete('dmphub_versions')
54
- version.delete('dmphub_modifications')
55
-
56
- # Set the :modified timestamps
57
- now = Time.now.utc
58
- version['modified'] = now.iso8601
59
- version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
52
+ # Process the incoming payload
53
+ payload = _process_modifications(owner:, updater:, version:, payload:, logger:)
60
54
 
61
55
  # Save the changes
62
- resp = client.put_item(json: version, logger:)
56
+ resp = client.put_item(json: payload, logger:)
63
57
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
64
58
 
65
59
  # Send the updates to EZID
66
- _post_process(provenance:, json: version, logger:)
60
+ _post_process(provenance:, json: payload, logger:)
67
61
 
68
62
  # Return the new version record
69
63
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
70
64
 
71
65
  # Append the :dmphub_versions Array
72
- out = JSON.parse({ dmp: version }.to_json)
66
+ out = JSON.parse({ dmp: payload }.to_json)
73
67
  out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
74
68
  Helper.cleanse_dmp_json(json: out)
75
69
  end
@@ -235,6 +229,40 @@ module Uc3DmpId
235
229
  logger&.debug(message: 'Returning updated VERSION:', details: version)
236
230
  version
237
231
  end
232
+
233
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
234
# Merge an incoming payload with the current latest version to build the record that gets saved.
#
# Steps, in order:
#   1. 'modified', 'dmphub_modifications' and 'dmphub_versions' are removed from the payload
#   2. 'PK' is copied from +version+ and 'SK' is set to Helper::DMP_LATEST_VERSION
#   3. every other attribute of +version+ is carried over unless the payload already has a
#      non-nil value for that key
#   4. 'modified' and 'dmphub_modification_day' are stamped with the current UTC time
#
# The timestamps are written to the payload because the payload is what this method returns.
# 'modified' is in the exclusion list, so a value set on +version+ is never copied across.
# +payload+ is modified in place; +version+ is left unchanged.
#
# @param owner [Object] returned early with the untouched payload when nil
# @param updater [Object] returned early with the untouched version when nil
# @param version [Hash] the current latest version of the record
# @param payload [Hash] the incoming changes
# @param logger [Object, nil] optional logger; only used when it responds to #debug
# @return [Hash, Object] the merged payload; +version+ as-is when payload is not a Hash or
#   updater is nil; +payload+ as-is when version is not a Hash or owner is nil
def _process_modifications(owner:, updater:, version:, payload:, logger: nil)
  return version unless payload.is_a?(Hash) && !updater.nil?
  return payload unless version.is_a?(Hash) && !owner.nil?

  logger.debug(message: 'Modifications before processing.', details: payload) if logger.respond_to?(:debug)

  excluded_keys = %w[modified dmphub_modifications dmphub_versions]

  # Never trust these when they come from the caller
  excluded_keys.each { |key| payload.delete(key) }

  # Always include the PK and SK!
  payload['PK'] = version['PK']
  payload['SK'] = Helper::DMP_LATEST_VERSION

  # Retain everything else from the original version unless it is excluded
  # or the incoming payload already has a value for it
  version.each do |key, value|
    next if excluded_keys.include?(key) || !payload[key].nil?

    payload[key] = value
  end

  # Stamp last, so a client-supplied or carried-over modification day cannot survive
  now = Time.now.utc
  payload['modified'] = now.iso8601
  payload['dmphub_modification_day'] = now.strftime('%Y-%m-%d')

  logger.debug(message: 'Modifications after processing.', details: payload) if logger.respond_to?(:debug)
  payload
end
264
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
265
+
238
266
  end
239
267
  end
240
268
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.81'
4
+ VERSION = '0.1.82'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.81
4
+ version: 0.1.82
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley