uc3-dmp-id 0.1.78 → 0.1.79

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 10b0e02321270e309ee7352e0adfc06f9f86d5b57397dc2d34cdd373aaacaadf
4
- data.tar.gz: 2985739c95e04e3d515cfce61f6ab61ec2e99bbb8edce925c8222b6b194216a6
3
+ metadata.gz: d05302ee8b8190c77e7efc0ad602ad0de2a74739b1653672d707b1fda95eec16
4
+ data.tar.gz: 3cca6911d51d58ff974308f8647bab075fb9bf717092bce45f3cbe2a151c7712
5
5
  SHA512:
6
- metadata.gz: f0594eb5e5faa1c214f953747a649dd5ff932eb7c554eada2db2e6e5dcc81c52565942768132c6d781000a84c55ccfa606829dd68f9775cd0f104b5070927c5b
7
- data.tar.gz: 50142c1ea8e666446ca51075ed1ddfa73a46e8ef68df87e64ca75e5da5bdfc1470fded80c52fe0c6524e65b1c365dce57146535bf744552da77273d9b3881074
6
+ metadata.gz: e19a514c8233bdf9abc32c375b636611181738f6f64818431d0b3e26b1fd40c3817b5dcf1cfc8e1a270e3c521ad67636dd50baaec7638af9a162ebb864b26975
7
+ data.tar.gz: 3b09fdc606348c0c3dddcc7536563920c2971d475981c1b11a89a7147157db712d416d52eafec3689ec0e004be3d845acdf83cbb2113b2df08aca58936421915
@@ -17,22 +17,41 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
+ ORCID_DOMAIN = 'https://orcid.org/'
21
+ ROR_DOMAIN = 'https://ror.org/'
22
+ DOI_DOMAIN = 'https://doi.org/'
23
+ SORT_OPTIONS = %w[title modified]
24
+ SORT_DIRECTIONS = %w[asc desc]
25
+ MAX_PAGE_SIZE = 100
26
+ DEFAULT_PAGE_SIZE = 25
27
+ DEFAULT_SORT_OPTION = 'modified'
28
+ DEFAULT_SORT_DIR = 'desc'
29
+
20
30
  class << self
21
31
  # TODO: Replace this with ElasticSearch
22
32
  def search_dmps(args:, logger: nil)
23
- client = Uc3DmpDynamo::Client.new
24
- return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
33
+ # Fetch the DMPs for each of the possible filter options
34
+ client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
+ owner = args['owner']
36
+ org = args['org']
37
+ funder = args['funder']
38
+
39
+ owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
+ org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
41
+ funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
42
+ # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
43
+ logger&.debug(
44
+ message: 'PKs found',
45
+ details: { owner: owner_pks, org: org_pks, funder: funder_pks }
46
+ )
47
+ # return [] if pks.nil? || pks.empty?
25
48
 
26
- unless args['owner_org_ror'].nil?
27
- return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
- logger:)
29
- end
30
- unless args['modification_day'].nil?
31
- return _by_mod_day(day: args['modification_day'], client:,
32
- logger:)
33
- end
49
+ # Only use the DMPs that fit all of the filter criteria
50
+ # dmps = pks.reduce(:&).flatten.uniq
51
+ # return [] if dmps.nil? || dmps.empty?
52
+
53
+ [owner_pks, org_pks, funder_pks].flatten.uniq
34
54
 
35
- []
36
55
  end
37
56
 
38
57
  # Find a DMP based on the contents of the incoming JSON
@@ -134,74 +153,66 @@ module Uc3DmpId
134
153
 
135
154
  private
136
155
 
137
- # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
- def _by_owner(owner_id:, client: nil, logger: nil)
139
- regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
156
+ # Fetch the DMP IDs for the specified person's ORCID (or email)
157
+ def _by_owner(owner:, client: nil, logger: nil)
158
+ orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
159
+ email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
160
+ orcid = owner.to_s.strip
161
+ return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
141
162
 
142
- args = {
143
- index_name: 'dmphub_owner_id_gsi',
144
- key_conditions: {
145
- dmphub_owner_id: {
146
- attribute_value_list: [
147
- "http://orcid.org/#{owner_id}",
148
- "https://orcid.org/#{owner_id}"
149
- ],
150
- comparison_operator: 'IN'
151
- }
152
- },
153
- filter_expression: 'SK = :version',
154
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
- }
156
- logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
- client = Uc3DmpDynamo::Client.new if client.nil?
158
- _process_search_response(response: client.query(args:, logger:))
163
+ orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
164
+ resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
165
+ return [] unless resp.is_a?(Hash)
166
+
167
+ logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
168
+ resp.fetch('dmps', [])
159
169
  end
160
170
 
161
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
- # on the DMP ID record)
163
- def _by_owner_org(owner_org:, client: nil, logger: nil)
171
+ # Fetch the DMP IDs for the specified organization/institution
172
+ def _by_org(org:, client: nil, logger: nil)
164
173
  regex = /^[a-zA-Z0-9]+$/
165
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
174
+ id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
175
+
176
+ resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
177
+ return [] unless resp.is_a?(Hash)
178
+
179
+ logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
180
+ resp.fetch('dmps', [])
181
+ end
166
182
 
183
+ # Fetch the DMP IDs for the specified funder
184
+ def _by_funder(funder:, client: nil, logger: nil)
185
+ regex = /^[a-zA-Z0-9]+$/
186
+ id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
187
+ id = "#{DOI_DOMAIN}#{org.strip}" if id.nil? && !(org.to_s =~ Helper::DOI_REGEX).nil?
188
+
189
+ resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
190
+ return [] unless resp.is_a?(Hash)
191
+
192
+ logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
193
+ resp.fetch('dmps', [])
194
+ end
195
+
196
+ # Fetch the DMP IDs that are marked as featured
197
+ def _by_featured(client: nil, logger: nil)
167
198
  args = {
168
- index_name: 'dmphub_owner_org_gsi',
169
- key_conditions: {
170
- dmphub_owner_org: {
171
- attribute_value_list: [
172
- "https://ror.org/#{owner_org.to_s.downcase}",
173
- "http://ror.org/#{owner_org.to_s.downcase}"
174
- ],
175
- comparison_operator: 'IN'
176
- }
177
- },
178
- filter_expression: 'SK = :version',
179
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
199
+ filter_expression: 'featured = :featured AND SK = :sk',
200
+ expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
180
201
  }
181
- logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
202
+ logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
182
203
  client = Uc3DmpDynamo::Client.new if client.nil?
183
- _process_search_response(response: client.query(args:, logger:))
204
+ _process_search_response(response: client.scan(args:))
184
205
  end
185
206
 
186
- # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
- def _by_mod_day(day:, client: nil, logger: nil)
188
- regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
- raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
-
207
+ # Return all of the publicly visible DMPs
208
+ def _publicly_visible(client: nil, logger: nil)
191
209
  args = {
192
- index_name: 'dmphub_modification_day_gsi',
193
- key_conditions: {
194
- dmphub_modification_day: {
195
- attribute_value_list: [day.to_s],
196
- comparison_operator: 'IN'
197
- }
198
- },
199
- filter_expression: 'SK = :version',
200
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
210
+ filter_expression: 'visibility = :visibility AND SK = :sk',
211
+ expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
201
212
  }
202
- logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
213
+ logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
203
214
  client = Uc3DmpDynamo::Client.new if client.nil?
204
- _process_search_response(response: client.query(args:, logger:))
215
+ _process_search_response(response: client.scan(args:))
205
216
  end
206
217
 
207
218
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -212,8 +223,8 @@ module Uc3DmpId
212
223
  next if item.nil?
213
224
 
214
225
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
215
- dmp = _remove_narrative_if_private(json: dmp)
216
- Helper.cleanse_dmp_json(json: dmp)
226
+ # dmp = _remove_narrative_if_private(json: dmp)
227
+ # Helper.cleanse_dmp_json(json: dmp)
217
228
  end
218
229
  results.compact.uniq
219
230
  end
@@ -16,9 +16,9 @@ module Uc3DmpId
16
16
  def update(provenance:, p_key:, json: {}, logger: nil)
17
17
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
18
18
 
19
- payload = Helper.parse_json(json:).fetch('dmp', {})
19
+ mods = Helper.parse_json(json:).fetch('dmp', {})
20
20
  p_key = Helper.append_pk_prefix(p_key:)
21
- logger.debug(message: "Incoming modifications for PK #{p_key}", details: payload) if logger.respond_to?(:debug)
21
+ logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
22
22
 
23
23
  # Fetch the latest version of the DMP ID
24
24
  client = Uc3DmpDynamo::Client.new
@@ -28,30 +28,29 @@ module Uc3DmpId
28
28
 
29
29
  # Verify that the DMP ID is updateable with the info passed in
30
30
  errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
31
- mods: payload['dmp'])
31
+ mods: mods['dmp'])
32
32
  logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
33
33
  raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
34
34
  # Don't continue if nothing has changed!
35
- raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: payload)
35
+ raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: mods)
36
36
 
37
37
  # Version the DMP ID record (if applicable).
38
38
  owner = latest_version['dmphub_provenance_id']
39
39
  updater = provenance['PK']
40
40
  version = Versioner.generate_version(client:, latest_version:, owner:,
41
41
  updater:, logger:)
42
- logger&.debug(message: 'New Version', details: version)
43
42
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
44
43
  # Bail if the system trying to make the update is not the creator of the DMP ID
45
44
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
46
45
 
47
46
  # Handle any changes to the dmphub_modifications section
48
- version = _process_harvester_mods(client:, p_key:, json: payload, version:, logger:)
49
- logger&.debug(message: 'Version after process_harvester_mods', details: version)
47
+ version = _process_harvester_mods(client:, p_key:, json: version, logger:)
50
48
 
51
- # Remove the version info and any lingering modification blocks
49
+ # Remove the version info because we don't want to save it on the record
52
50
  version.delete('dmphub_versions')
53
- version.delete('dmphub_modifications')
54
51
 
52
+ # Splice the assertions
53
+ version = _process_modifications(owner:, updater:, version:, mods:, logger:)
55
54
  # Set the :modified timestamps
56
55
  now = Time.now.utc
57
56
  version['modified'] = now.iso8601
@@ -68,9 +67,9 @@ module Uc3DmpId
68
67
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
69
68
 
70
69
  # Append the :dmphub_versions Array
71
- out = JSON.parse({ dmp: version }.to_json)
72
- out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
73
- Helper.cleanse_dmp_json(json: out)
70
+ json = JSON.parse({ dmp: version }.to_json)
71
+ json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
72
+ Helper.cleanse_dmp_json(json:)
74
73
  end
75
74
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
76
75
  # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -131,6 +130,24 @@ module Uc3DmpId
131
130
  end
132
131
  # rubocop:enable Metrics/AbcSize
133
132
 
133
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
134
+ def _process_modifications(owner:, updater:, version:, mods:, logger: nil)
135
+ return version unless mods.is_a?(Hash) && !updater.nil?
136
+ return mods unless version.is_a?(Hash) && !owner.nil?
137
+
138
+ logger.debug(message: 'Modifications before merge.', details: mods) if logger.respond_to?(:debug)
139
+ keys_to_retain = version.keys.select do |key|
140
+ (key.start_with?('dmphub_') && !%w[dmphub_modifications dmphub_versions].include?(key)) ||
141
+ key.start_with?('PK') || key.start_with?('SK') || key.start_with?('dmproadmap_related_identifiers')
142
+ end
143
+ keys_to_retain.each do |key|
144
+ mods[key] = version[key]
145
+ end
146
+ logger.debug(message: 'Modifications after merge.', details: mods) if logger.respond_to?(:debug)
147
+ mods
148
+ end
149
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
150
+
134
151
  # Once the DMP has been updated, we need to update its DOI metadata
135
152
  # -------------------------------------------------------------------------
136
153
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
@@ -174,8 +191,7 @@ module Uc3DmpId
174
191
  # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
175
192
 
176
193
  # Fetch any Harvester modifications to the JSON
177
- def _process_harvester_mods(client:, p_key:, json:, version:, logger: nil)
178
- logger&.debug(message: 'Incoming modifications', details: json)
194
+ def _process_harvester_mods(client:, p_key:, json:, logger: nil)
179
195
  return json if json.fetch('dmphub_modifications', []).empty?
180
196
 
181
197
  # Fetch the `"SK": "HARVESTER_MODS"` record
@@ -185,54 +201,22 @@ module Uc3DmpId
185
201
  )
186
202
  return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
187
203
 
188
- logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
189
204
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
190
205
  # so just find them and update the status accordingly
191
- original = resp.dup
192
- json['dmproadmap_related_identifiers'] = [] if json['dmproadmap_related_identifiers'].nil?
193
-
206
+ mods = resp.dup
194
207
  json['dmphub_modifications'].each do |entry|
195
208
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
196
209
 
197
210
  entry['dmproadmap_related_identifiers'].each do |related|
198
- # Determine if the HARVESTER_MODS record even knows about the mod
199
- related_id = related.respond_to?(:identifier) ? related.identifier : related['identifier']
200
- related_domain = related.respond_to?(:domain) ? related.domain : related['domain']
201
- key = "#{related_domain.end_with?('/') ? related_domain : "#{related_domain}/"}#{related_id}"
202
- key_found = original['related_works'].has_key?(key)
203
- logger&.debug(message: "No matching HARVEST_MOD found for #{key}") unless key_found
204
- next unless key_found
205
-
206
- # Update the status in the HARVESTER_MODS record
207
- logger&.debug(message: "Updating status for #{key} from #{original['related_works'][key]['status']} to #{related['status']}")
208
- original['related_works'][key]['status'] = related['status']
209
-
210
- existing = version['dmproadmap_related_identifiers'].select do |ri|
211
- ri['identifier'] == key
212
- end
213
-
214
- # Add it if it was approved and doesn't exist in dmproadmap_related_identifiers
215
- if related['status'] == 'approved' && existing.empty?
216
- version['dmproadmap_related_identifiers'] << {
217
- identifier: key,
218
- work_type: related['work_type'],
219
- type: related['type'],
220
- descriptor: related['descriptor'],
221
- citation: related['citation']
222
- }
223
- elsif related['status'] == 'rejected' && existing.any?
224
- # otherwise remove it
225
- version['dmproadmap_related_identifiers'] = version['dmproadmap_related_identifiers'].reject { |ri| ri == existing.first }
226
- end
211
+ next if mods['related_works'][related.identifier].nil?
212
+
213
+ mods['related_works'][related.identifier]['status'] = related['status']
227
214
  end
228
215
  end
229
216
 
230
- logger&.debug(message: 'Updating HARVESTER_MODS with:', details: original)
231
- resp = client.put_item(json: original, logger:)
232
- logger&.error(message: 'Unable to update HARVESTER_MODS', details: original) if resp.nil?
233
-
234
- logger&.debug(message: 'Returning updated VERSION:', details: version)
235
- version
217
+ client.put_item(json: mods, logger:)
218
+ json.delete('dmphub_modifications')
219
+ json
236
220
  end
237
221
  end
238
222
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.78'
4
+ VERSION = '0.1.79'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.78
4
+ version: 0.1.79
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json