uc3-dmp-id 0.1.78 → 0.1.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 10b0e02321270e309ee7352e0adfc06f9f86d5b57397dc2d34cdd373aaacaadf
4
- data.tar.gz: 2985739c95e04e3d515cfce61f6ab61ec2e99bbb8edce925c8222b6b194216a6
3
+ metadata.gz: d05302ee8b8190c77e7efc0ad602ad0de2a74739b1653672d707b1fda95eec16
4
+ data.tar.gz: 3cca6911d51d58ff974308f8647bab075fb9bf717092bce45f3cbe2a151c7712
5
5
  SHA512:
6
- metadata.gz: f0594eb5e5faa1c214f953747a649dd5ff932eb7c554eada2db2e6e5dcc81c52565942768132c6d781000a84c55ccfa606829dd68f9775cd0f104b5070927c5b
7
- data.tar.gz: 50142c1ea8e666446ca51075ed1ddfa73a46e8ef68df87e64ca75e5da5bdfc1470fded80c52fe0c6524e65b1c365dce57146535bf744552da77273d9b3881074
6
+ metadata.gz: e19a514c8233bdf9abc32c375b636611181738f6f64818431d0b3e26b1fd40c3817b5dcf1cfc8e1a270e3c521ad67636dd50baaec7638af9a162ebb864b26975
7
+ data.tar.gz: 3b09fdc606348c0c3dddcc7536563920c2971d475981c1b11a89a7147157db712d416d52eafec3689ec0e004be3d845acdf83cbb2113b2df08aca58936421915
@@ -17,22 +17,41 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
+ ORCID_DOMAIN = 'https://orcid.org/'
21
+ ROR_DOMAIN = 'https://ror.org/'
22
+ DOI_DOMAIN = 'https://doi.org/'
23
+ SORT_OPTIONS = %w[title modified]
24
+ SORT_DIRECTIONS = %w[asc desc]
25
+ MAX_PAGE_SIZE = 100
26
+ DEFAULT_PAGE_SIZE = 25
27
+ DEFAULT_SORT_OPTION = 'modified'
28
+ DEFAULT_SORT_DIR = 'desc'
29
+
20
30
  class << self
21
31
  # TODO: Replace this with ElasticSearch
22
32
  def search_dmps(args:, logger: nil)
23
- client = Uc3DmpDynamo::Client.new
24
- return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
33
+ # Fetch the DMPs for each of the possible filter options
34
+ client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
+ owner = args['owner']
36
+ org = args['org']
37
+ funder = args['funder']
38
+
39
+ owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
+ org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
41
+ funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
42
+ # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
43
+ logger&.debug(
44
+ message: 'PKs found',
45
+ details: { owner: owner_pks, org: org_pks, funder: funder_pks }
46
+ )
47
+ # return [] if pks.nil? || pks.empty?
25
48
 
26
- unless args['owner_org_ror'].nil?
27
- return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
- logger:)
29
- end
30
- unless args['modification_day'].nil?
31
- return _by_mod_day(day: args['modification_day'], client:,
32
- logger:)
33
- end
49
+ # Only use the DMPs that fit all of the filter criteria
50
+ # dmps = pks.reduce(:&).flatten.uniq
51
+ # return [] if dmps.nil? || dmps.empty?
52
+
53
+ [owner_pks, org_pks, funder_pks].flatten.uniq
34
54
 
35
- []
36
55
  end
37
56
 
38
57
  # Find a DMP based on the contents of the incoming JSON
@@ -134,74 +153,66 @@ module Uc3DmpId
134
153
 
135
154
  private
136
155
 
137
- # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
- def _by_owner(owner_id:, client: nil, logger: nil)
139
- regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
156
+ # Fetch the DMP IDs for the specified person's ORCID (or email)
157
+ def _by_owner(owner:, client: nil, logger: nil)
158
+ orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
159
+ email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
160
+ orcid = owner.to_s.strip
161
+ return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
141
162
 
142
- args = {
143
- index_name: 'dmphub_owner_id_gsi',
144
- key_conditions: {
145
- dmphub_owner_id: {
146
- attribute_value_list: [
147
- "http://orcid.org/#{owner_id}",
148
- "https://orcid.org/#{owner_id}"
149
- ],
150
- comparison_operator: 'IN'
151
- }
152
- },
153
- filter_expression: 'SK = :version',
154
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
- }
156
- logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
- client = Uc3DmpDynamo::Client.new if client.nil?
158
- _process_search_response(response: client.query(args:, logger:))
163
+ orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
164
+ resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
165
+ return [] unless resp.is_a?(Hash)
166
+
167
+ logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
168
+ resp.fetch('dmps', [])
159
169
  end
160
170
 
161
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
- # on the DMP ID record)
163
- def _by_owner_org(owner_org:, client: nil, logger: nil)
171
+ # Fetch the DMP IDs for the specified organization/institution
172
+ def _by_org(org:, client: nil, logger: nil)
164
173
  regex = /^[a-zA-Z0-9]+$/
165
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
174
+ id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
175
+
176
+ resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
177
+ return [] unless resp.is_a?(Hash)
178
+
179
+ logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
180
+ resp.fetch('dmps', [])
181
+ end
166
182
 
183
+ # Fetch the DMP IDs for the specified funder
184
+ def _by_funder(funder:, client: nil, logger: nil)
185
+ regex = /^[a-zA-Z0-9]+$/
186
+ id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
187
+ id = "#{DOI_DOMAIN}#{org.strip}" if id.nil? && !(org.to_s =~ Helper::DOI_REGEX).nil?
188
+
189
+ resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
190
+ return [] unless resp.is_a?(Hash)
191
+
192
+ logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
193
+ resp.fetch('dmps', [])
194
+ end
195
+
196
+ # Fetch the DMP IDs that are marked as featured
197
+ def _by_featured(client: nil, logger: nil)
167
198
  args = {
168
- index_name: 'dmphub_owner_org_gsi',
169
- key_conditions: {
170
- dmphub_owner_org: {
171
- attribute_value_list: [
172
- "https://ror.org/#{owner_org.to_s.downcase}",
173
- "http://ror.org/#{owner_org.to_s.downcase}"
174
- ],
175
- comparison_operator: 'IN'
176
- }
177
- },
178
- filter_expression: 'SK = :version',
179
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
199
+ filter_expression: 'featured = :featured AND SK = :sk',
200
+ expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
180
201
  }
181
- logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
202
+ logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
182
203
  client = Uc3DmpDynamo::Client.new if client.nil?
183
- _process_search_response(response: client.query(args:, logger:))
204
+ _process_search_response(response: client.scan(args:))
184
205
  end
185
206
 
186
- # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
- def _by_mod_day(day:, client: nil, logger: nil)
188
- regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
- raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
-
207
+ # Return all of the publicly visible DMPs
208
+ def _publicly_visible(client: nil, logger: nil)
191
209
  args = {
192
- index_name: 'dmphub_modification_day_gsi',
193
- key_conditions: {
194
- dmphub_modification_day: {
195
- attribute_value_list: [day.to_s],
196
- comparison_operator: 'IN'
197
- }
198
- },
199
- filter_expression: 'SK = :version',
200
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
210
+ filter_expression: 'visibility = :visibility AND SK = :sk',
211
+ expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
201
212
  }
202
- logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
213
+ logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
203
214
  client = Uc3DmpDynamo::Client.new if client.nil?
204
- _process_search_response(response: client.query(args:, logger:))
215
+ _process_search_response(response: client.scan(args:))
205
216
  end
206
217
 
207
218
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -212,8 +223,8 @@ module Uc3DmpId
212
223
  next if item.nil?
213
224
 
214
225
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
215
- dmp = _remove_narrative_if_private(json: dmp)
216
- Helper.cleanse_dmp_json(json: dmp)
226
+ # dmp = _remove_narrative_if_private(json: dmp)
227
+ # Helper.cleanse_dmp_json(json: dmp)
217
228
  end
218
229
  results.compact.uniq
219
230
  end
@@ -16,9 +16,9 @@ module Uc3DmpId
16
16
  def update(provenance:, p_key:, json: {}, logger: nil)
17
17
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
18
18
 
19
- payload = Helper.parse_json(json:).fetch('dmp', {})
19
+ mods = Helper.parse_json(json:).fetch('dmp', {})
20
20
  p_key = Helper.append_pk_prefix(p_key:)
21
- logger.debug(message: "Incoming modifications for PK #{p_key}", details: payload) if logger.respond_to?(:debug)
21
+ logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
22
22
 
23
23
  # Fetch the latest version of the DMP ID
24
24
  client = Uc3DmpDynamo::Client.new
@@ -28,30 +28,29 @@ module Uc3DmpId
28
28
 
29
29
  # Verify that the DMP ID is updateable with the info passed in
30
30
  errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
31
- mods: payload['dmp'])
31
+ mods: mods['dmp'])
32
32
  logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
33
33
  raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
34
34
  # Don't continue if nothing has changed!
35
- raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: payload)
35
+ raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: mods)
36
36
 
37
37
  # Version the DMP ID record (if applicable).
38
38
  owner = latest_version['dmphub_provenance_id']
39
39
  updater = provenance['PK']
40
40
  version = Versioner.generate_version(client:, latest_version:, owner:,
41
41
  updater:, logger:)
42
- logger&.debug(message: 'New Version', details: version)
43
42
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
44
43
  # Bail if the system trying to make the update is not the creator of the DMP ID
45
44
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
46
45
 
47
46
  # Handle any changes to the dmphub_modifications section
48
- version = _process_harvester_mods(client:, p_key:, json: payload, version:, logger:)
49
- logger&.debug(message: 'Version after process_harvester_mods', details: version)
47
+ version = _process_harvester_mods(client:, p_key:, json: version, logger:)
50
48
 
51
- # Remove the version info any any lingering modification blocks
49
+ # Remove the version info because we don't want to save it on the record
52
50
  version.delete('dmphub_versions')
53
- version.delete('dmphub_modifications')
54
51
 
52
+ # Splice the assertions
53
+ version = _process_modifications(owner:, updater:, version:, mods:, logger:)
55
54
  # Set the :modified timestamps
56
55
  now = Time.now.utc
57
56
  version['modified'] = now.iso8601
@@ -68,9 +67,9 @@ module Uc3DmpId
68
67
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
69
68
 
70
69
  # Append the :dmphub_versions Array
71
- out = JSON.parse({ dmp: version }.to_json)
72
- out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
73
- Helper.cleanse_dmp_json(json: out)
70
+ json = JSON.parse({ dmp: version }.to_json)
71
+ json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
72
+ Helper.cleanse_dmp_json(json:)
74
73
  end
75
74
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
76
75
  # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -131,6 +130,24 @@ module Uc3DmpId
131
130
  end
132
131
  # rubocop:enable Metrics/AbcSize
133
132
 
133
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
134
+ def _process_modifications(owner:, updater:, version:, mods:, logger: nil)
135
+ return version unless mods.is_a?(Hash) && !updater.nil?
136
+ return mods unless version.is_a?(Hash) && !owner.nil?
137
+
138
+ logger.debug(message: 'Modifications before merge.', details: mods) if logger.respond_to?(:debug)
139
+ keys_to_retain = version.keys.select do |key|
140
+ (key.start_with?('dmphub_') && !%w[dmphub_modifications dmphub_versions].include?(key)) ||
141
+ key.start_with?('PK') || key.start_with?('SK') || key.start_with?('dmproadmap_related_identifiers')
142
+ end
143
+ keys_to_retain.each do |key|
144
+ mods[key] = version[key]
145
+ end
146
+ logger.debug(message: 'Modifications after merge.', details: mods) if logger.respond_to?(:debug)
147
+ mods
148
+ end
149
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
150
+
134
151
  # Once the DMP has been updated, we need to update its DOI metadata
135
152
  # -------------------------------------------------------------------------
136
153
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
@@ -174,8 +191,7 @@ module Uc3DmpId
174
191
  # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
175
192
 
176
193
  # Fetch any Harvester modifications to the JSON
177
- def _process_harvester_mods(client:, p_key:, json:, version:, logger: nil)
178
- logger&.debug(message: 'Incoming modifications', details: json)
194
+ def _process_harvester_mods(client:, p_key:, json:, logger: nil)
179
195
  return json if json.fetch('dmphub_modifications', []).empty?
180
196
 
181
197
  # Fetch the `"SK": "HARVESTER_MODS"` record
@@ -185,54 +201,22 @@ module Uc3DmpId
185
201
  )
186
202
  return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
187
203
 
188
- logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
189
204
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
190
205
  # so just find them and update the status accordingly
191
- original = resp.dup
192
- json['dmproadmap_related_identifiers'] = [] if json['dmproadmap_related_identifiers'].nil?
193
-
206
+ mods = resp.dup
194
207
  json['dmphub_modifications'].each do |entry|
195
208
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
196
209
 
197
210
  entry['dmproadmap_related_identifiers'].each do |related|
198
- # Detrmine if the HARVESTER_MODS record even knows about the mod
199
- related_id = related.respond_to?(:identifier) ? related.identifier : related['identifier']
200
- related_domain = related.respond_to?(:domain) ? related.domain : related['domain']
201
- key = "#{related_domain.end_with?('/') ? related_domain : "#{related_domain}/"}#{related_id}"
202
- key_found = original['related_works'].has_key?(key)
203
- logger&.debug(message: "No matching HARVEST_MOD found for #{key}") unless key_found
204
- next unless key_found
205
-
206
- # Update the status in the HARVESTER_MODS record
207
- logger&.debug(message: "Updating status for #{key} from #{original['related_works'][key]['status']} to #{related['status']}")
208
- original['related_works'][key]['status'] = related['status']
209
-
210
- existing = version['dmproadmap_related_identifiers'].select do |ri|
211
- ri['identifier'] == key
212
- end
213
-
214
- # Add it if it was approved and doesn't exist in dmproadmap_related_identifiers
215
- if related['status'] == 'approved' && existing.empty?
216
- version['dmproadmap_related_identifiers'] << {
217
- identifier: key,
218
- work_type: related['work_type'],
219
- type: related['type'],
220
- descriptor: related['descriptor'],
221
- citation: related['citation']
222
- }
223
- elsif related['status'] == 'rejected' && existing.any?
224
- # otherwise remove it
225
- version['dmproadmap_related_identifiers'] = version['dmproadmap_related_identifiers'].reject { |ri| ri == existing.first }
226
- end
211
+ next if mods['related_works'][related.identifier].nil?
212
+
213
+ mods['related_works'][related.identifier]['status'] = related['status']
227
214
  end
228
215
  end
229
216
 
230
- logger&.debug(message: 'Updating HARVESTER_MODS with:', details: original)
231
- resp = client.put_item(json: original, logger:)
232
- logger&.error(message: 'Unable to update HARVESTER_MODS', details: original) if resp.nil?
233
-
234
- logger&.debug(message: 'Returning updated VERSION:', details: version)
235
- version
217
+ client.put_item(json: mods, logger:)
218
+ json.delete('dmphub_modifications')
219
+ json
236
220
  end
237
221
  end
238
222
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.78'
4
+ VERSION = '0.1.79'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.78
4
+ version: 0.1.79
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json