uc3-dmp-id 0.1.68 → 0.1.70

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f6ec06f994f12700ca9584e1a9f1bb42c5ff556e7ff6f1d9610c31dbd6cad2b
4
- data.tar.gz: 32684f7693d73bc03c6eb7ea07906046895031d6aa3ec045c4c898fe04c03318
3
+ metadata.gz: ccb63f060db6bbbf29aacb329a163adcb1c3ee80ecd9eed385894b5d3f824380
4
+ data.tar.gz: 9caa229b0f0e270ac9da5160af0d1b36876cf95bbc5d4b9e2dc92a8a032c0406
5
5
  SHA512:
6
- metadata.gz: 37817e517884f4fc2d79d4c63aa71bbdede1fe74b182212b84a6e17aa685ac4696415bdb2b7a3ad19ab4d55a83e1015f50dda730ca9f84cc125627b38c8606eb
7
- data.tar.gz: 700da77d44d8302b2b4fdfd4d985ae4a0f48ffbed10a74b1924a6a7fd42f1eb7cf8bb722342c93a205d84eb70cfa65a2abe81e902e6be394df1e363e37f47977
6
+ metadata.gz: ba8bfca2d71f74097386240b43888e1f4fbe2d0443a881f5a264b0db475f224bfc7e96f9592f61b108d82a9570d15613a7d5b098d4495319c6fc601e40d194e5
7
+ data.tar.gz: e86673a1a825b4541ee868d5c7dac78d3cae186c328151c09e9ef6390bf687594ed36cd434d6f0ddd5a5b22cbd22c1627de0b5d6cbf465ca221f5c3b7b23d420
@@ -17,40 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- SORT_OPTIONS = %w[title modified]
23
- SORT_DIRECTIONS = %w[asc desc]
24
- MAX_PAGE_SIZE = 100
25
- DEFAULT_PAGE_SIZE = 25
26
- DEFAULT_SORT_OPTION = 'modified'
27
- DEFAULT_SORT_DIR = 'desc'
28
-
29
20
  class << self
30
21
  # TODO: Replace this with ElasticSearch
31
22
  def search_dmps(args:, logger: nil)
32
- # Fetch the DMPs for each of the possible filter options
33
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
34
- owner = args['owner']
35
- org = args['org']
36
- funder = args['funder']
37
-
38
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
39
- org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
40
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
41
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
42
- logger&.debug(
43
- message: 'PKs found',
44
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
45
- )
46
- # return [] if pks.nil? || pks.empty?
47
-
48
- # Only use the DMPs that fit all of the filter criteria
49
- # dmps = pks.reduce(:&).flatten.uniq
50
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
51
25
 
52
- [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
53
34
 
35
+ []
54
36
  end
55
37
 
56
38
  # Find a DMP based on the contents of the incoming JSON
@@ -152,65 +134,74 @@ module Uc3DmpId
152
134
 
153
135
  private
154
136
 
155
- # Fetch the DMP IDs for the specified person's ORCID (or email)
156
- def _by_owner(owner:, client: nil, logger: nil)
157
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
158
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
159
- orcid = owner.to_s.strip
160
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
+ # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
+ def _by_owner(owner_id:, client: nil, logger: nil)
139
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
161
141
 
162
- orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
163
- resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
164
- return [] unless resp.is_a?(Hash)
165
-
166
- logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
167
- resp.fetch('dmps', [])
168
- end
169
-
170
- # Fetch the DMP IDs for the specified organization/institution
171
- def _by_org(org:, client: nil, logger: nil)
172
- regex = /^[a-zA-Z0-9]+$/
173
- ror = "#{ROR_DOMAIN}/#{org.strip}" unless (org.to_s =~ regex).nil?
174
-
175
- resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: ror }, logger:)
176
- return [] unless resp.is_a?(Hash)
177
-
178
- logger&.debug(message: "DMPs for AFFILIATION #{ror}", details: resp)
179
- resp.fetch('dmps', [])
142
+ args = {
143
+ index_name: 'dmphub_owner_id_gsi',
144
+ key_conditions: {
145
+ dmphub_owner_id: {
146
+ attribute_value_list: [
147
+ "http://orcid.org/#{owner_id}",
148
+ "https://orcid.org/#{owner_id}"
149
+ ],
150
+ comparison_operator: 'IN'
151
+ }
152
+ },
153
+ filter_expression: 'SK = :version',
154
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
+ }
156
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
+ client = Uc3DmpDynamo::Client.new if client.nil?
158
+ _process_search_response(response: client.query(args:, logger:))
180
159
  end
181
160
 
182
- # Fetch the DMP IDs for the specified funder
183
- def _by_funder(funder:, client: nil, logger: nil)
161
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
+ # on the DMP ID record)
163
+ def _by_owner_org(owner_org:, client: nil, logger: nil)
184
164
  regex = /^[a-zA-Z0-9]+$/
185
- ror = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
186
-
187
- resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: ror }, logger:)
188
- return [] unless resp.is_a?(Hash)
189
-
190
- logger&.debug(message: "DMPs for FUNDER #{ror}", details: resp)
191
- resp.fetch('dmps', [])
192
- end
165
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
193
166
 
194
- # Fetch the DMP IDs that are marked as featured
195
- def _by_featured(client: nil, logger: nil)
196
167
  args = {
197
- filter_expression: 'featured = :featured AND SK = :sk',
198
- expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
168
+ index_name: 'dmphub_owner_org_gsi',
169
+ key_conditions: {
170
+ dmphub_owner_org: {
171
+ attribute_value_list: [
172
+ "https://ror.org/#{owner_org.to_s.downcase}",
173
+ "http://ror.org/#{owner_org.to_s.downcase}"
174
+ ],
175
+ comparison_operator: 'IN'
176
+ }
177
+ },
178
+ filter_expression: 'SK = :version',
179
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
199
180
  }
200
- logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
181
+ logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
201
182
  client = Uc3DmpDynamo::Client.new if client.nil?
202
- _process_search_response(response: client.scan(args:))
183
+ _process_search_response(response: client.query(args:, logger:))
203
184
  end
204
185
 
205
- # Return all of the publicly visible DMPs
206
- def _publicly_visible(client: nil, logger: nil)
186
+ # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
+ def _by_mod_day(day:, client: nil, logger: nil)
188
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
+ raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
+
207
191
  args = {
208
- filter_expression: 'visibility = :visibility AND SK = :sk',
209
- expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
192
+ index_name: 'dmphub_modification_day_gsi',
193
+ key_conditions: {
194
+ dmphub_modification_day: {
195
+ attribute_value_list: [day.to_s],
196
+ comparison_operator: 'IN'
197
+ }
198
+ },
199
+ filter_expression: 'SK = :version',
200
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
210
201
  }
211
- logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
202
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
212
203
  client = Uc3DmpDynamo::Client.new if client.nil?
213
- _process_search_response(response: client.scan(args:))
204
+ _process_search_response(response: client.query(args:, logger:))
214
205
  end
215
206
 
216
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -221,8 +212,8 @@ module Uc3DmpId
221
212
  next if item.nil?
222
213
 
223
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
224
- # dmp = _remove_narrative_if_private(json: dmp)
225
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
226
217
  end
227
218
  results.compact.uniq
228
219
  end
@@ -44,13 +44,15 @@ module Uc3DmpId
44
44
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
45
45
 
46
46
  # Handle any changes to the dmphub_modifications section
47
- version = _process_harvester_mods(client:, p_key:, json: version, logger:)
47
+ version = _process_harvester_mods(client:, p_key:, json: mods, logger:)
48
48
 
49
49
  # Remove the version info because we don't want to save it on the record
50
50
  version.delete('dmphub_versions')
51
51
 
52
52
  # Splice the assertions
53
53
  version = _process_modifications(owner:, updater:, version:, mods:, logger:)
54
+ logger&.debug(message: 'Mods after process_modifications', details: version)
55
+
54
56
  # Set the :modified timestamps
55
57
  now = Time.now.utc
56
58
  version['modified'] = now.iso8601
@@ -192,6 +194,7 @@ module Uc3DmpId
192
194
 
193
195
  # Fetch any Harvester modifications to the JSON
194
196
  def _process_harvester_mods(client:, p_key:, json:, logger: nil)
197
+ logger&.debug(message: 'Incoming modifications', details: json)
195
198
  return json if json.fetch('dmphub_modifications', []).empty?
196
199
 
197
200
  # Fetch the `"SK": "HARVESTER_MODS"` record
@@ -201,6 +204,7 @@ module Uc3DmpId
201
204
  )
202
205
  return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
203
206
 
207
+ logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
204
208
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
205
209
  # so just find them and update the status accordingly
206
210
  mods = resp.dup
@@ -208,14 +212,18 @@ module Uc3DmpId
208
212
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
209
213
 
210
214
  entry['dmproadmap_related_identifiers'].each do |related|
211
- next if mods['related_works'][related.identifier].nil?
215
+ related_id = mods['related_works'][related.identifier] if related.respond_to?(:identifier)
216
+ related_id = mods['related_works'][related['identifier']] if related_id.nil?
217
+ next if related_id.nil?
212
218
 
213
- mods['related_works'][related.identifier]['status'] = related['status']
219
+ mods['related_works'][related_id]['status'] = related['status']
214
220
  end
215
221
  end
216
222
 
223
+ logger&.debug(message: 'Updating HARVESTER_MODS with:', details: mods)
217
224
  client.put_item(json: mods, logger:)
218
225
  json.delete('dmphub_modifications')
226
+ logger&.debug(message: 'After deleting dmphub_modifications:', details: json)
219
227
  json
220
228
  end
221
229
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.68'
4
+ VERSION = '0.1.70'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.68
4
+ version: 0.1.70
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley