uc3-dmp-id 0.1.68 → 0.1.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f6ec06f994f12700ca9584e1a9f1bb42c5ff556e7ff6f1d9610c31dbd6cad2b
4
- data.tar.gz: 32684f7693d73bc03c6eb7ea07906046895031d6aa3ec045c4c898fe04c03318
3
+ metadata.gz: c4355f81f314520bd52003c27e4593e670322a0efe7d400e647edb4dac10dfd3
4
+ data.tar.gz: af81b71d4a6a1ad0c415afc7f2b7f69e916feab5e9cdbf99079ffc9e30cc1397
5
5
  SHA512:
6
- metadata.gz: 37817e517884f4fc2d79d4c63aa71bbdede1fe74b182212b84a6e17aa685ac4696415bdb2b7a3ad19ab4d55a83e1015f50dda730ca9f84cc125627b38c8606eb
7
- data.tar.gz: 700da77d44d8302b2b4fdfd4d985ae4a0f48ffbed10a74b1924a6a7fd42f1eb7cf8bb722342c93a205d84eb70cfa65a2abe81e902e6be394df1e363e37f47977
6
+ metadata.gz: ced1e2298a918308e0948326645ffdfeb0519c07d611dfd8909f70138bbf32a485926d9b1752b9fe556a71466bca2b8cc44f25e904891559ca929f69aeaf2483
7
+ data.tar.gz: c96ddbefb0409be5da359ef0158efa3d876fd2f28856c6bc4a12b9591651013b1a13f478af454129497f1e86515274bcad5f60da89fd7b0824b60196d5b65974
@@ -17,40 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- SORT_OPTIONS = %w[title modified]
23
- SORT_DIRECTIONS = %w[asc desc]
24
- MAX_PAGE_SIZE = 100
25
- DEFAULT_PAGE_SIZE = 25
26
- DEFAULT_SORT_OPTION = 'modified'
27
- DEFAULT_SORT_DIR = 'desc'
28
-
29
20
  class << self
30
21
  # TODO: Replace this with ElasticSearch
31
22
  def search_dmps(args:, logger: nil)
32
- # Fetch the DMPs for each of the possible filter options
33
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
34
- owner = args['owner']
35
- org = args['org']
36
- funder = args['funder']
37
-
38
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
39
- org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
40
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
41
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
42
- logger&.debug(
43
- message: 'PKs found',
44
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
45
- )
46
- # return [] if pks.nil? || pks.empty?
47
-
48
- # Only use the DMPs that fit all of the filter criteria
49
- # dmps = pks.reduce(:&).flatten.uniq
50
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
51
25
 
52
- [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
53
34
 
35
+ []
54
36
  end
55
37
 
56
38
  # Find a DMP based on the contents of the incoming JSON
@@ -152,65 +134,74 @@ module Uc3DmpId
152
134
 
153
135
  private
154
136
 
155
- # Fetch the DMP IDs for the specified person's ORCID (or email)
156
- def _by_owner(owner:, client: nil, logger: nil)
157
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
158
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
159
- orcid = owner.to_s.strip
160
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
+ # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
+ def _by_owner(owner_id:, client: nil, logger: nil)
139
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
161
141
 
162
- orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
163
- resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
164
- return [] unless resp.is_a?(Hash)
165
-
166
- logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
167
- resp.fetch('dmps', [])
168
- end
169
-
170
- # Fetch the DMP IDs for the specified organization/institution
171
- def _by_org(org:, client: nil, logger: nil)
172
- regex = /^[a-zA-Z0-9]+$/
173
- ror = "#{ROR_DOMAIN}/#{org.strip}" unless (org.to_s =~ regex).nil?
174
-
175
- resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: ror }, logger:)
176
- return [] unless resp.is_a?(Hash)
177
-
178
- logger&.debug(message: "DMPs for AFFILIATION #{ror}", details: resp)
179
- resp.fetch('dmps', [])
142
+ args = {
143
+ index_name: 'dmphub_owner_id_gsi',
144
+ key_conditions: {
145
+ dmphub_owner_id: {
146
+ attribute_value_list: [
147
+ "http://orcid.org/#{owner_id}",
148
+ "https://orcid.org/#{owner_id}"
149
+ ],
150
+ comparison_operator: 'IN'
151
+ }
152
+ },
153
+ filter_expression: 'SK = :version',
154
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
+ }
156
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
+ client = Uc3DmpDynamo::Client.new if client.nil?
158
+ _process_search_response(response: client.query(args:, logger:))
180
159
  end
181
160
 
182
- # Fetch the DMP IDs for the specified funder
183
- def _by_funder(funder:, client: nil, logger: nil)
161
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
+ # on the DMP ID record)
163
+ def _by_owner_org(owner_org:, client: nil, logger: nil)
184
164
  regex = /^[a-zA-Z0-9]+$/
185
- ror = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
186
-
187
- resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: ror }, logger:)
188
- return [] unless resp.is_a?(Hash)
189
-
190
- logger&.debug(message: "DMPs for FUNDER #{ror}", details: resp)
191
- resp.fetch('dmps', [])
192
- end
165
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
193
166
 
194
- # Fetch the DMP IDs that are marked as featured
195
- def _by_featured(client: nil, logger: nil)
196
167
  args = {
197
- filter_expression: 'featured = :featured AND SK = :sk',
198
- expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
168
+ index_name: 'dmphub_owner_org_gsi',
169
+ key_conditions: {
170
+ dmphub_owner_org: {
171
+ attribute_value_list: [
172
+ "https://ror.org/#{owner_org.to_s.downcase}",
173
+ "http://ror.org/#{owner_org.to_s.downcase}"
174
+ ],
175
+ comparison_operator: 'IN'
176
+ }
177
+ },
178
+ filter_expression: 'SK = :version',
179
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
199
180
  }
200
- logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
181
+ logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
201
182
  client = Uc3DmpDynamo::Client.new if client.nil?
202
- _process_search_response(response: client.scan(args:))
183
+ _process_search_response(response: client.query(args:, logger:))
203
184
  end
204
185
 
205
- # Return all of the publicly visible DMPs
206
- def _publicly_visible(client: nil, logger: nil)
186
+ # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
+ def _by_mod_day(day:, client: nil, logger: nil)
188
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
+ raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
+
207
191
  args = {
208
- filter_expression: 'visibility = :visibility AND SK = :sk',
209
- expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
192
+ index_name: 'dmphub_modification_day_gsi',
193
+ key_conditions: {
194
+ dmphub_modification_day: {
195
+ attribute_value_list: [day.to_s],
196
+ comparison_operator: 'IN'
197
+ }
198
+ },
199
+ filter_expression: 'SK = :version',
200
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
210
201
  }
211
- logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
202
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
212
203
  client = Uc3DmpDynamo::Client.new if client.nil?
213
- _process_search_response(response: client.scan(args:))
204
+ _process_search_response(response: client.query(args:, logger:))
214
205
  end
215
206
 
216
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -221,8 +212,8 @@ module Uc3DmpId
221
212
  next if item.nil?
222
213
 
223
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
224
- # dmp = _remove_narrative_if_private(json: dmp)
225
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
226
217
  end
227
218
  results.compact.uniq
228
219
  end
@@ -44,13 +44,15 @@ module Uc3DmpId
44
44
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
45
45
 
46
46
  # Handle any changes to the dmphub_modifications section
47
- version = _process_harvester_mods(client:, p_key:, json: version, logger:)
47
+ version = _process_harvester_mods(client:, p_key:, json: mods, logger:)
48
48
 
49
49
  # Remove the version info because we don't want to save it on the record
50
50
  version.delete('dmphub_versions')
51
51
 
52
52
  # Splice the assertions
53
53
  version = _process_modifications(owner:, updater:, version:, mods:, logger:)
54
+ logger&.debug(message: 'Mods after process_modifications', details: mods)
55
+
54
56
  # Set the :modified timestamps
55
57
  now = Time.now.utc
56
58
  version['modified'] = now.iso8601
@@ -192,6 +194,7 @@ module Uc3DmpId
192
194
 
193
195
  # Fetch any Harvester modifications to the JSON
194
196
  def _process_harvester_mods(client:, p_key:, json:, logger: nil)
197
+ logger&.debug(message: 'Incoming modifications', details: json)
195
198
  return json if json.fetch('dmphub_modifications', []).empty?
196
199
 
197
200
  # Fetch the `"SK": "HARVESTER_MODS"` record
@@ -201,6 +204,7 @@ module Uc3DmpId
201
204
  )
202
205
  return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
203
206
 
207
+ logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
204
208
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
205
209
  # so just find them and update the status accordingly
206
210
  mods = resp.dup
@@ -208,14 +212,18 @@ module Uc3DmpId
208
212
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
209
213
 
210
214
  entry['dmproadmap_related_identifiers'].each do |related|
211
- next if mods['related_works'][related.identifier].nil?
215
+ related_id = mods['related_works'][related.identifier] if related.respond_to?(:identifier)
216
+ related_id = mods['related_works'][related['identifier']] if related_id.nil?
217
+ next if related_id.nil?
212
218
 
213
- mods['related_works'][related.identifier]['status'] = related['status']
219
+ mods['related_works'][related_id]['status'] = related['status']
214
220
  end
215
221
  end
216
222
 
223
+ logger&.debug(message: 'Updating HARVESTER_MODS with:', details: mods)
217
224
  client.put_item(json: mods, logger:)
218
225
  json.delete('dmphub_modifications')
226
+ logger&.debug(message: 'After deleting dmphub_modifications:', details: json)
219
227
  json
220
228
  end
221
229
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.68'
4
+ VERSION = '0.1.69'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.68
4
+ version: 0.1.69
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley