uc3-dmp-id 0.1.67 → 0.1.69

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c2deeeeb8ad969be7b2f9e16463dd1877735078692146d69a481d9cb8a8e2ba
4
- data.tar.gz: e8dd8cd23aa8613a78114a7c211cdbd79a5fb07bdd056cb1617b20218bbf36d8
3
+ metadata.gz: c4355f81f314520bd52003c27e4593e670322a0efe7d400e647edb4dac10dfd3
4
+ data.tar.gz: af81b71d4a6a1ad0c415afc7f2b7f69e916feab5e9cdbf99079ffc9e30cc1397
5
5
  SHA512:
6
- metadata.gz: c0ec1f87093fb330703a54e81f7e5374cce198652b16f2185f93438729bdbbf3ea084efc513f9a91abb7724aef2d773984adc9c9cbde50f64b29fa2f122e273e
7
- data.tar.gz: 2976f80563a048fb5270fd77880171e7cb21d455142022b60f0d9390a4b5226fb515cb553a484885518e0d2407f4a059c9b1f9cbf65c4c31fb3c7a0c1054b276
6
+ metadata.gz: ced1e2298a918308e0948326645ffdfeb0519c07d611dfd8909f70138bbf32a485926d9b1752b9fe556a71466bca2b8cc44f25e904891559ca929f69aeaf2483
7
+ data.tar.gz: c96ddbefb0409be5da359ef0158efa3d876fd2f28856c6bc4a12b9591651013b1a13f478af454129497f1e86515274bcad5f60da89fd7b0824b60196d5b65974
@@ -17,43 +17,22 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
- ORCID_DOMAIN = 'https://orcid.org/'
21
- ROR_DOMAIN = 'https://ror.org/'
22
- SORT_OPTIONS = %w[title modified]
23
- SORT_DIRECTIONS = %w[asc desc]
24
- MAX_PAGE_SIZE = 100
25
- DEFAULT_PAGE_SIZE = 25
26
- DEFAULT_SORT_OPTION = 'modified'
27
- DEFAULT_SORT_DIR = 'desc'
28
-
29
20
  class << self
30
21
  # TODO: Replace this with ElasticSearch
31
22
  def search_dmps(args:, logger: nil)
32
- # Fetch the DMPs for each of the possible filter options
33
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
34
- owner = args['owner']
35
- org = args['org']
36
- funder = args['funder']
37
-
38
- owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
39
- org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
40
- funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
41
- # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
42
- logger&.debug(
43
- message: 'PKs found',
44
- details: { owner: owner_pks, org: org_pks, funder: funder_pks }
45
- )
46
- # return [] if pks.nil? || pks.empty?
47
-
48
- # Only use the DMPs that fit all of the filter criteria
49
- # dmps = pks.reduce(:&).flatten.uniq
50
- # return [] if dmps.nil? || dmps.empty?
23
+ client = Uc3DmpDynamo::Client.new
24
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
51
25
 
52
- pks = [owner_pks, org_pks, funder_pks].flatten.uniq
26
+ unless args['owner_org_ror'].nil?
27
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
+ logger:)
29
+ end
30
+ unless args['modification_day'].nil?
31
+ return _by_mod_day(day: args['modification_day'], client:,
32
+ logger:)
33
+ end
53
34
 
54
- # Fetch full DMP records for the results
55
- client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_TABLE'])
56
- pks.map { |p_key| by_pk(p_key:, client:, logger:, cleanse: true) }
35
+ []
57
36
  end
58
37
 
59
38
  # Find a DMP based on the contents of the incoming JSON
@@ -155,65 +134,74 @@ module Uc3DmpId
155
134
 
156
135
  private
157
136
 
158
- # Fetch the DMP IDs for the specified person's ORCID (or email)
159
- def _by_owner(owner:, client: nil, logger: nil)
160
- orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
161
- email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
162
- orcid = owner.to_s.strip
163
- return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
137
+ # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
+ def _by_owner(owner_id:, client: nil, logger: nil)
139
+ regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
164
141
 
165
- orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
166
- resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
167
- return [] unless resp.is_a?(Hash)
168
-
169
- logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
170
- resp.fetch('dmps', [])
171
- end
172
-
173
- # Fetch the DMP IDs for the specified organization/institution
174
- def _by_org(org:, client: nil, logger: nil)
175
- regex = /^[a-zA-Z0-9]+$/
176
- ror = "#{ROR_DOMAIN}/#{org.strip}" unless (org.to_s =~ regex).nil?
177
-
178
- resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: ror }, logger:)
179
- return [] unless resp.is_a?(Hash)
180
-
181
- logger&.debug(message: "DMPs for AFFILIATION #{ror}", details: resp)
182
- resp.fetch('dmps', [])
142
+ args = {
143
+ index_name: 'dmphub_owner_id_gsi',
144
+ key_conditions: {
145
+ dmphub_owner_id: {
146
+ attribute_value_list: [
147
+ "http://orcid.org/#{owner_id}",
148
+ "https://orcid.org/#{owner_id}"
149
+ ],
150
+ comparison_operator: 'IN'
151
+ }
152
+ },
153
+ filter_expression: 'SK = :version',
154
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
+ }
156
+ logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
+ client = Uc3DmpDynamo::Client.new if client.nil?
158
+ _process_search_response(response: client.query(args:, logger:))
183
159
  end
184
160
 
185
- # Fetch the DMP IDs for the specified funder
186
- def _by_funder(funder:, client: nil, logger: nil)
161
+ # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
+ # on the DMP ID record)
163
+ def _by_owner_org(owner_org:, client: nil, logger: nil)
187
164
  regex = /^[a-zA-Z0-9]+$/
188
- ror = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
189
-
190
- resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: ror }, logger:)
191
- return [] unless resp.is_a?(Hash)
192
-
193
- logger&.debug(message: "DMPs for FUNDER #{ror}", details: resp)
194
- resp.fetch('dmps', [])
195
- end
165
+ raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
196
166
 
197
- # Fetch the DMP IDs that are marked as featured
198
- def _by_featured(client: nil, logger: nil)
199
167
  args = {
200
- filter_expression: 'featured = :featured AND SK = :sk',
201
- expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
168
+ index_name: 'dmphub_owner_org_gsi',
169
+ key_conditions: {
170
+ dmphub_owner_org: {
171
+ attribute_value_list: [
172
+ "https://ror.org/#{owner_org.to_s.downcase}",
173
+ "http://ror.org/#{owner_org.to_s.downcase}"
174
+ ],
175
+ comparison_operator: 'IN'
176
+ }
177
+ },
178
+ filter_expression: 'SK = :version',
179
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
202
180
  }
203
- logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
181
+ logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
204
182
  client = Uc3DmpDynamo::Client.new if client.nil?
205
- _process_search_response(response: client.scan(args:))
183
+ _process_search_response(response: client.query(args:, logger:))
206
184
  end
207
185
 
208
- # Return all of the publicly visible DMPs
209
- def _publicly_visible(client: nil, logger: nil)
186
+ # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
+ def _by_mod_day(day:, client: nil, logger: nil)
188
+ regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
+ raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
+
210
191
  args = {
211
- filter_expression: 'visibility = :visibility AND SK = :sk',
212
- expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
192
+ index_name: 'dmphub_modification_day_gsi',
193
+ key_conditions: {
194
+ dmphub_modification_day: {
195
+ attribute_value_list: [day.to_s],
196
+ comparison_operator: 'IN'
197
+ }
198
+ },
199
+ filter_expression: 'SK = :version',
200
+ expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
213
201
  }
214
- logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
202
+ logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
215
203
  client = Uc3DmpDynamo::Client.new if client.nil?
216
- _process_search_response(response: client.scan(args:))
204
+ _process_search_response(response: client.query(args:, logger:))
217
205
  end
218
206
 
219
207
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -224,8 +212,8 @@ module Uc3DmpId
224
212
  next if item.nil?
225
213
 
226
214
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
227
- # dmp = _remove_narrative_if_private(json: dmp)
228
- # Helper.cleanse_dmp_json(json: dmp)
215
+ dmp = _remove_narrative_if_private(json: dmp)
216
+ Helper.cleanse_dmp_json(json: dmp)
229
217
  end
230
218
  results.compact.uniq
231
219
  end
@@ -44,13 +44,15 @@ module Uc3DmpId
44
44
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
45
45
 
46
46
  # Handle any changes to the dmphub_modifications section
47
- version = _process_harvester_mods(client:, p_key:, json: version, logger:)
47
+ version = _process_harvester_mods(client:, p_key:, json: mods, logger:)
48
48
 
49
49
  # Remove the version info because we don't want to save it on the record
50
50
  version.delete('dmphub_versions')
51
51
 
52
52
  # Splice the assertions
53
53
  version = _process_modifications(owner:, updater:, version:, mods:, logger:)
54
+ logger&.debug(message: 'Mods after process_modifications', details: mods)
55
+
54
56
  # Set the :modified timestamps
55
57
  now = Time.now.utc
56
58
  version['modified'] = now.iso8601
@@ -192,6 +194,7 @@ module Uc3DmpId
192
194
 
193
195
  # Fetch any Harvester modifications to the JSON
194
196
  def _process_harvester_mods(client:, p_key:, json:, logger: nil)
197
+ logger&.debug(message: 'Incoming modifications', details: json)
195
198
  return json if json.fetch('dmphub_modifications', []).empty?
196
199
 
197
200
  # Fetch the `"SK": "HARVESTER_MODS"` record
@@ -201,6 +204,7 @@ module Uc3DmpId
201
204
  )
202
205
  return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
203
206
 
207
+ logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
204
208
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
205
209
  # so just find them and update the status accordingly
206
210
  mods = resp.dup
@@ -208,14 +212,18 @@ module Uc3DmpId
208
212
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
209
213
 
210
214
  entry['dmproadmap_related_identifiers'].each do |related|
211
- next if mods['related_works'][related.identifier].nil?
215
+ related_id = mods['related_works'][related.identifier] if related.respond_to?(:identifier)
216
+ related_id = mods['related_works'][related['identifier']] if related_id.nil?
217
+ next if related_id.nil?
212
218
 
213
- mods['related_works'][related.identifier]['status'] = related['status']
219
+ mods['related_works'][related_id]['status'] = related['status']
214
220
  end
215
221
  end
216
222
 
223
+ logger&.debug(message: 'Updating HARVESTER_MODS with:', details: mods)
217
224
  client.put_item(json: mods, logger:)
218
225
  json.delete('dmphub_modifications')
226
+ logger&.debug(message: 'After deleting dmphub_modifications:', details: json)
219
227
  json
220
228
  end
221
229
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.67'
4
+ VERSION = '0.1.69'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.67
4
+ version: 0.1.69
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley