uc3-dmp-id 0.1.78 → 0.1.80

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 10b0e02321270e309ee7352e0adfc06f9f86d5b57397dc2d34cdd373aaacaadf
4
- data.tar.gz: 2985739c95e04e3d515cfce61f6ab61ec2e99bbb8edce925c8222b6b194216a6
3
+ metadata.gz: 9e69e96c9dc20009c9a8ae00a332cff2348e529ffbfce306418a781977bf13b0
4
+ data.tar.gz: c3d4e37d7fefef7d98aa6606e5063a6539270c31d7916af1f964b99065b80724
5
5
  SHA512:
6
- metadata.gz: f0594eb5e5faa1c214f953747a649dd5ff932eb7c554eada2db2e6e5dcc81c52565942768132c6d781000a84c55ccfa606829dd68f9775cd0f104b5070927c5b
7
- data.tar.gz: 50142c1ea8e666446ca51075ed1ddfa73a46e8ef68df87e64ca75e5da5bdfc1470fded80c52fe0c6524e65b1c365dce57146535bf744552da77273d9b3881074
6
+ metadata.gz: 92b6adb8619e99a00dcd61876c429d55757a6bb79841ffb5ddffbf04c0cc2fe10003464a8a3274b0886c639c2f527e241ca3b714c1b9e31c16d2fc125938d559
7
+ data.tar.gz: 6f6519e59f7c708be73fe721a7b68013e17dd523ebd42e33d5b6aedc7755a5364d3dafbf4df4534a10d3d34c320aa79b17e122e15eea0c3a8477032b3c454e89
@@ -17,22 +17,41 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
+ ORCID_DOMAIN = 'https://orcid.org/'
21
+ ROR_DOMAIN = 'https://ror.org/'
22
+ DOI_DOMAIN = 'https://doi.org/'
23
+ SORT_OPTIONS = %w[title modified]
24
+ SORT_DIRECTIONS = %w[asc desc]
25
+ MAX_PAGE_SIZE = 100
26
+ DEFAULT_PAGE_SIZE = 25
27
+ DEFAULT_SORT_OPTION = 'modified'
28
+ DEFAULT_SORT_DIR = 'desc'
29
+
20
30
  class << self
21
31
  # TODO: Replace this with ElasticSearch
22
32
  def search_dmps(args:, logger: nil)
23
- client = Uc3DmpDynamo::Client.new
24
- return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
33
+ # Fetch the DMPs for each of the possible filter options
34
+ client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
+ owner = args['owner']
36
+ org = args['org']
37
+ funder = args['funder']
38
+
39
+ owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
+ org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
41
+ funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
42
+ # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
43
+ logger&.debug(
44
+ message: 'PKs found',
45
+ details: { owner: owner_pks, org: org_pks, funder: funder_pks }
46
+ )
47
+ # return [] if pks.nil? || pks.empty?
25
48
 
26
- unless args['owner_org_ror'].nil?
27
- return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
- logger:)
29
- end
30
- unless args['modification_day'].nil?
31
- return _by_mod_day(day: args['modification_day'], client:,
32
- logger:)
33
- end
49
+ # Only use the DMPs that fit all of the filter criteria
50
+ # dmps = pks.reduce(:&).flatten.uniq
51
+ # return [] if dmps.nil? || dmps.empty?
52
+
53
+ [owner_pks, org_pks, funder_pks].flatten.uniq
34
54
 
35
- []
36
55
  end
37
56
 
38
57
  # Find a DMP based on the contents of the incoming JSON
@@ -134,74 +153,68 @@ module Uc3DmpId
134
153
 
135
154
  private
136
155
 
137
- # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
- def _by_owner(owner_id:, client: nil, logger: nil)
139
- regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
156
+ # Fetch the DMP IDs for the specified person's ORCID (or email)
157
+ def _by_owner(owner:, client: nil, logger: nil)
158
+ orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
159
+ email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
160
+ orcid = owner.to_s.strip
161
+ return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
141
162
 
142
- args = {
143
- index_name: 'dmphub_owner_id_gsi',
144
- key_conditions: {
145
- dmphub_owner_id: {
146
- attribute_value_list: [
147
- "http://orcid.org/#{owner_id}",
148
- "https://orcid.org/#{owner_id}"
149
- ],
150
- comparison_operator: 'IN'
151
- }
152
- },
153
- filter_expression: 'SK = :version',
154
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
- }
156
- logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
- client = Uc3DmpDynamo::Client.new if client.nil?
158
- _process_search_response(response: client.query(args:, logger:))
163
+ orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
164
+ resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
165
+ return [] unless resp.is_a?(Hash)
166
+
167
+ logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
168
+ resp.fetch('dmps', [])
159
169
  end
160
170
 
161
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
- # on the DMP ID record)
163
- def _by_owner_org(owner_org:, client: nil, logger: nil)
171
+ # Fetch the DMP IDs for the specified organization/institution
172
+ def _by_org(org:, client: nil, logger: nil)
164
173
  regex = /^[a-zA-Z0-9]+$/
165
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
174
+ id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
175
+ return [] if id.nil?
176
+
177
+ resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
178
+ return [] unless resp.is_a?(Hash)
179
+
180
+ logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
181
+ resp.fetch('dmps', [])
182
+ end
166
183
 
184
+ # Fetch the DMP IDs for the specified funder
185
+ def _by_funder(funder:, client: nil, logger: nil)
186
+ regex = /^[a-zA-Z0-9]+$/
187
+ id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
188
+ id = "#{DOI_DOMAIN}#{funder.strip}" if id.nil? && !(funder.to_s =~ Helper::DOI_REGEX).nil?
189
+ return [] if id.nil?
190
+
191
+ resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
192
+ return [] unless resp.is_a?(Hash)
193
+
194
+ logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
195
+ resp.fetch('dmps', [])
196
+ end
197
+
198
+ # Fetch the DMP IDs that are marked as featured
199
+ def _by_featured(client: nil, logger: nil)
167
200
  args = {
168
- index_name: 'dmphub_owner_org_gsi',
169
- key_conditions: {
170
- dmphub_owner_org: {
171
- attribute_value_list: [
172
- "https://ror.org/#{owner_org.to_s.downcase}",
173
- "http://ror.org/#{owner_org.to_s.downcase}"
174
- ],
175
- comparison_operator: 'IN'
176
- }
177
- },
178
- filter_expression: 'SK = :version',
179
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
201
+ filter_expression: 'featured = :featured AND SK = :sk',
202
+ expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
180
203
  }
181
- logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
204
+ logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
182
205
  client = Uc3DmpDynamo::Client.new if client.nil?
183
- _process_search_response(response: client.query(args:, logger:))
206
+ _process_search_response(response: client.scan(args:))
184
207
  end
185
208
 
186
- # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
- def _by_mod_day(day:, client: nil, logger: nil)
188
- regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
- raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
-
209
+ # Return all of the publicly visible DMPs
210
+ def _publicly_visible(client: nil, logger: nil)
191
211
  args = {
192
- index_name: 'dmphub_modification_day_gsi',
193
- key_conditions: {
194
- dmphub_modification_day: {
195
- attribute_value_list: [day.to_s],
196
- comparison_operator: 'IN'
197
- }
198
- },
199
- filter_expression: 'SK = :version',
200
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
212
+ filter_expression: 'visibility = :visibility AND SK = :sk',
213
+ expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
201
214
  }
202
- logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
215
+ logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
203
216
  client = Uc3DmpDynamo::Client.new if client.nil?
204
- _process_search_response(response: client.query(args:, logger:))
217
+ _process_search_response(response: client.scan(args:))
205
218
  end
206
219
 
207
220
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -212,8 +225,8 @@ module Uc3DmpId
212
225
  next if item.nil?
213
226
 
214
227
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
215
- dmp = _remove_narrative_if_private(json: dmp)
216
- Helper.cleanse_dmp_json(json: dmp)
228
+ # dmp = _remove_narrative_if_private(json: dmp)
229
+ # Helper.cleanse_dmp_json(json: dmp)
217
230
  end
218
231
  results.compact.uniq
219
232
  end
@@ -47,6 +47,7 @@ module Uc3DmpId
47
47
  # Handle any changes to the dmphub_modifications section
48
48
  version = _process_harvester_mods(client:, p_key:, json: payload, version:, logger:)
49
49
  logger&.debug(message: 'Version after process_harvester_mods', details: version)
50
+ raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
50
51
 
51
52
  # Remove the version info and any lingering modification blocks
52
53
  version.delete('dmphub_versions')
@@ -176,14 +177,14 @@ module Uc3DmpId
176
177
  # Fetch any Harvester modifications to the JSON
177
178
  def _process_harvester_mods(client:, p_key:, json:, version:, logger: nil)
178
179
  logger&.debug(message: 'Incoming modifications', details: json)
179
- return json if json.fetch('dmphub_modifications', []).empty?
180
+ return version if json.fetch('dmphub_modifications', []).empty?
180
181
 
181
182
  # Fetch the `"SK": "HARVESTER_MODS"` record
182
183
  client = Uc3DmpDynamo::Client.new if client.nil?
183
184
  resp = client.get_item(
184
185
  key: { PK: Helper.append_pk_prefix(p_key:), SK: Helper::SK_HARVESTER_MODS }, logger:
185
186
  )
186
- return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
187
+ return version unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
187
188
 
188
189
  logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
189
190
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
@@ -192,7 +193,7 @@ module Uc3DmpId
192
193
  json['dmproadmap_related_identifiers'] = [] if json['dmproadmap_related_identifiers'].nil?
193
194
 
194
195
  json['dmphub_modifications'].each do |entry|
195
- next if entry.fetch('dmproadmap_related_identifiers', []).empty?
196
+ next if entry.is_a?(Hash) && entry.fetch('dmproadmap_related_identifiers', []).empty?
196
197
 
197
198
  entry['dmproadmap_related_identifiers'].each do |related|
198
199
  # Determine if the HARVESTER_MODS record even knows about the mod
@@ -213,13 +214,13 @@ module Uc3DmpId
213
214
 
214
215
  # Add it if it was approved and doesn't exist in dmproadmap_related_identifiers
215
216
  if related['status'] == 'approved' && existing.empty?
216
- version['dmproadmap_related_identifiers'] << {
217
+ version['dmproadmap_related_identifiers'] << JSON.parse({
217
218
  identifier: key,
218
219
  work_type: related['work_type'],
219
220
  type: related['type'],
220
221
  descriptor: related['descriptor'],
221
222
  citation: related['citation']
222
- }
223
+ }.to_json)
223
224
  elsif related['status'] == 'rejected' && existing.any?
224
225
  # otherwise remove it
225
226
  version['dmproadmap_related_identifiers'] = version['dmproadmap_related_identifiers'].reject { |ri| ri == existing.first }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.78'
4
+ VERSION = '0.1.80'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.78
4
+ version: 0.1.80
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json