uc3-dmp-id 0.1.86 → 0.1.87

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4058648219af1d94417a32457d48fe2d7b1139679785909b34d7af398be5dea7
4
- data.tar.gz: eb8048888ab4bd00dd44053fe9eb06fa4e4104a54b15a7b908ca35f193770732
3
+ metadata.gz: a5f7760c4099466f5a99377669e189dfc33cfe2e718d1bf0c96b06b251f5ff77
4
+ data.tar.gz: dd6b30e66d7848e8810bd4c735ce5d007e7c0dd80c059e2341813db873e74c35
5
5
  SHA512:
6
- metadata.gz: 49fecf88078de154046ea0a9fa21ba1c96ece0a6aa2a9632c38d5dbadabcef9a672305ffb370934f6028f2bbf50b1740710b8187f39f6f0857e8921304e95d51
7
- data.tar.gz: de4ff71ab78e95c9e0404ff1d66a9e0d8d01128fe9d6f04b0499450ad4630adbbf0095f53e093f617040eee426c4ffada6b54f9dd3caff7b524f3a2a9345176f
6
+ metadata.gz: 9b2ebf8b2d1919f15bc9b538373fcafddb077c432754b0ba27c8706de6352db07d0db51ec41e5bb4bdc1469a55d45162d45127fc14e4df0c4699d63ed10963e5
7
+ data.tar.gz: 6470cf64e86138c08c343c299dea73a5d5add92755c6716684ad33e25378bfb36f6c3e440a142d5c66f439655d4b3600516c417efd31c9dc7ae4dac0047e416b
@@ -53,7 +53,7 @@ module Uc3DmpId
53
53
  annotated['registered'] = annotated['created'] if annotated['registered'].nil?
54
54
 
55
55
  # Create the item
56
- annotated.delete('dmphub_modifications')
56
+ annotated['dmphub_modifications'] = []
57
57
  resp = client.put_item(json: annotated, logger:)
58
58
  raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
59
59
 
@@ -17,22 +17,43 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
+ ORCID_DOMAIN = 'https://orcid.org/'
21
+ ROR_DOMAIN = 'https://ror.org/'
22
+ DOI_DOMAIN = 'https://doi.org/'
23
+ SORT_OPTIONS = %w[title modified]
24
+ SORT_DIRECTIONS = %w[asc desc]
25
+ MAX_PAGE_SIZE = 100
26
+ DEFAULT_PAGE_SIZE = 25
27
+ DEFAULT_SORT_OPTION = 'modified'
28
+ DEFAULT_SORT_DIR = 'desc'
29
+
20
30
  class << self
21
31
  # TODO: Replace this with Elasticsearch
22
32
  def search_dmps(args:, logger: nil)
23
- client = Uc3DmpDynamo::Client.new
24
- return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
33
+ # Fetch the DMPs for each of the possible filter options
34
+ client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
35
+ owner = args['owner']
36
+ org = args['org']
37
+ funder = args['funder']
38
+
39
+ owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
40
+ # There may be multiple Org ids, so query them all
41
+ org_pks = org.nil? ? [] : org.split('|').map { |o| _by_org(org: o, client:, logger:) }
42
+ org_pks = org_pks.flatten.uniq
43
+ funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
44
+ # pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
45
+ logger&.debug(
46
+ message: 'PKs found',
47
+ details: { owner: owner_pks, org: org_pks, funder: funder_pks }
48
+ )
49
+ # return [] if pks.nil? || pks.empty?
25
50
 
26
- unless args['owner_org_ror'].nil?
27
- return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
- logger:)
29
- end
30
- unless args['modification_day'].nil?
31
- return _by_mod_day(day: args['modification_day'], client:,
32
- logger:)
33
- end
51
+ # Only use the DMPs that fit all of the filter criteria
52
+ # dmps = pks.reduce(:&).flatten.uniq
53
+ # return [] if dmps.nil? || dmps.empty?
54
+
55
+ [owner_pks, org_pks, funder_pks].flatten.uniq
34
56
 
35
- []
36
57
  end
37
58
 
38
59
  # Find a DMP based on the contents of the incoming JSON
@@ -134,74 +155,68 @@ module Uc3DmpId
134
155
 
135
156
  private
136
157
 
137
- # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
- def _by_owner(owner_id:, client: nil, logger: nil)
139
- regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
158
+ # Fetch the DMP IDs for the specified person's ORCID (or email)
159
+ def _by_owner(owner:, client: nil, logger: nil)
160
+ orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
161
+ email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
162
+ orcid = owner.to_s.strip
163
+ return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
141
164
 
142
- args = {
143
- index_name: 'dmphub_owner_id_gsi',
144
- key_conditions: {
145
- dmphub_owner_id: {
146
- attribute_value_list: [
147
- "http://orcid.org/#{owner_id}",
148
- "https://orcid.org/#{owner_id}"
149
- ],
150
- comparison_operator: 'IN'
151
- }
152
- },
153
- filter_expression: 'SK = :version',
154
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
155
- }
156
- logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
157
- client = Uc3DmpDynamo::Client.new if client.nil?
158
- _process_search_response(response: client.query(args:, logger:))
165
+ orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
166
+ resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
167
+ return [] unless resp.is_a?(Hash)
168
+
169
+ logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
170
+ resp.fetch('dmps', [])
159
171
  end
160
172
 
161
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
- # on the DMP ID record)
163
- def _by_owner_org(owner_org:, client: nil, logger: nil)
173
+ # Fetch the DMP IDs for the specified organization/institution
174
+ def _by_org(org:, client: nil, logger: nil)
164
175
  regex = /^[a-zA-Z0-9]+$/
165
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
176
+ id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
177
+ return [] if id.nil?
178
+
179
+ resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
180
+ return [] unless resp.is_a?(Hash)
181
+
182
+ logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
183
+ resp.fetch('dmps', [])
184
+ end
166
185
 
186
+ # Fetch the DMP IDs for the specified funder
187
+ def _by_funder(funder:, client: nil, logger: nil)
188
+ regex = /^[a-zA-Z0-9]+$/
189
+ id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
190
+ id = "#{DOI_DOMAIN}#{funder.strip}" if id.nil? && !(funder.to_s =~ Helper::DOI_REGEX).nil?
191
+ return [] if id.nil?
192
+
193
+ resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
194
+ return [] unless resp.is_a?(Hash)
195
+
196
+ logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
197
+ resp.fetch('dmps', [])
198
+ end
199
+
200
+ # Fetch the DMP IDs that are marked as featured
201
+ def _by_featured(client: nil, logger: nil)
167
202
  args = {
168
- index_name: 'dmphub_owner_org_gsi',
169
- key_conditions: {
170
- dmphub_owner_org: {
171
- attribute_value_list: [
172
- "https://ror.org/#{owner_org.to_s.downcase}",
173
- "http://ror.org/#{owner_org.to_s.downcase}"
174
- ],
175
- comparison_operator: 'IN'
176
- }
177
- },
178
- filter_expression: 'SK = :version',
179
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
203
+ filter_expression: 'featured = :featured AND SK = :sk',
204
+ expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
180
205
  }
181
- logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
206
+ logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
182
207
  client = Uc3DmpDynamo::Client.new if client.nil?
183
- _process_search_response(response: client.query(args:, logger:))
208
+ _process_search_response(response: client.scan(args:))
184
209
  end
185
210
 
186
- # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
- def _by_mod_day(day:, client: nil, logger: nil)
188
- regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
- raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
190
-
211
+ # Return all of the publicly visible DMPs
212
+ def _publicly_visible(client: nil, logger: nil)
191
213
  args = {
192
- index_name: 'dmphub_modification_day_gsi',
193
- key_conditions: {
194
- dmphub_modification_day: {
195
- attribute_value_list: [day.to_s],
196
- comparison_operator: 'IN'
197
- }
198
- },
199
- filter_expression: 'SK = :version',
200
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
214
+ filter_expression: 'visibility = :visibility AND SK = :sk',
215
+ expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
201
216
  }
202
- logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
217
+ logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
203
218
  client = Uc3DmpDynamo::Client.new if client.nil?
204
- _process_search_response(response: client.query(args:, logger:))
219
+ _process_search_response(response: client.scan(args:))
205
220
  end
206
221
 
207
222
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -212,8 +227,8 @@ module Uc3DmpId
212
227
  next if item.nil?
213
228
 
214
229
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
215
- dmp = _remove_narrative_if_private(json: dmp)
216
- Helper.cleanse_dmp_json(json: dmp)
230
+ # dmp = _remove_narrative_if_private(json: dmp)
231
+ # Helper.cleanse_dmp_json(json: dmp)
217
232
  end
218
233
  results.compact.uniq
219
234
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.86'
4
+ VERSION = '0.1.87'
5
5
  end
@@ -58,7 +58,7 @@ module Uc3DmpId
58
58
 
59
59
  # Create the prior version record
60
60
  client = Uc3DmpDynamo::Client.new if client.nil?
61
- prior.delete('dmphub_modifications')
61
+ prior['dmphub_modifications'] = []
62
62
  resp = client.put_item(json: prior, logger:)
63
63
  return nil if resp.nil?
64
64
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.86
4
+ version: 0.1.87
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-16 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json