uc3-dmp-id 0.1.50 → 0.1.52

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e3c4a8acb59e69dd223f10dab9ce2bcf8e7fa2ad131eed5bd6f04feced6e668
4
- data.tar.gz: f8ad221d21001a798415ecced5dff904d20c5ca2d1e7021a7ed28a49f496f57f
3
+ metadata.gz: a126b7c8bbcf6836334748edef7a8ae44510b1c980c208de7448fffbb50f39df
4
+ data.tar.gz: d597f5f334d488e9dd1d1c25d475949daf9eb01b2a44d0166bdb8cb6f106bc4c
5
5
  SHA512:
6
- metadata.gz: 84157070699350225aac0549b2e5a2879dc3ee95e33aa7e9139124f071109e8a51ea6c7789c68520a52f498dfbf5cee24475cddc5c0b6724decbee60feb38e57
7
- data.tar.gz: 13e96259e5668dd4ecfcbeebf5b07320b555d4c55db08aae60a3204e847c974f6efcbfb9a4e29747bbc6f647c23ec73775729152b4b17ca2d79b3c491ea2f7da
6
+ metadata.gz: c52f3a5bd533a0b00a6d76ee2f2f94f0cfa3bb581823411dfc2fa54d44e81ad807f54ed965b91ea8ee08c37d2861cd890ed501331804c813bab6b57203ccd0c4
7
+ data.tar.gz: c4c9bf7e8ee84bc89f2d41626de0421773377b1564144e35ce02c0d102b60f2be207ec8b04b5187e7c098691c4b20b9bbc2dff19e4180115ddb442e0712ab8ce
@@ -17,22 +17,19 @@ module Uc3DmpId
17
17
  MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
18
18
  Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
19
19
 
20
+ ORCID_DOMAIN = 'orcid.org'
21
+ ROR_DOMAIN = 'ror.org'
22
+
20
23
  class << self
21
24
  # TODO: Replace this with ElasticSearch
22
25
  def search_dmps(args:, logger: nil)
23
- client = Uc3DmpDynamo::Client.new
24
- return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
26
+ client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
27
+ return _by_owner(owner: args['owner'], client:, logger:) unless args['owner'].nil?
28
+ return _by_org(org: args['org'], client:, logger:) unless args['org'].nil?
29
+ return _by_funder(funder: args['funder'], client:, logger:) unless args['funder'].nil?
30
+ return _by_featured(client:, logger:) if args.fetch('featured', 'false').to_s.downcase == 'true'
25
31
 
26
- unless args['owner_org_ror'].nil?
27
- return _by_owner_org(owner_org: args['owner_org_ror'], client:,
28
- logger:)
29
- end
30
- unless args['modification_day'].nil?
31
- return _by_mod_day(day: args['modification_day'], client:,
32
- logger:)
33
- end
34
-
35
- []
32
+ return _publicly_visible(client:, logger:)
36
33
  end
37
34
 
38
35
  # Find a DMP based on the contents of the incoming JSON
@@ -134,72 +131,102 @@ module Uc3DmpId
134
131
 
135
132
  private
136
133
 
137
- # Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
138
- def _by_owner(owner_id:, client: nil, logger: nil)
139
- regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
140
- raise FinderError, MSG_INVALID_OWNER_ID if owner_id.nil? || (owner_id.to_s =~ regex).nil?
134
+ # Fetch the DMP IDs for the specified person's ORCID (or email)
135
+ def _by_owner(owner:, client: nil, logger: nil)
136
+ orcid_regex = /^([0-9A-Z]{4}-){3}[0-9A-Z]{4}$/
137
+ email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
138
+ email = owner.trim() unless (owner.to_s =~ email_regex).nil?
139
+
140
+ if email.nil?
141
+ args = {
142
+ filter_expression: 'contains(:people_ids, people)',
143
+ expression_attribute_values: {
144
+ ':sk': 'METADATA',
145
+ ':people_ids': [
146
+ "http://#{ORCID_DOMAIN}/#{owner}",
147
+ "https://#{ORCID_DOMAIN}/#{owner}"
148
+ ]
149
+ }
150
+ }
151
+ else
152
+ args = {
153
+ filter_expression: 'contains(:people, people)',
154
+ expression_attribute_values: { ':sk': 'METADATA', ':people': [email] }
155
+ }
156
+ end
157
+ logger&.debug(message: 'Fetch relevant DMPs _by_owner - scan args', details: args)
158
+ resp = client.scan(args)
159
+
160
+ client = Uc3DmpDynamo::Client.new if client.nil?
161
+ _process_search_response(response: client.query(args:, logger:))
162
+ end
163
+
164
+ # Fetch the DMP IDs for the specified organization/institution
165
+ def _by_org(org:, client: nil, logger: nil)
166
+ regex = /^[a-zA-Z0-9]+$/
167
+ ror = org.trim() unless (org.to_s =~ regex).nil?
141
168
 
142
169
  args = {
143
- index_name: 'dmphub_owner_id_gsi',
144
- key_conditions: {
145
- dmphub_owner_id: {
146
- attribute_value_list: [
147
- "http://orcid.org/#{owner_id}",
148
- "https://orcid.org/#{owner_id}"
149
- ],
150
- comparison_operator: 'IN'
151
- }
152
- },
153
- filter_expression: 'SK = :version',
154
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
170
+ filter_expression: 'contains(:affiliation_ids, affiliation_ids)',
171
+ expression_attribute_values: {
172
+ ':sk': 'METADATA',
173
+ ':affiliation_ids': [
174
+ "http://#{ROR_DOMAIN}/#{ror}",
175
+ "https://#{ROR_DOMAIN}/#{ror}"
176
+ ]
177
+ }
155
178
  }
156
- logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
179
+ logger&.debug(message: 'Fetch relevant DMPs _by_org - scan args', details: args)
180
+ resp = client.scan(args)
181
+
157
182
  client = Uc3DmpDynamo::Client.new if client.nil?
158
183
  _process_search_response(response: client.query(args:, logger:))
159
184
  end
160
185
 
161
- # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
162
- # on the DMP ID record)
163
- def _by_owner_org(owner_org:, client: nil, logger: nil)
186
+ # Fetch the DMP IDs for the specified funder
187
+ def _by_funder(funder:, client: nil, logger: nil)
164
188
  regex = /^[a-zA-Z0-9]+$/
165
- raise FinderError, MSG_INVALID_OWNER_ID if owner_org.nil? || (owner_org.to_s.downcase =~ regex).nil?
189
+ ror = funder.trim() unless (funder.to_s =~ regex).nil?
166
190
 
167
191
  args = {
168
- index_name: 'dmphub_owner_org_gsi',
169
- key_conditions: {
170
- dmphub_owner_org: {
171
- attribute_value_list: [
172
- "https://ror.org/#{owner_org.to_s.downcase}",
173
- "http://ror.org/#{owner_org.to_s.downcase}"
174
- ],
175
- comparison_operator: 'IN'
176
- }
177
- },
178
- filter_expression: 'SK = :version',
179
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
192
+ filter_expression: 'contains(:funder_ids, funder_ids)',
193
+ expression_attribute_values: {
194
+ ':sk': 'METADATA',
195
+ ':funder_ids': [
196
+ "http://#{ROR_DOMAIN}/#{ror}",
197
+ "https://#{ROR_DOMAIN}/#{ror}"
198
+ ]
199
+ }
180
200
  }
181
- logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
201
+ logger&.debug(message: 'Fetch relevant DMPs _by_funder - scan args', details: args)
202
+ resp = client.scan(args)
203
+
182
204
  client = Uc3DmpDynamo::Client.new if client.nil?
183
205
  _process_search_response(response: client.query(args:, logger:))
184
206
  end
185
207
 
186
- # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
187
- def _by_mod_day(day:, client: nil, logger: nil)
188
- regex = /^[0-9]{4}(-[0-9]{2}){2}/
189
- raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
208
+ # Fetch the DMP IDs that are marked as featured
209
+ def _by_featured(client: nil, logger: nil)
210
+ args = {
211
+ filter_expression: ':featured = featured',
212
+ expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
213
+ }
214
+ logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
215
+ resp = client.scan(args)
216
+
217
+ client = Uc3DmpDynamo::Client.new if client.nil?
218
+ _process_search_response(response: client.query(args:, logger:))
219
+ end
190
220
 
221
+ # Return all of the publicly visible DMPs
222
+ def _publicly_visible(client: nil, logger: nil)
191
223
  args = {
192
- index_name: 'dmphub_modification_day_gsi',
193
- key_conditions: {
194
- dmphub_modification_day: {
195
- attribute_value_list: [day.to_s],
196
- comparison_operator: 'IN'
197
- }
198
- },
199
- filter_expression: 'SK = :version',
200
- expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
224
+ filter_expression: ':visibility = visibility',
225
+ expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
201
226
  }
202
- logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
227
+ logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
228
+ resp = client.scan(args)
229
+
203
230
  client = Uc3DmpDynamo::Client.new if client.nil?
204
231
  _process_search_response(response: client.query(args:, logger:))
205
232
  end
@@ -212,8 +239,8 @@ module Uc3DmpId
212
239
  next if item.nil?
213
240
 
214
241
  dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
215
- dmp = _remove_narrative_if_private(json: dmp)
216
- Helper.cleanse_dmp_json(json: dmp)
242
+ # dmp = _remove_narrative_if_private(json: dmp)
243
+ # Helper.cleanse_dmp_json(json: dmp)
217
244
  end
218
245
  results.compact.uniq
219
246
  end
@@ -204,23 +204,13 @@ module Uc3DmpId
204
204
  # The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
205
205
  # so just find them and update the status accordingly
206
206
  mods = resp.dup
207
-
208
- puts 'MODIFICATIONS RECORD'
209
- puts mods
210
-
211
- puts 'INCOMING RECORD'
212
- puts json['dmphub_modifications']
213
-
214
-
215
207
  json['dmphub_modifications'].each do |entry|
216
208
  next if entry.fetch('dmproadmap_related_identifiers', []).empty?
217
209
 
218
210
  entry['dmproadmap_related_identifiers'].each do |related|
219
- related_id = mods['related_works'][related.identifier] if related.respond_to?(:identifier)
220
- related_id = mods['related_works'][related['identifier']] if related_id.nil?
221
- next if related_id.nil?
211
+ next if mods['related_works'][related.identifier].nil?
222
212
 
223
- mods['related_works'][related_id]['status'] = related['status']
213
+ mods['related_works'][related.identifier]['status'] = related['status']
224
214
  end
225
215
  end
226
216
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.50'
4
+ VERSION = '0.1.52'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.50
4
+ version: 0.1.52
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-08 00:00:00.000000000 Z
11
+ date: 2024-07-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json