uc3-dmp-id 0.1.81 → 0.1.82
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/finder.rb +68 -83
- data/lib/uc3-dmp-id/updater.rb +39 -11
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c9845166f4bdaebc57491464cf46037645a6fccd2cc3274aa988ee6c7a735ad
|
4
|
+
data.tar.gz: 39d2e885ac1b4a8fd95796464d117c6a86b060e4842b99814dc30972160dc9ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d850ae3e53fb18bae12856a45f2fda95d66664f70052ed6f9c33e047de47728a5936012529a8727c59ccd671d687c6834d6880a97f915c9e4538d282ec3ea320
|
7
|
+
data.tar.gz: 2fd983b7d683a5da85f21a1f7001d5f8b1f7a47e798f5fffcb9db98e3486890d6982746d7a65b8d30377aa7eab5c8fcde377ba9b4d31fdc2cfd633d4bcb9fcb1
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -17,43 +17,22 @@ module Uc3DmpId
|
|
17
17
|
# Message reported when no Provenance identifier was supplied.
# Built from two adjacent literals: inside single quotes a trailing backslash is NOT a line
# continuation, so the former one-literal form embedded a backslash, a newline and the
# source indentation in the message. The sample JSON is also closed properly now.
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. ' \
                      'Expected: `{ "dmp_id": { "identifier": "value", "type": "value" } }`'
|
19
19
|
|
20
|
-
ORCID_DOMAIN = 'https://orcid.org/'
|
21
|
-
ROR_DOMAIN = 'https://ror.org/'
|
22
|
-
DOI_DOMAIN = 'https://doi.org/'
|
23
|
-
SORT_OPTIONS = %w[title modified]
|
24
|
-
SORT_DIRECTIONS = %w[asc desc]
|
25
|
-
MAX_PAGE_SIZE = 100
|
26
|
-
DEFAULT_PAGE_SIZE = 25
|
27
|
-
DEFAULT_SORT_OPTION = 'modified'
|
28
|
-
DEFAULT_SORT_DIR = 'desc'
|
29
|
-
|
30
20
|
class << self
|
31
21
|
# TODO: Replace this with ElasticSearch
|
32
22
|
# Search for DMP IDs using the first supported filter found in +args+.
#
# Filters are checked in this order and only the first one present is used:
# 'owner_orcid', then 'owner_org_ror', then 'modification_day'.
#
# @param args [Hash] search criteria keyed by String
# @param logger [Object, nil] optional logger handed on to the selected finder
# @return [Array] the selected finder's result, or [] when no supported filter was supplied
def search_dmps(args:, logger: nil)
  client = Uc3DmpDynamo::Client.new

  # _by_owner's keyword is :owner_id; passing :owner_org raised ArgumentError on every ORCID search
  return _by_owner(owner_id: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
  return _by_owner_org(owner_org: args['owner_org_ror'], client:, logger:) unless args['owner_org_ror'].nil?
  return _by_mod_day(day: args['modification_day'], client:, logger:) unless args['modification_day'].nil?

  []
end
|
58
37
|
|
59
38
|
# Find a DMP based on the contents of the incoming JSON
|
@@ -155,68 +134,74 @@ module Uc3DmpId
|
|
155
134
|
|
156
135
|
private
|
157
136
|
|
158
|
-
# Fetch the DMP IDs for the specified
|
159
|
-
def _by_owner(
|
160
|
-
|
161
|
-
|
162
|
-
orcid = owner.to_s.strip
|
163
|
-
return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
|
137
|
+
# Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the
# DMP ID record). Both the http:// and https:// forms of the ORCID URL are searched, and results
# are filtered to the latest version (SK = Helper::DMP_LATEST_VERSION).
#
# @param owner_id [#to_s] the bare ORCID, e.g. '0000-0002-1825-0097'
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger (only used if it responds to :info)
# @return the value produced by _process_search_response
# @raise [FinderError] when owner_id is nil or is not a bare ORCID
def _by_owner(owner_id:, client: nil, logger: nil)
  orcid = owner_id.to_s
  # \A and \z anchor the whole value. The previous ^/$ anchors only matched a single line, so a
  # multi-line value with one ORCID-shaped line passed validation and reached the query.
  raise FinderError, MSG_INVALID_OWNER_ID unless orcid.match?(/\A([0-9A-Z]{4}-){3}[0-9A-Z]{4}\z/)

  # NOTE(review): DynamoDB documents only EQ for a partition key in the legacy KeyConditions
  # parameter, and does not allow mixing it with expression parameters such as
  # filter_expression. Confirm that Uc3DmpDynamo::Client#query translates this shape.
  args = {
    index_name: 'dmphub_owner_id_gsi',
    key_conditions: {
      dmphub_owner_id: {
        attribute_value_list: [
          "http://orcid.org/#{orcid}",
          "https://orcid.org/#{orcid}"
        ],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
185
160
|
|
186
|
-
# Fetch the DMP IDs for the specified
|
187
|
-
|
161
|
+
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
# on the DMP ID record). The identifier is lower-cased, both the https:// and http:// forms of
# the ROR URL are searched, and results are filtered to the latest version.
#
# @param owner_org [#to_s] the bare ROR identifier (letters and digits only)
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger (only used if it responds to :info)
# @return the value produced by _process_search_response
# @raise [FinderError] when owner_org is nil or contains anything but letters and digits
def _by_owner_org(owner_org:, client: nil, logger: nil)
  ror = owner_org.to_s.downcase
  # \A and \z anchor the whole value. The previous ^/$ anchors accepted any multi-line value
  # that happened to contain one alphanumeric line.
  # NOTE(review): this reuses MSG_INVALID_OWNER_ID for an invalid organization identifier;
  # a dedicated message may be clearer for callers.
  raise FinderError, MSG_INVALID_OWNER_ID unless ror.match?(/\A[a-zA-Z0-9]+\z/)

  # NOTE(review): DynamoDB documents only EQ for a partition key in the legacy KeyConditions
  # parameter. Confirm that Uc3DmpDynamo::Client#query translates this shape.
  args = {
    index_name: 'dmphub_owner_org_gsi',
    key_conditions: {
      dmphub_owner_org: {
        attribute_value_list: [
          "https://ror.org/#{ror}",
          "http://ror.org/#{ror}"
        ],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
210
185
|
|
211
|
-
#
|
212
|
-
def
|
186
|
+
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on
# the DMP ID record). Results are filtered to the latest version.
#
# @param day [#to_s] the day to search for, formatted as YYYY-MM-DD
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] optional logger (only used if it responds to :info)
# @return the value produced by _process_search_response
# @raise [FinderError] when day is nil or is not exactly YYYY-MM-DD
def _by_mod_day(day:, client: nil, logger: nil)
  date = day.to_s
  # The previous pattern had no end anchor and used the per-line ^ anchor, so values such as
  # "2023-01-01anything" or multi-line text passed validation even though the query matches
  # the value exactly. \A and \z require the whole value to be a date.
  # NOTE(review): this reuses MSG_INVALID_OWNER_ID for an invalid date; a dedicated message
  # may be clearer for callers.
  raise FinderError, MSG_INVALID_OWNER_ID unless date.match?(/\A[0-9]{4}(-[0-9]{2}){2}\z/)

  # NOTE(review): DynamoDB documents only EQ for a partition key in the legacy KeyConditions
  # parameter. Confirm that Uc3DmpDynamo::Client#query translates this shape.
  args = {
    index_name: 'dmphub_modification_day_gsi',
    key_conditions: {
      dmphub_modification_day: {
        attribute_value_list: [date],
        comparison_operator: 'IN'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
221
206
|
|
222
207
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
@@ -227,8 +212,8 @@ module Uc3DmpId
|
|
227
212
|
next if item.nil?
|
228
213
|
|
229
214
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
230
|
-
|
231
|
-
|
215
|
+
dmp = _remove_narrative_if_private(json: dmp)
|
216
|
+
Helper.cleanse_dmp_json(json: dmp)
|
232
217
|
end
|
233
218
|
results.compact.uniq
|
234
219
|
end
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
@@ -49,27 +49,21 @@ module Uc3DmpId
|
|
49
49
|
logger&.debug(message: 'Version after process_harvester_mods', details: version)
|
50
50
|
raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
|
51
51
|
|
52
|
-
#
|
53
|
-
version
|
54
|
-
version.delete('dmphub_modifications')
|
55
|
-
|
56
|
-
# Set the :modified timestamps
|
57
|
-
now = Time.now.utc
|
58
|
-
version['modified'] = now.iso8601
|
59
|
-
version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
|
52
|
+
# Process the incoming payload
|
53
|
+
payload = _process_modifications(owner:, updater:, version:, payload:, logger:)
|
60
54
|
|
61
55
|
# Save the changes
|
62
|
-
resp = client.put_item(json:
|
56
|
+
resp = client.put_item(json: payload, logger:)
|
63
57
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
64
58
|
|
65
59
|
# Send the updates to EZID
|
66
|
-
_post_process(provenance:, json:
|
60
|
+
_post_process(provenance:, json: payload, logger:)
|
67
61
|
|
68
62
|
# Return the new version record
|
69
63
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
70
64
|
|
71
65
|
# Append the :dmphub_versions Array
|
72
|
-
out = JSON.parse({ dmp:
|
66
|
+
out = JSON.parse({ dmp: payload }.to_json)
|
73
67
|
out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
|
74
68
|
Helper.cleanse_dmp_json(json: out)
|
75
69
|
end
|
@@ -235,6 +229,40 @@ module Uc3DmpId
|
|
235
229
|
logger&.debug(message: 'Returning updated VERSION:', details: version)
|
236
230
|
version
|
237
231
|
end
|
232
|
+
|
233
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
# Merge the incoming +payload+ with the current +version+ to build the record to be saved.
#
# The payload is updated in place and returned. It always:
#   - loses 'modified', 'dmphub_modifications' and 'dmphub_versions' as supplied by the caller
#   - carries the version's PK and the latest-version SK
#   - gets fresh 'modified' and 'dmphub_modification_day' values (UTC)
#   - inherits every other attribute of +version+ for which it has no value of its own
#
# @param owner [Object, nil] the payload is returned untouched when nil
# @param updater [Object, nil] the version is returned untouched when nil
# @param version [Hash] the current record; it is no longer mutated
# @param payload [Hash] the incoming changes
# @param logger [Object, nil] optional logger (only used if it responds to :debug)
# @return [Hash] the merged payload (or +version+ / +payload+ as-is, see the guards)
def _process_modifications(owner:, updater:, version:, payload:, logger: nil)
  return version unless payload.is_a?(Hash) && !updater.nil?
  return payload unless version.is_a?(Hash) && !owner.nil?

  logger.debug(message: 'Modifications before processing.', details: payload) if logger.respond_to?(:debug)

  excluded_keys = %w[modified dmphub_modifications dmphub_versions]

  # Always remove the dmphub_versions and dmphub_modifications (and any caller supplied :modified)
  excluded_keys.each { |key| payload.delete(key) }

  # Always include the PK and SK!
  payload['PK'] = version['PK']
  payload['SK'] = Helper::DMP_LATEST_VERSION

  # Set the :modified timestamps on the record that is actually returned and saved. They used to
  # be written to +version+. Since 'modified' is an excluded key it was never copied over, so
  # the result had no 'modified' at all, and a caller supplied modification day went unrefreshed.
  now = Time.now.utc
  payload['modified'] = now.iso8601
  payload['dmphub_modification_day'] = now.strftime('%Y-%m-%d')

  # Retain all the other attributes on the original version unless they are
  # in the list of exclusions OR the incoming payload already has a value for it
  version.each do |key, value|
    next if excluded_keys.include?(key) || !payload[key].nil?

    payload[key] = value
  end
  logger.debug(message: 'Modifications after processing.', details: payload) if logger.respond_to?(:debug)
  payload
end
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
265
|
+
|
238
266
|
end
|
239
267
|
end
|
240
268
|
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED