uc3-dmp-id 0.1.81 → 0.1.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/finder.rb +68 -83
- data/lib/uc3-dmp-id/updater.rb +39 -11
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9c9845166f4bdaebc57491464cf46037645a6fccd2cc3274aa988ee6c7a735ad
|
|
4
|
+
data.tar.gz: 39d2e885ac1b4a8fd95796464d117c6a86b060e4842b99814dc30972160dc9ce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d850ae3e53fb18bae12856a45f2fda95d66664f70052ed6f9c33e047de47728a5936012529a8727c59ccd671d687c6834d6880a97f915c9e4538d282ec3ea320
|
|
7
|
+
data.tar.gz: 2fd983b7d683a5da85f21a1f7001d5f8b1f7a47e798f5fffcb9db98e3486890d6982746d7a65b8d30377aa7eab5c8fcde377ba9b4d31fdc2cfd633d4bcb9fcb1
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
|
@@ -17,43 +17,22 @@ module Uc3DmpId
|
|
|
17
17
|
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
|
|
18
18
|
Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
|
|
19
19
|
|
|
20
|
-
ORCID_DOMAIN = 'https://orcid.org/'
|
|
21
|
-
ROR_DOMAIN = 'https://ror.org/'
|
|
22
|
-
DOI_DOMAIN = 'https://doi.org/'
|
|
23
|
-
SORT_OPTIONS = %w[title modified]
|
|
24
|
-
SORT_DIRECTIONS = %w[asc desc]
|
|
25
|
-
MAX_PAGE_SIZE = 100
|
|
26
|
-
DEFAULT_PAGE_SIZE = 25
|
|
27
|
-
DEFAULT_SORT_OPTION = 'modified'
|
|
28
|
-
DEFAULT_SORT_DIR = 'desc'
|
|
29
|
-
|
|
30
20
|
class << self
|
|
31
21
|
# TODO: Replace this with ElasticSearch
|
|
32
22
|
# Search for DMP IDs using the first supported filter present in +args+.
#
# Filters are checked in priority order and only the first one found is applied:
# 'owner_orcid', then 'owner_org_ror', then 'modification_day'.
#
# @param args [Hash] the search criteria, keyed by String
# @param logger [Object, nil] optional logger that is handed on to the selected lookup
# @return [Array] the result of the selected lookup, or [] when no supported filter was supplied
# @raise whatever the selected lookup raises when the supplied value fails its validation
def search_dmps(args:, logger: nil)
  client = Uc3DmpDynamo::Client.new

  # The ORCID must be passed as :owner_id, the keyword _by_owner declares. Passing it as
  # :owner_org raised an ArgumentError (missing/unknown keyword) on every owner search.
  return _by_owner(owner_id: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
  return _by_owner_org(owner_org: args['owner_org_ror'], client:, logger:) unless args['owner_org_ror'].nil?
  return _by_mod_day(day: args['modification_day'], client:, logger:) unless args['modification_day'].nil?

  []
end
|
|
58
37
|
|
|
59
38
|
# Find a DMP based on the contents of the incoming JSON
|
|
@@ -155,68 +134,74 @@ module Uc3DmpId
|
|
|
155
134
|
|
|
156
135
|
private
|
|
157
136
|
|
|
158
|
-
# Fetch the DMP IDs for the specified
|
|
159
|
-
def _by_owner(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
orcid = owner.to_s.strip
|
|
163
|
-
return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
|
|
137
|
+
# Fetch the DMP IDs for the specified owner's ORCID (the owner is the :dmphub_owner_id on the DMP ID record)
|
|
138
|
+
# Query the 'dmphub_owner_id_gsi' index for the latest version of every DMP ID owned by an ORCID.
#
# @param owner_id [String] the bare ORCID (four dash separated groups of four characters),
#   without the orcid.org domain
# @param client [#query, nil] the client used to run the query; a Uc3DmpDynamo::Client is
#   created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return the value produced by _process_search_response for the query response
# @raise [FinderError] when owner_id is nil or is not a bare ORCID
def _by_owner(owner_id:, client: nil, logger: nil)
  # Anchor with \A and \z rather than ^ and $: the latter match per line, which let a
  # multi-line value through as long as one of its lines looked like an ORCID.
  regex = /\A([0-9A-Z]{4}-){3}[0-9A-Z]{4}\z/
  raise FinderError, MSG_INVALID_OWNER_ID unless owner_id.to_s.match?(regex)

  # NOTE(review): confirm that Uc3DmpDynamo::Client#query accepts the 'IN' comparison
  # operator for a key condition; the ORCID may have been stored with either URL scheme.
  args = {
    index_name: 'dmphub_owner_id_gsi',
    key_conditions: {
      dmphub_owner_id: {
        attribute_value_list: [
          "http://orcid.org/#{owner_id}",
          "https://orcid.org/#{owner_id}"
        ],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
|
185
160
|
|
|
186
|
-
# Fetch the DMP IDs for the specified
|
|
187
|
-
|
|
161
|
+
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
|
|
162
|
+
# on the DMP ID record)
|
|
163
|
+
# Query the 'dmphub_owner_org_gsi' index for the latest version of every DMP ID that belongs
# to an organization.
#
# @param owner_org [String] the bare ROR id (letters and digits only), without the ror.org
#   domain; it is lower-cased before use
# @param client [#query, nil] the client used to run the query; a Uc3DmpDynamo::Client is
#   created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return the value produced by _process_search_response for the query response
# @raise [FinderError] when owner_org is nil or contains anything other than letters and digits
def _by_owner_org(owner_org:, client: nil, logger: nil)
  ror_id = owner_org.to_s.downcase
  # Anchor with \A and \z rather than ^ and $: the latter match per line, which let a
  # multi-line value through as long as one of its lines was alphanumeric.
  raise FinderError, MSG_INVALID_OWNER_ID unless ror_id.match?(/\A[a-z0-9]+\z/)

  # NOTE(review): confirm that Uc3DmpDynamo::Client#query accepts the 'IN' comparison
  # operator for a key condition; the ROR may have been stored with either URL scheme.
  args = {
    index_name: 'dmphub_owner_org_gsi',
    key_conditions: {
      dmphub_owner_org: {
        attribute_value_list: [
          "https://ror.org/#{ror_id}",
          "http://ror.org/#{ror_id}"
        ],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
|
210
185
|
|
|
211
|
-
#
|
|
212
|
-
def
|
|
186
|
+
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
|
|
187
|
+
# Query the 'dmphub_modification_day_gsi' index for the latest version of every DMP ID that
# was modified on a given day.
#
# @param day [String, #to_s] the day, formatted exactly as YYYY-MM-DD
# @param client [#query, nil] the client used to run the query; a Uc3DmpDynamo::Client is
#   created when nil
# @param logger [Object, nil] optional logger; only used when it responds to :info
# @return the value produced by _process_search_response for the query response
# @raise [FinderError] when day is nil or is not formatted as YYYY-MM-DD
def _by_mod_day(day:, client: nil, logger: nil)
  # The pattern is anchored on both ends. Previously it had no end anchor and used ^, so
  # values such as '2023-08-01garbage' or a multi-line string passed validation.
  regex = /\A[0-9]{4}(-[0-9]{2}){2}\z/
  # NOTE(review): MSG_INVALID_OWNER_ID is reused here although the invalid value is a date;
  # consider a dedicated message constant.
  raise FinderError, MSG_INVALID_OWNER_ID unless day.to_s.match?(regex)

  # NOTE(review): confirm that Uc3DmpDynamo::Client#query accepts the 'IN' comparison
  # operator for a key condition.
  args = {
    index_name: 'dmphub_modification_day_gsi',
    key_conditions: {
      dmphub_modification_day: {
        attribute_value_list: [day.to_s],
        comparison_operator: 'IN'
      }
    },
    # Only the latest version of each DMP ID is wanted
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
  client = Uc3DmpDynamo::Client.new if client.nil?
  _process_search_response(response: client.query(args:, logger:))
end
|
|
221
206
|
|
|
222
207
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
|
@@ -227,8 +212,8 @@ module Uc3DmpId
|
|
|
227
212
|
next if item.nil?
|
|
228
213
|
|
|
229
214
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
|
230
|
-
|
|
231
|
-
|
|
215
|
+
dmp = _remove_narrative_if_private(json: dmp)
|
|
216
|
+
Helper.cleanse_dmp_json(json: dmp)
|
|
232
217
|
end
|
|
233
218
|
results.compact.uniq
|
|
234
219
|
end
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
|
@@ -49,27 +49,21 @@ module Uc3DmpId
|
|
|
49
49
|
logger&.debug(message: 'Version after process_harvester_mods', details: version)
|
|
50
50
|
raise UpdaterError, Helper::MSG_SERVER_ERROR if version.nil?
|
|
51
51
|
|
|
52
|
-
#
|
|
53
|
-
version
|
|
54
|
-
version.delete('dmphub_modifications')
|
|
55
|
-
|
|
56
|
-
# Set the :modified timestamps
|
|
57
|
-
now = Time.now.utc
|
|
58
|
-
version['modified'] = now.iso8601
|
|
59
|
-
version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
|
|
52
|
+
# Process the incoming payload
|
|
53
|
+
payload = _process_modifications(owner:, updater:, version:, payload:, logger:)
|
|
60
54
|
|
|
61
55
|
# Save the changes
|
|
62
|
-
resp = client.put_item(json:
|
|
56
|
+
resp = client.put_item(json: payload, logger:)
|
|
63
57
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
|
64
58
|
|
|
65
59
|
# Send the updates to EZID
|
|
66
|
-
_post_process(provenance:, json:
|
|
60
|
+
_post_process(provenance:, json: payload, logger:)
|
|
67
61
|
|
|
68
62
|
# Return the new version record
|
|
69
63
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
|
70
64
|
|
|
71
65
|
# Append the :dmphub_versions Array
|
|
72
|
-
out = JSON.parse({ dmp:
|
|
66
|
+
out = JSON.parse({ dmp: payload }.to_json)
|
|
73
67
|
out = Versioner.append_versions(p_key:, dmp: out, client:, logger:)
|
|
74
68
|
Helper.cleanse_dmp_json(json: out)
|
|
75
69
|
end
|
|
@@ -235,6 +229,40 @@ module Uc3DmpId
|
|
|
235
229
|
logger&.debug(message: 'Returning updated VERSION:', details: version)
|
|
236
230
|
version
|
|
237
231
|
end
|
|
232
|
+
|
|
233
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
234
|
+
# Merge the incoming payload with the current version of the DMP ID to build the record to save.
#
# The payload is modified in place: the keys 'modified', 'dmphub_modifications' and
# 'dmphub_versions' are dropped, 'PK' is taken from the version, 'SK' is set to the latest
# version marker, the modification timestamps are refreshed, and every other attribute of
# the version that the payload has no value for is carried over.
#
# @param owner [Object, nil] the owner of the DMP ID; only checked for presence
# @param updater [Object, nil] the entity making the change; only checked for presence
# @param version [Hash] the current version of the DMP ID record
# @param payload [Hash] the incoming changes
# @param logger [Object, nil] optional logger; only used when it responds to :debug
# @return [Hash] the merged payload; the unchanged version when the payload is not a Hash or
#   there is no updater; the unchanged payload when the version is not a Hash or there is no owner
def _process_modifications(owner:, updater:, version:, payload:, logger: nil)
  return version unless payload.is_a?(Hash) && !updater.nil?
  return payload unless version.is_a?(Hash) && !owner.nil?

  logger.debug(message: 'Modifications before processing.', details: payload) if logger.respond_to?(:debug)

  excluded_keys = %w[modified dmphub_modifications dmphub_versions]

  # Never trust these values from the incoming payload
  excluded_keys.each { |key| payload.delete(key) }

  # Always include the PK and SK!
  payload['PK'] = version['PK']
  payload['SK'] = Helper::DMP_LATEST_VERSION

  # Set the :modified timestamps on the payload, because the payload is what gets returned
  # for saving. They used to be set on the version, but 'modified' is an excluded key and
  # was never copied across, and a stale 'dmphub_modification_day' in the payload won out.
  now = Time.now.utc
  payload['modified'] = now.iso8601
  payload['dmphub_modification_day'] = now.strftime('%Y-%m-%d')

  # Retain all the other attributes on the original version unless they are
  # in the list of exclusions OR the incoming payload already has a value for it
  version.each do |key, value|
    next if excluded_keys.include?(key) || !payload[key].nil?

    payload[key] = value
  end
  logger.debug(message: 'Modifications after processing.', details: payload) if logger.respond_to?(:debug)
  payload
end
|
|
264
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
265
|
+
|
|
238
266
|
end
|
|
239
267
|
end
|
|
240
268
|
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED