uc3-dmp-id 0.1.78 → 0.1.79
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/finder.rb +79 -68
- data/lib/uc3-dmp-id/updater.rb +37 -53
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d05302ee8b8190c77e7efc0ad602ad0de2a74739b1653672d707b1fda95eec16
|
4
|
+
data.tar.gz: 3cca6911d51d58ff974308f8647bab075fb9bf717092bce45f3cbe2a151c7712
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e19a514c8233bdf9abc32c375b636611181738f6f64818431d0b3e26b1fd40c3817b5dcf1cfc8e1a270e3c521ad67636dd50baaec7638af9a162ebb864b26975
|
7
|
+
data.tar.gz: 3b09fdc606348c0c3dddcc7536563920c2971d475981c1b11a89a7147157db712d416d52eafec3689ec0e004be3d845acdf83cbb2113b2df08aca58936421915
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -17,22 +17,41 @@ module Uc3DmpId
|
|
17
17
|
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
|
18
18
|
Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
|
19
19
|
|
20
|
+
ORCID_DOMAIN = 'https://orcid.org/'
|
21
|
+
ROR_DOMAIN = 'https://ror.org/'
|
22
|
+
DOI_DOMAIN = 'https://doi.org/'
|
23
|
+
SORT_OPTIONS = %w[title modified]
|
24
|
+
SORT_DIRECTIONS = %w[asc desc]
|
25
|
+
MAX_PAGE_SIZE = 100
|
26
|
+
DEFAULT_PAGE_SIZE = 25
|
27
|
+
DEFAULT_SORT_OPTION = 'modified'
|
28
|
+
DEFAULT_SORT_DIR = 'desc'
|
29
|
+
|
20
30
|
class << self
|
21
31
|
# TODO: Replace this with ElasticSearch
|
22
32
|
def search_dmps(args:, logger: nil)
|
23
|
-
|
24
|
-
|
33
|
+
# Fetch the DMPs for each of the possible filter options
|
34
|
+
client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
|
35
|
+
owner = args['owner']
|
36
|
+
org = args['org']
|
37
|
+
funder = args['funder']
|
38
|
+
|
39
|
+
owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
|
40
|
+
org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
|
41
|
+
funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
|
42
|
+
# pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
|
43
|
+
logger&.debug(
|
44
|
+
message: 'PKs found',
|
45
|
+
details: { owner: owner_pks, org: org_pks, funder: funder_pks }
|
46
|
+
)
|
47
|
+
# return [] if pks.nil? || pks.empty?
|
25
48
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
return _by_mod_day(day: args['modification_day'], client:,
|
32
|
-
logger:)
|
33
|
-
end
|
49
|
+
# Only use the DMPs that fit all of the filter criteria
|
50
|
+
# dmps = pks.reduce(:&).flatten.uniq
|
51
|
+
# return [] if dmps.nil? || dmps.empty?
|
52
|
+
|
53
|
+
[owner_pks, org_pks, funder_pks].flatten.uniq
|
34
54
|
|
35
|
-
[]
|
36
55
|
end
|
37
56
|
|
38
57
|
# Find a DMP based on the contents of the incoming JSON
|
@@ -134,74 +153,66 @@ module Uc3DmpId
|
|
134
153
|
|
135
154
|
private
|
136
155
|
|
137
|
-
# Fetch the DMP IDs for the specified
|
138
|
-
def _by_owner(
|
139
|
-
|
140
|
-
|
156
|
+
# Fetch the DMP IDs for the specified person's ORCID (or email)
|
157
|
+
def _by_owner(owner:, client: nil, logger: nil)
|
158
|
+
orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
|
159
|
+
email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
|
160
|
+
orcid = owner.to_s.strip
|
161
|
+
return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
|
141
162
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
"https://orcid.org/#{owner_id}"
|
149
|
-
],
|
150
|
-
comparison_operator: 'IN'
|
151
|
-
}
|
152
|
-
},
|
153
|
-
filter_expression: 'SK = :version',
|
154
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
155
|
-
}
|
156
|
-
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
157
|
-
client = Uc3DmpDynamo::Client.new if client.nil?
|
158
|
-
_process_search_response(response: client.query(args:, logger:))
|
163
|
+
orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
|
164
|
+
resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
|
165
|
+
return [] unless resp.is_a?(Hash)
|
166
|
+
|
167
|
+
logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
|
168
|
+
resp.fetch('dmps', [])
|
159
169
|
end
|
160
170
|
|
161
|
-
# Fetch the DMP IDs for the specified organization/institution
|
162
|
-
|
163
|
-
def _by_owner_org(owner_org:, client: nil, logger: nil)
|
171
|
+
# Fetch the DMP IDs for the specified organization/institution
|
172
|
+
def _by_org(org:, client: nil, logger: nil)
|
164
173
|
regex = /^[a-zA-Z0-9]+$/
|
165
|
-
|
174
|
+
id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
|
175
|
+
|
176
|
+
resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
|
177
|
+
return [] unless resp.is_a?(Hash)
|
178
|
+
|
179
|
+
logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
|
180
|
+
resp.fetch('dmps', [])
|
181
|
+
end
|
166
182
|
|
183
|
+
# Fetch the DMP IDs for the specified funder
|
184
|
+
def _by_funder(funder:, client: nil, logger: nil)
|
185
|
+
regex = /^[a-zA-Z0-9]+$/
|
186
|
+
id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
|
187
|
+
id = "#{DOI_DOMAIN}#{org.strip}" if id.nil? && !(org.to_s =~ Helper::DOI_REGEX).nil?
|
188
|
+
|
189
|
+
resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
|
190
|
+
return [] unless resp.is_a?(Hash)
|
191
|
+
|
192
|
+
logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
|
193
|
+
resp.fetch('dmps', [])
|
194
|
+
end
|
195
|
+
|
196
|
+
# Fetch the DMP IDs that are marked as featured
|
197
|
+
def _by_featured(client: nil, logger: nil)
|
167
198
|
args = {
|
168
|
-
|
169
|
-
|
170
|
-
dmphub_owner_org: {
|
171
|
-
attribute_value_list: [
|
172
|
-
"https://ror.org/#{owner_org.to_s.downcase}",
|
173
|
-
"http://ror.org/#{owner_org.to_s.downcase}"
|
174
|
-
],
|
175
|
-
comparison_operator: 'IN'
|
176
|
-
}
|
177
|
-
},
|
178
|
-
filter_expression: 'SK = :version',
|
179
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
199
|
+
filter_expression: 'featured = :featured AND SK = :sk',
|
200
|
+
expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
|
180
201
|
}
|
181
|
-
logger
|
202
|
+
logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
|
182
203
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
183
|
-
_process_search_response(response: client.
|
204
|
+
_process_search_response(response: client.scan(args:))
|
184
205
|
end
|
185
206
|
|
186
|
-
#
|
187
|
-
def
|
188
|
-
regex = /^[0-9]{4}(-[0-9]{2}){2}/
|
189
|
-
raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
|
190
|
-
|
207
|
+
# Return all of the publicly visible DMPs
|
208
|
+
def _publicly_visible(client: nil, logger: nil)
|
191
209
|
args = {
|
192
|
-
|
193
|
-
|
194
|
-
dmphub_modification_day: {
|
195
|
-
attribute_value_list: [day.to_s],
|
196
|
-
comparison_operator: 'IN'
|
197
|
-
}
|
198
|
-
},
|
199
|
-
filter_expression: 'SK = :version',
|
200
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
210
|
+
filter_expression: 'visibility = :visibility AND SK = :sk',
|
211
|
+
expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
|
201
212
|
}
|
202
|
-
logger
|
213
|
+
logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
|
203
214
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
204
|
-
_process_search_response(response: client.
|
215
|
+
_process_search_response(response: client.scan(args:))
|
205
216
|
end
|
206
217
|
|
207
218
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
@@ -212,8 +223,8 @@ module Uc3DmpId
|
|
212
223
|
next if item.nil?
|
213
224
|
|
214
225
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
215
|
-
dmp = _remove_narrative_if_private(json: dmp)
|
216
|
-
Helper.cleanse_dmp_json(json: dmp)
|
226
|
+
# dmp = _remove_narrative_if_private(json: dmp)
|
227
|
+
# Helper.cleanse_dmp_json(json: dmp)
|
217
228
|
end
|
218
229
|
results.compact.uniq
|
219
230
|
end
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
@@ -16,9 +16,9 @@ module Uc3DmpId
|
|
16
16
|
def update(provenance:, p_key:, json: {}, logger: nil)
|
17
17
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
18
18
|
|
19
|
-
|
19
|
+
mods = Helper.parse_json(json:).fetch('dmp', {})
|
20
20
|
p_key = Helper.append_pk_prefix(p_key:)
|
21
|
-
logger.debug(message: "Incoming modifications for PK #{p_key}", details:
|
21
|
+
logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
|
22
22
|
|
23
23
|
# Fetch the latest version of the DMP ID
|
24
24
|
client = Uc3DmpDynamo::Client.new
|
@@ -28,30 +28,29 @@ module Uc3DmpId
|
|
28
28
|
|
29
29
|
# Verify that the DMP ID is updateable with the info passed in
|
30
30
|
errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
|
31
|
-
mods:
|
31
|
+
mods: mods['dmp'])
|
32
32
|
logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
|
33
33
|
raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
|
34
34
|
# Don't continue if nothing has changed!
|
35
|
-
raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b:
|
35
|
+
raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: mods)
|
36
36
|
|
37
37
|
# Version the DMP ID record (if applicable).
|
38
38
|
owner = latest_version['dmphub_provenance_id']
|
39
39
|
updater = provenance['PK']
|
40
40
|
version = Versioner.generate_version(client:, latest_version:, owner:,
|
41
41
|
updater:, logger:)
|
42
|
-
logger&.debug(message: 'New Version', details: version)
|
43
42
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
|
44
43
|
# Bail if the system trying to make the update is not the creator of the DMP ID
|
45
44
|
raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
|
46
45
|
|
47
46
|
# Handle any changes to the dmphub_modifications section
|
48
|
-
version = _process_harvester_mods(client:, p_key:, json:
|
49
|
-
logger&.debug(message: 'Version after process_harvester_mods', details: version)
|
47
|
+
version = _process_harvester_mods(client:, p_key:, json: version, logger:)
|
50
48
|
|
51
|
-
# Remove the version info
|
49
|
+
# Remove the version info because we don't want to save it on the record
|
52
50
|
version.delete('dmphub_versions')
|
53
|
-
version.delete('dmphub_modifications')
|
54
51
|
|
52
|
+
# Splice the assertions
|
53
|
+
version = _process_modifications(owner:, updater:, version:, mods:, logger:)
|
55
54
|
# Set the :modified timestamps
|
56
55
|
now = Time.now.utc
|
57
56
|
version['modified'] = now.iso8601
|
@@ -68,9 +67,9 @@ module Uc3DmpId
|
|
68
67
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
69
68
|
|
70
69
|
# Append the :dmphub_versions Array
|
71
|
-
|
72
|
-
|
73
|
-
Helper.cleanse_dmp_json(json:
|
70
|
+
json = JSON.parse({ dmp: version }.to_json)
|
71
|
+
json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
|
72
|
+
Helper.cleanse_dmp_json(json:)
|
74
73
|
end
|
75
74
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
76
75
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -131,6 +130,24 @@ module Uc3DmpId
|
|
131
130
|
end
|
132
131
|
# rubocop:enable Metrics/AbcSize
|
133
132
|
|
133
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
134
|
+
def _process_modifications(owner:, updater:, version:, mods:, logger: nil)
|
135
|
+
return version unless mods.is_a?(Hash) && !updater.nil?
|
136
|
+
return mods unless version.is_a?(Hash) && !owner.nil?
|
137
|
+
|
138
|
+
logger.debug(message: 'Modifications before merge.', details: mods) if logger.respond_to?(:debug)
|
139
|
+
keys_to_retain = version.keys.select do |key|
|
140
|
+
(key.start_with?('dmphub_') && !%w[dmphub_modifications dmphub_versions].include?(key)) ||
|
141
|
+
key.start_with?('PK') || key.start_with?('SK') || key.start_with?('dmproadmap_related_identifiers')
|
142
|
+
end
|
143
|
+
keys_to_retain.each do |key|
|
144
|
+
mods[key] = version[key]
|
145
|
+
end
|
146
|
+
logger.debug(message: 'Modifications after merge.', details: mods) if logger.respond_to?(:debug)
|
147
|
+
mods
|
148
|
+
end
|
149
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
150
|
+
|
134
151
|
# Once the DMP has been updated, we need to update its DOI metadata
|
135
152
|
# -------------------------------------------------------------------------
|
136
153
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
@@ -174,8 +191,7 @@ module Uc3DmpId
|
|
174
191
|
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
175
192
|
|
176
193
|
# Fetch any Harvester modifications to the JSON
|
177
|
-
def _process_harvester_mods(client:, p_key:, json:,
|
178
|
-
logger&.debug(message: 'Incoming modifications', details: json)
|
194
|
+
def _process_harvester_mods(client:, p_key:, json:, logger: nil)
|
179
195
|
return json if json.fetch('dmphub_modifications', []).empty?
|
180
196
|
|
181
197
|
# Fetch the `"SK": "HARVESTER_MODS"` record
|
@@ -185,54 +201,22 @@ module Uc3DmpId
|
|
185
201
|
)
|
186
202
|
return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
|
187
203
|
|
188
|
-
logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
|
189
204
|
# The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
|
190
205
|
# so just find them and update the status accordingly
|
191
|
-
|
192
|
-
json['dmproadmap_related_identifiers'] = [] if json['dmproadmap_related_identifiers'].nil?
|
193
|
-
|
206
|
+
mods = resp.dup
|
194
207
|
json['dmphub_modifications'].each do |entry|
|
195
208
|
next if entry.fetch('dmproadmap_related_identifiers', []).empty?
|
196
209
|
|
197
210
|
entry['dmproadmap_related_identifiers'].each do |related|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
key = "#{related_domain.end_with?('/') ? related_domain : "#{related_domain}/"}#{related_id}"
|
202
|
-
key_found = original['related_works'].has_key?(key)
|
203
|
-
logger&.debug(message: "No matching HARVEST_MOD found for #{key}") unless key_found
|
204
|
-
next unless key_found
|
205
|
-
|
206
|
-
# Update the status in the HARVESTER_MODS record
|
207
|
-
logger&.debug(message: "Updating status for #{key} from #{original['related_works'][key]['status']} to #{related['status']}")
|
208
|
-
original['related_works'][key]['status'] = related['status']
|
209
|
-
|
210
|
-
existing = version['dmproadmap_related_identifiers'].select do |ri|
|
211
|
-
ri['identifier'] == key
|
212
|
-
end
|
213
|
-
|
214
|
-
# Add it if it was approved and doesn't exist in dmproadmap_related_identifiers
|
215
|
-
if related['status'] == 'approved' && existing.empty?
|
216
|
-
version['dmproadmap_related_identifiers'] << {
|
217
|
-
identifier: key,
|
218
|
-
work_type: related['work_type'],
|
219
|
-
type: related['type'],
|
220
|
-
descriptor: related['descriptor'],
|
221
|
-
citation: related['citation']
|
222
|
-
}
|
223
|
-
elsif related['status'] == 'rejected' && existing.any?
|
224
|
-
# otherwise remove it
|
225
|
-
version['dmproadmap_related_identifiers'] = version['dmproadmap_related_identifiers'].reject { |ri| ri == existing.first }
|
226
|
-
end
|
211
|
+
next if mods['related_works'][related.identifier].nil?
|
212
|
+
|
213
|
+
mods['related_works'][related.identifier]['status'] = related['status']
|
227
214
|
end
|
228
215
|
end
|
229
216
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
logger&.debug(message: 'Returning updated VERSION:', details: version)
|
235
|
-
version
|
217
|
+
client.put_item(json: mods, logger:)
|
218
|
+
json.delete('dmphub_modifications')
|
219
|
+
json
|
236
220
|
end
|
237
221
|
end
|
238
222
|
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.79
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|