uc3-dmp-id 0.1.78 → 0.1.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/finder.rb +79 -68
- data/lib/uc3-dmp-id/updater.rb +37 -53
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d05302ee8b8190c77e7efc0ad602ad0de2a74739b1653672d707b1fda95eec16
|
4
|
+
data.tar.gz: 3cca6911d51d58ff974308f8647bab075fb9bf717092bce45f3cbe2a151c7712
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e19a514c8233bdf9abc32c375b636611181738f6f64818431d0b3e26b1fd40c3817b5dcf1cfc8e1a270e3c521ad67636dd50baaec7638af9a162ebb864b26975
|
7
|
+
data.tar.gz: 3b09fdc606348c0c3dddcc7536563920c2971d475981c1b11a89a7147157db712d416d52eafec3689ec0e004be3d845acdf83cbb2113b2df08aca58936421915
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -17,22 +17,41 @@ module Uc3DmpId
|
|
17
17
|
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
|
18
18
|
Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
|
19
19
|
|
20
|
+
ORCID_DOMAIN = 'https://orcid.org/'
|
21
|
+
ROR_DOMAIN = 'https://ror.org/'
|
22
|
+
DOI_DOMAIN = 'https://doi.org/'
|
23
|
+
SORT_OPTIONS = %w[title modified]
|
24
|
+
SORT_DIRECTIONS = %w[asc desc]
|
25
|
+
MAX_PAGE_SIZE = 100
|
26
|
+
DEFAULT_PAGE_SIZE = 25
|
27
|
+
DEFAULT_SORT_OPTION = 'modified'
|
28
|
+
DEFAULT_SORT_DIR = 'desc'
|
29
|
+
|
20
30
|
class << self
|
21
31
|
# TODO: Replace this with ElasticSearch
|
22
32
|
def search_dmps(args:, logger: nil)
|
23
|
-
|
24
|
-
|
33
|
+
# Fetch the DMPs for each of the possible filter options
|
34
|
+
client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
|
35
|
+
owner = args['owner']
|
36
|
+
org = args['org']
|
37
|
+
funder = args['funder']
|
38
|
+
|
39
|
+
owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
|
40
|
+
org_pks = org.nil? ? [] : _by_org(org: org, client:, logger:)
|
41
|
+
funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
|
42
|
+
# pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
|
43
|
+
logger&.debug(
|
44
|
+
message: 'PKs found',
|
45
|
+
details: { owner: owner_pks, org: org_pks, funder: funder_pks }
|
46
|
+
)
|
47
|
+
# return [] if pks.nil? || pks.empty?
|
25
48
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
return _by_mod_day(day: args['modification_day'], client:,
|
32
|
-
logger:)
|
33
|
-
end
|
49
|
+
# Only use the DMPs that fit all of the filter criteria
|
50
|
+
# dmps = pks.reduce(:&).flatten.uniq
|
51
|
+
# return [] if dmps.nil? || dmps.empty?
|
52
|
+
|
53
|
+
[owner_pks, org_pks, funder_pks].flatten.uniq
|
34
54
|
|
35
|
-
[]
|
36
55
|
end
|
37
56
|
|
38
57
|
# Find a DMP based on the contents of the incoming JSON
|
@@ -134,74 +153,66 @@ module Uc3DmpId
|
|
134
153
|
|
135
154
|
private
|
136
155
|
|
137
|
-
# Fetch the DMP IDs for the specified
|
138
|
-
def _by_owner(
|
139
|
-
|
140
|
-
|
156
|
+
# Fetch the DMP IDs for the specified person's ORCID (or email)
|
157
|
+
def _by_owner(owner:, client: nil, logger: nil)
|
158
|
+
orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
|
159
|
+
email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
|
160
|
+
orcid = owner.to_s.strip
|
161
|
+
return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
|
141
162
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
"https://orcid.org/#{owner_id}"
|
149
|
-
],
|
150
|
-
comparison_operator: 'IN'
|
151
|
-
}
|
152
|
-
},
|
153
|
-
filter_expression: 'SK = :version',
|
154
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
155
|
-
}
|
156
|
-
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
157
|
-
client = Uc3DmpDynamo::Client.new if client.nil?
|
158
|
-
_process_search_response(response: client.query(args:, logger:))
|
163
|
+
orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
|
164
|
+
resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
|
165
|
+
return [] unless resp.is_a?(Hash)
|
166
|
+
|
167
|
+
logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
|
168
|
+
resp.fetch('dmps', [])
|
159
169
|
end
|
160
170
|
|
161
|
-
# Fetch the DMP IDs for the specified organization/institution
|
162
|
-
|
163
|
-
def _by_owner_org(owner_org:, client: nil, logger: nil)
|
171
|
+
# Fetch the DMP IDs for the specified organization/institution
|
172
|
+
def _by_org(org:, client: nil, logger: nil)
|
164
173
|
regex = /^[a-zA-Z0-9]+$/
|
165
|
-
|
174
|
+
id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
|
175
|
+
|
176
|
+
resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
|
177
|
+
return [] unless resp.is_a?(Hash)
|
178
|
+
|
179
|
+
logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
|
180
|
+
resp.fetch('dmps', [])
|
181
|
+
end
|
166
182
|
|
183
|
+
# Fetch the DMP IDs for the specified funder
|
184
|
+
def _by_funder(funder:, client: nil, logger: nil)
|
185
|
+
regex = /^[a-zA-Z0-9]+$/
|
186
|
+
id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
|
187
|
+
id = "#{DOI_DOMAIN}#{org.strip}" if id.nil? && !(org.to_s =~ Helper::DOI_REGEX).nil?
|
188
|
+
|
189
|
+
resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
|
190
|
+
return [] unless resp.is_a?(Hash)
|
191
|
+
|
192
|
+
logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
|
193
|
+
resp.fetch('dmps', [])
|
194
|
+
end
|
195
|
+
|
196
|
+
# Fetch the DMP IDs that are marked as featured
|
197
|
+
def _by_featured(client: nil, logger: nil)
|
167
198
|
args = {
|
168
|
-
|
169
|
-
|
170
|
-
dmphub_owner_org: {
|
171
|
-
attribute_value_list: [
|
172
|
-
"https://ror.org/#{owner_org.to_s.downcase}",
|
173
|
-
"http://ror.org/#{owner_org.to_s.downcase}"
|
174
|
-
],
|
175
|
-
comparison_operator: 'IN'
|
176
|
-
}
|
177
|
-
},
|
178
|
-
filter_expression: 'SK = :version',
|
179
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
199
|
+
filter_expression: 'featured = :featured AND SK = :sk',
|
200
|
+
expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
|
180
201
|
}
|
181
|
-
logger
|
202
|
+
logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
|
182
203
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
183
|
-
_process_search_response(response: client.
|
204
|
+
_process_search_response(response: client.scan(args:))
|
184
205
|
end
|
185
206
|
|
186
|
-
#
|
187
|
-
def
|
188
|
-
regex = /^[0-9]{4}(-[0-9]{2}){2}/
|
189
|
-
raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
|
190
|
-
|
207
|
+
# Return all of the publicly visible DMPs
|
208
|
+
def _publicly_visible(client: nil, logger: nil)
|
191
209
|
args = {
|
192
|
-
|
193
|
-
|
194
|
-
dmphub_modification_day: {
|
195
|
-
attribute_value_list: [day.to_s],
|
196
|
-
comparison_operator: 'IN'
|
197
|
-
}
|
198
|
-
},
|
199
|
-
filter_expression: 'SK = :version',
|
200
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
210
|
+
filter_expression: 'visibility = :visibility AND SK = :sk',
|
211
|
+
expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
|
201
212
|
}
|
202
|
-
logger
|
213
|
+
logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
|
203
214
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
204
|
-
_process_search_response(response: client.
|
215
|
+
_process_search_response(response: client.scan(args:))
|
205
216
|
end
|
206
217
|
|
207
218
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
@@ -212,8 +223,8 @@ module Uc3DmpId
|
|
212
223
|
next if item.nil?
|
213
224
|
|
214
225
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
215
|
-
dmp = _remove_narrative_if_private(json: dmp)
|
216
|
-
Helper.cleanse_dmp_json(json: dmp)
|
226
|
+
# dmp = _remove_narrative_if_private(json: dmp)
|
227
|
+
# Helper.cleanse_dmp_json(json: dmp)
|
217
228
|
end
|
218
229
|
results.compact.uniq
|
219
230
|
end
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
@@ -16,9 +16,9 @@ module Uc3DmpId
|
|
16
16
|
def update(provenance:, p_key:, json: {}, logger: nil)
|
17
17
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
18
18
|
|
19
|
-
|
19
|
+
mods = Helper.parse_json(json:).fetch('dmp', {})
|
20
20
|
p_key = Helper.append_pk_prefix(p_key:)
|
21
|
-
logger.debug(message: "Incoming modifications for PK #{p_key}", details:
|
21
|
+
logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
|
22
22
|
|
23
23
|
# Fetch the latest version of the DMP ID
|
24
24
|
client = Uc3DmpDynamo::Client.new
|
@@ -28,30 +28,29 @@ module Uc3DmpId
|
|
28
28
|
|
29
29
|
# Verify that the DMP ID is updateable with the info passed in
|
30
30
|
errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
|
31
|
-
mods:
|
31
|
+
mods: mods['dmp'])
|
32
32
|
logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
|
33
33
|
raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
|
34
34
|
# Don't continue if nothing has changed!
|
35
|
-
raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b:
|
35
|
+
raise UpdaterError, Helper::MSG_NO_CHANGE if Helper.eql?(dmp_a: latest_version, dmp_b: mods)
|
36
36
|
|
37
37
|
# Version the DMP ID record (if applicable).
|
38
38
|
owner = latest_version['dmphub_provenance_id']
|
39
39
|
updater = provenance['PK']
|
40
40
|
version = Versioner.generate_version(client:, latest_version:, owner:,
|
41
41
|
updater:, logger:)
|
42
|
-
logger&.debug(message: 'New Version', details: version)
|
43
42
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
|
44
43
|
# Bail if the system trying to make the update is not the creator of the DMP ID
|
45
44
|
raise UpdaterError, Helper::MSG_DMP_FORBIDDEN if owner != updater
|
46
45
|
|
47
46
|
# Handle any changes to the dmphub_modifications section
|
48
|
-
version = _process_harvester_mods(client:, p_key:, json:
|
49
|
-
logger&.debug(message: 'Version after process_harvester_mods', details: version)
|
47
|
+
version = _process_harvester_mods(client:, p_key:, json: version, logger:)
|
50
48
|
|
51
|
-
# Remove the version info
|
49
|
+
# Remove the version info because we don't want to save it on the record
|
52
50
|
version.delete('dmphub_versions')
|
53
|
-
version.delete('dmphub_modifications')
|
54
51
|
|
52
|
+
# Splice the assertions
|
53
|
+
version = _process_modifications(owner:, updater:, version:, mods:, logger:)
|
55
54
|
# Set the :modified timestamps
|
56
55
|
now = Time.now.utc
|
57
56
|
version['modified'] = now.iso8601
|
@@ -68,9 +67,9 @@ module Uc3DmpId
|
|
68
67
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
69
68
|
|
70
69
|
# Append the :dmphub_versions Array
|
71
|
-
|
72
|
-
|
73
|
-
Helper.cleanse_dmp_json(json:
|
70
|
+
json = JSON.parse({ dmp: version }.to_json)
|
71
|
+
json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
|
72
|
+
Helper.cleanse_dmp_json(json:)
|
74
73
|
end
|
75
74
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
76
75
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -131,6 +130,24 @@ module Uc3DmpId
|
|
131
130
|
end
|
132
131
|
# rubocop:enable Metrics/AbcSize
|
133
132
|
|
133
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
134
|
+
def _process_modifications(owner:, updater:, version:, mods:, logger: nil)
|
135
|
+
return version unless mods.is_a?(Hash) && !updater.nil?
|
136
|
+
return mods unless version.is_a?(Hash) && !owner.nil?
|
137
|
+
|
138
|
+
logger.debug(message: 'Modifications before merge.', details: mods) if logger.respond_to?(:debug)
|
139
|
+
keys_to_retain = version.keys.select do |key|
|
140
|
+
(key.start_with?('dmphub_') && !%w[dmphub_modifications dmphub_versions].include?(key)) ||
|
141
|
+
key.start_with?('PK') || key.start_with?('SK') || key.start_with?('dmproadmap_related_identifiers')
|
142
|
+
end
|
143
|
+
keys_to_retain.each do |key|
|
144
|
+
mods[key] = version[key]
|
145
|
+
end
|
146
|
+
logger.debug(message: 'Modifications after merge.', details: mods) if logger.respond_to?(:debug)
|
147
|
+
mods
|
148
|
+
end
|
149
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
150
|
+
|
134
151
|
# Once the DMP has been updated, we need to update it's DOI metadata
|
135
152
|
# -------------------------------------------------------------------------
|
136
153
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
@@ -174,8 +191,7 @@ module Uc3DmpId
|
|
174
191
|
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
175
192
|
|
176
193
|
# Fetch any Harvester modifications to the JSON
|
177
|
-
def _process_harvester_mods(client:, p_key:, json:,
|
178
|
-
logger&.debug(message: 'Incoming modifications', details: json)
|
194
|
+
def _process_harvester_mods(client:, p_key:, json:, logger: nil)
|
179
195
|
return json if json.fetch('dmphub_modifications', []).empty?
|
180
196
|
|
181
197
|
# Fetch the `"SK": "HARVESTER_MODS"` record
|
@@ -185,54 +201,22 @@ module Uc3DmpId
|
|
185
201
|
)
|
186
202
|
return json unless resp.is_a?(Hash) && resp['related_works'].is_a?(Hash)
|
187
203
|
|
188
|
-
logger&.debug(message: 'Original HARVESTER_MODS record', details: resp)
|
189
204
|
# The `dmphub_modifications` array will ONLY ever have things the harvester mods know about
|
190
205
|
# so just find them and update the status accordingly
|
191
|
-
|
192
|
-
json['dmproadmap_related_identifiers'] = [] if json['dmproadmap_related_identifiers'].nil?
|
193
|
-
|
206
|
+
mods = resp.dup
|
194
207
|
json['dmphub_modifications'].each do |entry|
|
195
208
|
next if entry.fetch('dmproadmap_related_identifiers', []).empty?
|
196
209
|
|
197
210
|
entry['dmproadmap_related_identifiers'].each do |related|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
key = "#{related_domain.end_with?('/') ? related_domain : "#{related_domain}/"}#{related_id}"
|
202
|
-
key_found = original['related_works'].has_key?(key)
|
203
|
-
logger&.debug(message: "No matching HARVEST_MOD found for #{key}") unless key_found
|
204
|
-
next unless key_found
|
205
|
-
|
206
|
-
# Update the status in the HARVESTER_MODS record
|
207
|
-
logger&.debug(message: "Updating status for #{key} from #{original['related_works'][key]['status']} to #{related['status']}")
|
208
|
-
original['related_works'][key]['status'] = related['status']
|
209
|
-
|
210
|
-
existing = version['dmproadmap_related_identifiers'].select do |ri|
|
211
|
-
ri['identifier'] == key
|
212
|
-
end
|
213
|
-
|
214
|
-
# Add it if it was approved and doesn't exist in dmproadmap_related_identifiers
|
215
|
-
if related['status'] == 'approved' && existing.empty?
|
216
|
-
version['dmproadmap_related_identifiers'] << {
|
217
|
-
identifier: key,
|
218
|
-
work_type: related['work_type'],
|
219
|
-
type: related['type'],
|
220
|
-
descriptor: related['descriptor'],
|
221
|
-
citation: related['citation']
|
222
|
-
}
|
223
|
-
elsif related['status'] == 'rejected' && existing.any?
|
224
|
-
# otherwise remove it
|
225
|
-
version['dmproadmap_related_identifiers'] = version['dmproadmap_related_identifiers'].reject { |ri| ri == existing.first }
|
226
|
-
end
|
211
|
+
next if mods['related_works'][related.identifier].nil?
|
212
|
+
|
213
|
+
mods['related_works'][related.identifier]['status'] = related['status']
|
227
214
|
end
|
228
215
|
end
|
229
216
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
logger&.debug(message: 'Returning updated VERSION:', details: version)
|
235
|
-
version
|
217
|
+
client.put_item(json: mods, logger:)
|
218
|
+
json.delete('dmphub_modifications')
|
219
|
+
json
|
236
220
|
end
|
237
221
|
end
|
238
222
|
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.79
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|