uc3-dmp-id 0.1.86 → 0.1.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/creator.rb +1 -1
- data/lib/uc3-dmp-id/finder.rb +83 -68
- data/lib/uc3-dmp-id/version.rb +1 -1
- data/lib/uc3-dmp-id/versioner.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a5f7760c4099466f5a99377669e189dfc33cfe2e718d1bf0c96b06b251f5ff77
|
|
4
|
+
data.tar.gz: dd6b30e66d7848e8810bd4c735ce5d007e7c0dd80c059e2341813db873e74c35
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9b2ebf8b2d1919f15bc9b538373fcafddb077c432754b0ba27c8706de6352db07d0db51ec41e5bb4bdc1469a55d45162d45127fc14e4df0c4699d63ed10963e5
|
|
7
|
+
data.tar.gz: 6470cf64e86138c08c343c299dea73a5d5add92755c6716684ad33e25378bfb36f6c3e440a142d5c66f439655d4b3600516c417efd31c9dc7ae4dac0047e416b
|
data/lib/uc3-dmp-id/creator.rb
CHANGED
|
@@ -53,7 +53,7 @@ module Uc3DmpId
|
|
|
53
53
|
annotated['registered'] = annotated['created'] if annotated['registered'].nil?
|
|
54
54
|
|
|
55
55
|
# Create the item
|
|
56
|
-
annotated
|
|
56
|
+
annotated['dmphub_modifications'] = []
|
|
57
57
|
resp = client.put_item(json: annotated, logger:)
|
|
58
58
|
raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
|
|
59
59
|
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
|
@@ -17,22 +17,43 @@ module Uc3DmpId
|
|
|
17
17
|
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. \
|
|
18
18
|
Expected: `{ "dmp_id": { "identifier": "value", "type": "value" }`'
|
|
19
19
|
|
|
20
|
+
ORCID_DOMAIN = 'https://orcid.org/'
|
|
21
|
+
ROR_DOMAIN = 'https://ror.org/'
|
|
22
|
+
DOI_DOMAIN = 'https://doi.org/'
|
|
23
|
+
SORT_OPTIONS = %w[title modified]
|
|
24
|
+
SORT_DIRECTIONS = %w[asc desc]
|
|
25
|
+
MAX_PAGE_SIZE = 100
|
|
26
|
+
DEFAULT_PAGE_SIZE = 25
|
|
27
|
+
DEFAULT_SORT_OPTION = 'modified'
|
|
28
|
+
DEFAULT_SORT_DIR = 'desc'
|
|
29
|
+
|
|
20
30
|
class << self
|
|
21
31
|
# TODO: Replace this with ElasticSearch
|
|
22
32
|
def search_dmps(args:, logger: nil)
|
|
23
|
-
|
|
24
|
-
|
|
33
|
+
# Fetch the DMPs for each of the possible filter options
|
|
34
|
+
client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
|
|
35
|
+
owner = args['owner']
|
|
36
|
+
org = args['org']
|
|
37
|
+
funder = args['funder']
|
|
38
|
+
|
|
39
|
+
owner_pks = owner.nil? ? [] : _by_owner(owner: owner, client:, logger:)
|
|
40
|
+
# There may be multiple Org ids, so query them all
|
|
41
|
+
org_pks = org.nil? ? [] : org.split('|').map { |o| _by_org(org: o, client:, logger:) }
|
|
42
|
+
org_pks = org_pks.flatten.uniq
|
|
43
|
+
funder_pks = funder.nil? ? [] : _by_funder(funder: funder, client:, logger:)
|
|
44
|
+
# pks = [owner_pks, org_pks, funder_pks].reject(&:empty?)
|
|
45
|
+
logger&.debug(
|
|
46
|
+
message: 'PKs found',
|
|
47
|
+
details: { owner: owner_pks, org: org_pks, funder: funder_pks }
|
|
48
|
+
)
|
|
49
|
+
# return [] if pks.nil? || pks.empty?
|
|
25
50
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return _by_mod_day(day: args['modification_day'], client:,
|
|
32
|
-
logger:)
|
|
33
|
-
end
|
|
51
|
+
# Only use the DMPs that fit all of the filter criteria
|
|
52
|
+
# dmps = pks.reduce(:&).flatten.uniq
|
|
53
|
+
# return [] if dmps.nil? || dmps.empty?
|
|
54
|
+
|
|
55
|
+
[owner_pks, org_pks, funder_pks].flatten.uniq
|
|
34
56
|
|
|
35
|
-
[]
|
|
36
57
|
end
|
|
37
58
|
|
|
38
59
|
# Find a DMP based on the contents of the incoming JSON
|
|
@@ -134,74 +155,68 @@ module Uc3DmpId
|
|
|
134
155
|
|
|
135
156
|
private
|
|
136
157
|
|
|
137
|
-
# Fetch the DMP IDs for the specified
|
|
138
|
-
def _by_owner(
|
|
139
|
-
|
|
140
|
-
|
|
158
|
+
# Fetch the DMP IDs for the specified person's ORCID (or email)
|
|
159
|
+
def _by_owner(owner:, client: nil, logger: nil)
|
|
160
|
+
orcid_regex = /^([0-9a-zA-Z]{4}-){3}[0-9a-zA-Z]{4}$/
|
|
161
|
+
email_regex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
|
|
162
|
+
orcid = owner.to_s.strip
|
|
163
|
+
return [] if (orcid =~ orcid_regex).nil? && (orcid =~ email_regex).nil?
|
|
141
164
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
"https://orcid.org/#{owner_id}"
|
|
149
|
-
],
|
|
150
|
-
comparison_operator: 'IN'
|
|
151
|
-
}
|
|
152
|
-
},
|
|
153
|
-
filter_expression: 'SK = :version',
|
|
154
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
|
155
|
-
}
|
|
156
|
-
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
|
157
|
-
client = Uc3DmpDynamo::Client.new if client.nil?
|
|
158
|
-
_process_search_response(response: client.query(args:, logger:))
|
|
165
|
+
orcid = "#{ORCID_DOMAIN}#{orcid}" unless (orcid =~ orcid_regex).nil?
|
|
166
|
+
resp = client.get_item(key: { PK: 'PERSON_INDEX', SK: orcid }, logger:)
|
|
167
|
+
return [] unless resp.is_a?(Hash)
|
|
168
|
+
|
|
169
|
+
logger&.debug(message: "DMPs for PERSON #{orcid}", details: resp)
|
|
170
|
+
resp.fetch('dmps', [])
|
|
159
171
|
end
|
|
160
172
|
|
|
161
|
-
# Fetch the DMP IDs for the specified organization/institution
|
|
162
|
-
|
|
163
|
-
def _by_owner_org(owner_org:, client: nil, logger: nil)
|
|
173
|
+
# Fetch the DMP IDs for the specified organization/institution
|
|
174
|
+
def _by_org(org:, client: nil, logger: nil)
|
|
164
175
|
regex = /^[a-zA-Z0-9]+$/
|
|
165
|
-
|
|
176
|
+
id = "#{ROR_DOMAIN}#{org.strip}" unless (org.to_s =~ regex).nil?
|
|
177
|
+
return [] if id.nil?
|
|
178
|
+
|
|
179
|
+
resp = client.get_item(key: { PK: 'AFFILIATION_INDEX', SK: id }, logger:)
|
|
180
|
+
return [] unless resp.is_a?(Hash)
|
|
181
|
+
|
|
182
|
+
logger&.debug(message: "DMPs for AFFILIATION #{id}", details: resp)
|
|
183
|
+
resp.fetch('dmps', [])
|
|
184
|
+
end
|
|
166
185
|
|
|
186
|
+
# Fetch the DMP IDs for the specified funder
|
|
187
|
+
def _by_funder(funder:, client: nil, logger: nil)
|
|
188
|
+
regex = /^[a-zA-Z0-9]+$/
|
|
189
|
+
id = "#{ROR_DOMAIN}/#{funder.strip}" unless (funder.to_s =~ regex).nil?
|
|
190
|
+
id = "#{DOI_DOMAIN}#{funder.strip}" if id.nil? && !(funder.to_s =~ Helper::DOI_REGEX).nil?
|
|
191
|
+
return [] if id.nil?
|
|
192
|
+
|
|
193
|
+
resp = client.get_item(key: { PK: 'FUNDER_INDEX', SK: id }, logger:)
|
|
194
|
+
return [] unless resp.is_a?(Hash)
|
|
195
|
+
|
|
196
|
+
logger&.debug(message: "DMPs for FUNDER #{id}", details: resp)
|
|
197
|
+
resp.fetch('dmps', [])
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Fetch the DMP IDs that are marked as featured
|
|
201
|
+
def _by_featured(client: nil, logger: nil)
|
|
167
202
|
args = {
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
dmphub_owner_org: {
|
|
171
|
-
attribute_value_list: [
|
|
172
|
-
"https://ror.org/#{owner_org.to_s.downcase}",
|
|
173
|
-
"http://ror.org/#{owner_org.to_s.downcase}"
|
|
174
|
-
],
|
|
175
|
-
comparison_operator: 'IN'
|
|
176
|
-
}
|
|
177
|
-
},
|
|
178
|
-
filter_expression: 'SK = :version',
|
|
179
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
|
203
|
+
filter_expression: 'featured = :featured AND SK = :sk',
|
|
204
|
+
expression_attribute_values: { ':sk': 'METADATA', ':featured': 1 }
|
|
180
205
|
}
|
|
181
|
-
logger
|
|
206
|
+
logger&.debug(message: 'Fetch relevant DMPs _by_featured - scan args', details: args)
|
|
182
207
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
|
183
|
-
_process_search_response(response: client.
|
|
208
|
+
_process_search_response(response: client.scan(args:))
|
|
184
209
|
end
|
|
185
210
|
|
|
186
|
-
#
|
|
187
|
-
def
|
|
188
|
-
regex = /^[0-9]{4}(-[0-9]{2}){2}/
|
|
189
|
-
raise FinderError, MSG_INVALID_OWNER_ID if day.nil? || (day.to_s =~ regex).nil?
|
|
190
|
-
|
|
211
|
+
# Return all of the publicly visible DMPs
|
|
212
|
+
def _publicly_visible(client: nil, logger: nil)
|
|
191
213
|
args = {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
dmphub_modification_day: {
|
|
195
|
-
attribute_value_list: [day.to_s],
|
|
196
|
-
comparison_operator: 'IN'
|
|
197
|
-
}
|
|
198
|
-
},
|
|
199
|
-
filter_expression: 'SK = :version',
|
|
200
|
-
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
|
214
|
+
filter_expression: 'visibility = :visibility AND SK = :sk',
|
|
215
|
+
expression_attribute_values: { ':sk': 'METADATA', ':visibility': 'public' }
|
|
201
216
|
}
|
|
202
|
-
logger
|
|
217
|
+
logger&.debug(message: 'Fetch relevant DMPs _publicly_visible - scan args', details: args)
|
|
203
218
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
|
204
|
-
_process_search_response(response: client.
|
|
219
|
+
_process_search_response(response: client.scan(args:))
|
|
205
220
|
end
|
|
206
221
|
|
|
207
222
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
|
@@ -212,8 +227,8 @@ module Uc3DmpId
|
|
|
212
227
|
next if item.nil?
|
|
213
228
|
|
|
214
229
|
dmp = item['dmp'].nil? ? JSON.parse({ dmp: item }.to_json) : item
|
|
215
|
-
dmp = _remove_narrative_if_private(json: dmp)
|
|
216
|
-
Helper.cleanse_dmp_json(json: dmp)
|
|
230
|
+
# dmp = _remove_narrative_if_private(json: dmp)
|
|
231
|
+
# Helper.cleanse_dmp_json(json: dmp)
|
|
217
232
|
end
|
|
218
233
|
results.compact.uniq
|
|
219
234
|
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED
data/lib/uc3-dmp-id/versioner.rb
CHANGED
|
@@ -58,7 +58,7 @@ module Uc3DmpId
|
|
|
58
58
|
|
|
59
59
|
# Create the prior version record ()
|
|
60
60
|
client = Uc3DmpDynamo::Client.new if client.nil?
|
|
61
|
-
prior
|
|
61
|
+
prior['dmphub_modifications'] = []
|
|
62
62
|
resp = client.put_item(json: prior, logger:)
|
|
63
63
|
return nil if resp.nil?
|
|
64
64
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uc3-dmp-id
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.86
|
|
4
|
+
version: 0.1.87
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Brian Riley
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-07-
|
|
11
|
+
date: 2024-07-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|