uc3-dmp-id 0.0.24 → 0.0.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/creator.rb +75 -2
- data/lib/uc3-dmp-id/finder.rb +20 -5
- data/lib/uc3-dmp-id/helper.rb +35 -0
- data/lib/uc3-dmp-id/schemas/author.rb +2 -2
- data/lib/uc3-dmp-id/validator.rb +2 -2
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +1 -2
- data/lib/uc3-dmp-id/waf_analysis_2023-06-05.txt +0 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7725d7a42fc20d12dda5b52f697beedb5f945839e31b5b0ebfac1740a3ebf9a2
|
4
|
+
data.tar.gz: 68ac33ba9ed458bc7d399804290ef4cbfb4863145bb3b055e67f25c4dce90376
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5be611a6931994acf746c509af01735dd0797bcd80ae9975fc5eaba3dafb1069785ad2a3df0b24beb0d29e7c699e1c4f89a35412326ee8a4efa4c68fc722357e
|
7
|
+
data.tar.gz: eab5793eed4a050c47462b257da4c96dab19ad1cd80ffaa5743d6d76d50e2497e8c29d743e8a77575bf0a49c1d85475f38386019d70b52a9e3da3843c9ce6f6b
|
data/lib/uc3-dmp-id/creator.rb
CHANGED
@@ -1,13 +1,86 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'securerandom'
|
4
|
+
|
3
5
|
module Uc3DmpId
|
4
|
-
class
|
6
|
+
class CreatorError < StandardError; end
|
5
7
|
|
6
8
|
class Creator
|
9
|
+
MSG_NO_BASE_URL = 'No base URL found for DMP ID (e.g. `doi.org`)'
|
10
|
+
MSG_NO_PROVENANCE_OWNER = 'No provenance system and/or owner defined.'
|
11
|
+
MSG_NO_SHOULDER = 'No DOI shoulder found. (e.g. `10.12345/`)'
|
12
|
+
MSG_UNABLE_TO_MINT = 'Unable to mint a unique DMP ID.'
|
13
|
+
|
7
14
|
class << self
|
15
|
+
# Create a new DMP ID record and return the annotated JSON that was saved.
#
# Steps, in order: check the required ENV settings, validate the JSON against the 'author'
# schema, require a provenance and owner, make sure the DMP is not already registered,
# reserve a PK, annotate the record and write it via the Dynamo client.
#
# @param provenance [Object, nil] the requesting provenance; handed to Helper.annotate_dmp_json
# @param owner_org [Object, nil] the owning organisation; handed to Helper.annotate_dmp_json
# @param json [Object] the DMP metadata (parsed with Helper.parse_json for validation)
# @param debug [Boolean] when true, prints the record about to be written
# @return [Object] the annotated record produced by Helper.annotate_dmp_json
# @raise [CreatorError] for missing ENV settings, validation errors, a missing provenance or
#   owner, an already registered DMP, a failed mint, or a nil response from the write.
#   NOTE(review): Finder.by_json raises FinderError for JSON without a PK or dmp_id, and that
#   error is not rescued here.
def create(provenance:, owner_org:, json:, debug: false)
  raise CreatorError, MSG_NO_SHOULDER if ENV['DMP_ID_SHOULDER'].nil?
  raise CreatorError, MSG_NO_BASE_URL if ENV['DMP_ID_BASE_URL'].nil?

  # Schema validation comes first; a non-empty Array means there are errors to report
  problems = Validator.validate(
    mode: 'author',
    json: Helper.parse_json(json: json)
  )&.fetch('dmp', {})
  raise CreatorError, problems.join(', ') if problems.is_a?(Array) && problems.any?

  # Both the provenance and the owning organisation are mandatory
  raise CreatorError, MSG_NO_PROVENANCE_OWNER if provenance.nil? || owner_org.nil?

  # A Hash coming back from the Finder means this DMP is already registered
  raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS if Finder.by_json(json: json, debug: debug).is_a?(Hash)

  pk = _preregister_dmp_id(json: json, debug: debug)
  raise CreatorError, MSG_UNABLE_TO_MINT if pk.nil?

  # Attach the DMPHub specific attributes before saving
  record = Helper.annotate_dmp_json(provenance: provenance, owner_org: owner_org, p_key: pk, json: json)
  if debug
    puts "CREATING DMP ID:"
    puts record
  end

  # Persist the item; a nil response is treated as a failure
  saved = Uc3DmpDynamo::Client.new(debug: debug).put_item(json: record, debug: debug)
  raise CreatorError, Uc3DmpId::MSG_DMP_NO_DMP_ID if saved.nil?

  # _post_process(json: annotated)
  record
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
# Work out the PK a new DMP will be stored under.
#
# If the JSON already carries a `dmp_id` whose type is 'doi' (case-insensitive) and whose
# identifier is not registered yet, that identifier is reused with its URL scheme replaced
# by Helper::PK_DMP_PREFIX. Otherwise an identifier is minted from ENV['DMP_ID_SHOULDER']
# plus eight random hex characters and appended to ENV['DMP_ID_BASE_URL'].
#
# @param json [Hash] the DMP metadata; a non-Hash value is treated as having no `dmp_id`
# @param debug [Boolean] when true, prints the identifier that was minted
# @return [String] the PK for the new record
# @raise [CreatorError] when every minting attempt collided with an existing record
def _preregister_dmp_id(json:, debug: false)
  existing = json.is_a?(Hash) ? json.fetch('dmp_id', {}) : {}
  # `to_s` keeps a missing :type from blowing up with NoMethodError on nil
  if existing.is_a?(Hash) && !existing['identifier'].nil? && existing['type'].to_s.downcase == 'doi'
    id = existing['identifier'].to_s.gsub(%r{https?://}, Helper::PK_DMP_PREFIX)
    return id unless Finder.exists?(p_key: id)
  end

  url = ENV['DMP_ID_BASE_URL'].gsub(%r{https?://}, '')
  base = "#{Helper::PK_DMP_PREFIX}#{url.end_with?('/') ? url : "#{url}/"}"

  max_attempts = 10
  dmp_id = nil
  max_attempts.times do
    candidate = "#{ENV['DMP_ID_SHOULDER']}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
    # Check the complete PK (the same shape the reuse path above hands to the Finder) so the
    # uniqueness test is made against the key that will actually be returned and stored
    next if Finder.exists?(p_key: "#{base}#{candidate}")

    dmp_id = candidate
    break
  end
  # Fail only when no attempt produced an unused identifier (a hit on the final try is valid)
  raise CreatorError, MSG_UNABLE_TO_MINT if dmp_id.nil?

  puts "Uc3DmpId::Creator._preregister_dmp_id - registering DMP ID: #{dmp_id}" if debug
  "#{base}#{dmp_id}"
end
|
70
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
8
71
|
|
9
|
-
#
|
72
|
+
# Once the DMP has been created, flag the record as updated by its provenance and publish
# it through EventPublisher (source 'DmpCreator').
# NOTE(review): the earlier comment here said this step registers the DMP ID and downloads
# any PDF; that work is presumably done by whatever consumes the published event - confirm.
# -------------------------------------------------------------------------
# @param json [Hash] the annotated DMP record; it is modified in place
# @param debug [Boolean] forwarded to EventPublisher.publish
# @return [Boolean] false when json is not a Hash, true once the event has been published
def _post_process(json:, debug: false)
  return false unless json.is_a?(Hash)

  # We are creating, so this is always true
  json['dmphub_updater_is_provenance'] = true
  # Publish the change (the original note names the EventBridge as the destination)
  EventPublisher.publish(source: 'DmpCreator', dmp: json, debug: debug)
  true
end
|
11
84
|
end
|
12
85
|
end
|
13
86
|
end
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'uc3-dmp-dynamo'
|
4
4
|
|
5
5
|
module Uc3DmpId
|
6
|
-
class
|
6
|
+
class FinderError < StandardError; end
|
7
7
|
|
8
8
|
# Methods to find/search for DMP IDs
|
9
9
|
class Finder
|
@@ -24,7 +24,7 @@ module Uc3DmpId
|
|
24
24
|
# Find the DMP's versions
|
25
25
|
# -------------------------------------------------------------------------
|
26
26
|
def versions(p_key:, client: nil, debug: false)
|
27
|
-
raise
|
27
|
+
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
28
28
|
|
29
29
|
args = {
|
30
30
|
key_conditions: {
|
@@ -41,7 +41,7 @@ module Uc3DmpId
|
|
41
41
|
# -------------------------------------------------------------------------
|
42
42
|
def by_json(json:, debug: false)
|
43
43
|
json = Validator.parse_json(json: json)&.fetch('dmp', {})
|
44
|
-
raise
|
44
|
+
raise FinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
|
45
45
|
|
46
46
|
p_key = json['PK']
|
47
47
|
# Translate the incoming :dmp_id into a PK
|
@@ -58,7 +58,7 @@ module Uc3DmpId
|
|
58
58
|
# Find the DMP by its PK and SK
|
59
59
|
# -------------------------------------------------------------------------
|
60
60
|
def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, debug: false)
|
61
|
-
raise
|
61
|
+
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
62
62
|
|
63
63
|
s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
|
64
64
|
client = client.nil? ? Uc3DmpDynamo::Client.new(debug: debug) : client
|
@@ -77,11 +77,26 @@ module Uc3DmpId
|
|
77
77
|
Helper.cleanse_dmp_json(json: dmp)
|
78
78
|
end
|
79
79
|
|
80
|
+
# Fetch just the PK to see if a record exists
# -------------------------------------------------------------------------
# @param p_key [String] the DMP's PK (passed through Helper.append_pk_prefix)
# @param s_key [String, nil] the SK to look up; blank values fall back to Helper::DMP_LATEST_VERSION
# @param client [Object, nil] a client responding to get_item; a Uc3DmpDynamo::Client is built when nil
# @param debug [Boolean] handed to Uc3DmpDynamo::Client.new when a client has to be built
# @return [Boolean] true when the lookup returns a Hash
# @raise [FinderError] when p_key is nil
def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, debug: false)
  raise FinderError, MSG_MISSING_PK if p_key.nil?

  # Neither value used to be defined inside this method, so resolve them here
  s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
  client = Uc3DmpDynamo::Client.new(debug: debug) if client.nil?

  resp = client.get_item(
    key: {
      PK: Helper.append_pk_prefix(p_key: p_key),
      SK: s_key
    },
    # Only the PK attribute is requested since just the record's presence matters
    projection_expression: 'PK'
  )
  resp.is_a?(Hash)
end
|
94
|
+
|
80
95
|
# Attempt to find the DMP item by the provenance system's identifier
|
81
96
|
# -------------------------------------------------------------------------
|
82
97
|
# rubocop:disable Metrics/AbcSize
|
83
98
|
def by_provenance_identifier(json:, client: nil, debug: false)
|
84
|
-
raise
|
99
|
+
raise FinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
|
85
100
|
|
86
101
|
args = {
|
87
102
|
key_conditions: {
|
data/lib/uc3-dmp-id/helper.rb
CHANGED
@@ -110,6 +110,41 @@ module Uc3DmpId
|
|
110
110
|
json.is_a?(String) ? JSON.parse(json) : nil
|
111
111
|
end
|
112
112
|
|
113
|
+
# Add DMPHub specific fields to the DMP ID JSON
#
# @param provenance [Hash] the provenance record; its 'PK' becomes :dmphub_provenance_id
# @param owner_org [Object] stored (via to_s) as :dmphub_owner_org
# @param p_key [String] the PK to use when the JSON does not already carry one
# @param json [Object] the DMP metadata (run through parse_json)
# @return [Object] an annotated copy of the parsed JSON. The parsed JSON is returned without
#   annotations when an argument is nil, the JSON is not a Hash, or the JSON has a :PK and
#   its :dmp_id does not translate to p_key.
def annotate_dmp_json(provenance:, owner_org:, p_key:, json:)
  json = parse_json(json: json)
  return json if provenance.nil? || owner_org.nil? || p_key.nil? || !json.is_a?(Hash)

  # Return the json as is if it has a :PK and its :dmp_id does not match the p_key
  id = dmp_id_to_pk(json: json.fetch('dmp_id', {}))
  return json if id != p_key && !json['PK'].nil?

  annotated = deep_copy_dmp(obj: json)
  annotated['PK'] = json['PK'] || p_key
  annotated['SK'] = DMP_LATEST_VERSION

  # Ensure that the :dmp_id matches the :PK
  annotated['dmp_id'] = pk_to_dmp_id(p_key: annotated['PK'])

  # Take a single reading of the clock so the day, updated and created stamps always agree
  now = Time.now
  annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
  annotated['dmphub_owner_org'] = owner_org.to_s
  annotated['dmphub_updated_at'] = now.iso8601
  # Only add the Creation date if it is blank
  annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
  # A record that already names its provenance keeps its provenance fields untouched
  return annotated unless json['dmphub_provenance_id'].nil?

  annotated['dmphub_provenance_id'] = provenance.fetch('PK', '')
  return annotated if !annotated['dmphub_provenance_identifier'].nil? ||
                      json.fetch('dmp_id', {})['identifier'].nil?

  # Record the original Provenance system's identifier
  annotated['dmphub_provenance_identifier'] = format_provenance_id(
    provenance: provenance, value: json.fetch('dmp_id', {})['identifier']
  )
  annotated
end
|
147
|
+
|
113
148
|
# Recursive method that strips out any DMPHub related metadata from a DMP record before sending
|
114
149
|
# it to the caller
|
115
150
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -741,7 +741,7 @@ module Uc3DmpId
|
|
741
741
|
"description": "Language of the metadata expressed using ISO 639-3.",
|
742
742
|
"examples": ["eng"]
|
743
743
|
},
|
744
|
-
"metadata_standard_id"
|
744
|
+
"metadata_standard_id": {
|
745
745
|
"$id": "#/properties/dmp/properties/dataset/items/properties/metadata/items/properties/metadata_standard_id",
|
746
746
|
"type": "object",
|
747
747
|
"title": "The Dataset Metadata Standard ID Schema",
|
@@ -852,7 +852,7 @@ module Uc3DmpId
|
|
852
852
|
"description": "Description of the technical resource",
|
853
853
|
"examples": ["Device needed to collect field data..."]
|
854
854
|
},
|
855
|
-
"dmproadmap_technical_resource_id"
|
855
|
+
"dmproadmap_technical_resource_id": {
|
856
856
|
"$id": "#/properties/dmp/properties/dataset/items/properties/technical_resource/items/dmproadmap_technical_resource_id",
|
857
857
|
"type": "object",
|
858
858
|
"title": "The Dataset Metadata Standard ID Schema",
|
data/lib/uc3-dmp-id/validator.rb
CHANGED
@@ -45,10 +45,10 @@ module Uc3DmpId
|
|
45
45
|
# ------------------------------------------------------------------------------------
|
46
46
|
def _load_schema(mode:)
|
47
47
|
|
48
|
-
puts "Loading schema --
|
48
|
+
puts "Loading schema -- Schemas::#{mode.to_s.downcase.capitalize}"
|
49
49
|
|
50
50
|
# Instatiate the matching schema
|
51
|
-
schema = "
|
51
|
+
schema = "Schemas::#{mode.to_s.downcase.capitalize}".split('::').inject(Object) { |o,c| o.const_get c }
|
52
52
|
schema.respond_to?(:load) ? schema.load : nil
|
53
53
|
end
|
54
54
|
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
@@ -127,7 +127,6 @@ files:
|
|
127
127
|
- lib/uc3-dmp-id/validator.rb
|
128
128
|
- lib/uc3-dmp-id/version.rb
|
129
129
|
- lib/uc3-dmp-id/versioner.rb
|
130
|
-
- lib/uc3-dmp-id/waf_analysis_2023-06-05.txt
|
131
130
|
homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-id
|
132
131
|
licenses:
|
133
132
|
- MIT
|
@@ -1,75 +0,0 @@
|
|
1
|
-
|
2
|
-
WAF analysis
|
3
|
-
===========================================================
|
4
|
-
|
5
|
-
Is there a rule to block incessant requests for the same bad URL? For example, `/nuclei.svg?fc9dz=x` was requested over 100 times!
|
6
|
-
|
7
|
-
AWS#AWSManagedRulesCommonRuleSet#NoUserAgent_HEADER
|
8
|
-
Action: REMOVE THIS RULE? I feel like this one will cause more harm than good
|
9
|
-
Blocking: /
|
10
|
-
/plans/98982
|
11
|
-
/nuclei.svg?fc9dz=x (about 100 times!)
|
12
|
-
/v1/metadata/private-networks
|
13
|
-
/latest/meta-data/
|
14
|
-
/computeMetadata/v1/project/
|
15
|
-
/dynamic/instance-identity/document
|
16
|
-
/openstack/latest
|
17
|
-
/metadata/v1.json
|
18
|
-
/opc/v1/instance
|
19
|
-
|
20
|
-
AWS#AWSManagedRulesCommonRuleSet#SizeRestrictions_BODY
|
21
|
-
Action: Add exclusion for this specific path!
|
22
|
-
Blocking: /Shibboleth.sso/SAML2/POST
|
23
|
-
|
24
|
-
AWS#AWSManagedRulesAdminProtectionRuleSet#AdminProtection_URIPATH
|
25
|
-
Action: Add exceptions for [/org/admin/, /org_admin/, /super_admin/, /paginable/plans/org_admin]. The others are
|
26
|
-
all illegitimate so we want to block them.
|
27
|
-
Blocking: /org/admin/users/98307/admin_update_permissions
|
28
|
-
/org/admin/users/admin_index
|
29
|
-
/paginable/plans/org_admin/[page]?[query_params]
|
30
|
-
/org_admin/plans
|
31
|
-
/org/admin/543/admin_edit
|
32
|
-
/org/admin/users/admin_index
|
33
|
-
/org/admin/guidance/2163/admin_update
|
34
|
-
/org_admin/templates/1967/phases/2144/sections/11313
|
35
|
-
/org/admin/17/admin_edit
|
36
|
-
/super_admin/users/77446/merge
|
37
|
-
|
38
|
-
/admin/
|
39
|
-
/admin/phpMyAdmin/server_import.php
|
40
|
-
/phpMyAdmin/server_import.php
|
41
|
-
/admin/pma/server_import.php
|
42
|
-
/miscadmin
|
43
|
-
/admin/server_import.php
|
44
|
-
/backend/admin/users?username=anonymous
|
45
|
-
/phpmyadmin/server_import.php
|
46
|
-
/admin/install.php
|
47
|
-
/admin/install/install.php
|
48
|
-
/wp-admin/install.php
|
49
|
-
/solr/admin/
|
50
|
-
/Admin/frmWelcome.aspx
|
51
|
-
/boaform/admin/formLogin?username=user&psd=user
|
52
|
-
|
53
|
-
|
54
|
-
AWS#AWSManagedRulesCommonRuleSet#UserAgent_BadBots_HEADER
|
55
|
-
Actions: Block them
|
56
|
-
Blocking: /public_templates?[query_params]
|
57
|
-
/robots.txt
|
58
|
-
/aab8
|
59
|
-
/aaa9
|
60
|
-
/dmptool-ui/SourceSans3VF-Italic.ttf.fd20af5b.woff2
|
61
|
-
/dmptool-ui/SourceSans3VF-Roman.ttf.99aa17fb.woff2
|
62
|
-
/assets/application-4551ebb71fffa2b6d576438af0e66620a4e84cb8431cdd25889e191eed0fae66.js
|
63
|
-
|
64
|
-
AWS#AWSManagedRulesAmazonIpReputationList#AWSManagedReconnaissanceList
|
65
|
-
Actions: Block them
|
66
|
-
Blocking: /
|
67
|
-
/.env
|
68
|
-
/header.php
|
69
|
-
|
70
|
-
AWS#AWSManagedRulesCommonRuleSet#CrossSiteScripting_BODY
|
71
|
-
Actions: Add exception for /answers/create_or_update (or better yet address it) BLOCK all others
|
72
|
-
Blocking: /answers/create_or_update?question_id=17592
|
73
|
-
/content/crx/de/setPreferences.jsp;%0A.html?keymap=<svg/onload=confirm(document.domain)>//a&language=en
|
74
|
-
/7/0/33/1d/www.citysearch.com/search?what=x&where=place%22%3E%3Csvg+onload=confirm(document.domain)%3E
|
75
|
-
/etc/designs/xh1x.childrenlist.json//%3Csvg%20onload=alert%28document.domain%29%3E.html
|