uc3-dmp-id 0.0.24 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/creator.rb +75 -2
- data/lib/uc3-dmp-id/finder.rb +20 -5
- data/lib/uc3-dmp-id/helper.rb +35 -0
- data/lib/uc3-dmp-id/schemas/author.rb +2 -2
- data/lib/uc3-dmp-id/validator.rb +2 -2
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +1 -2
- data/lib/uc3-dmp-id/waf_analysis_2023-06-05.txt +0 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7725d7a42fc20d12dda5b52f697beedb5f945839e31b5b0ebfac1740a3ebf9a2
|
4
|
+
data.tar.gz: 68ac33ba9ed458bc7d399804290ef4cbfb4863145bb3b055e67f25c4dce90376
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5be611a6931994acf746c509af01735dd0797bcd80ae9975fc5eaba3dafb1069785ad2a3df0b24beb0d29e7c699e1c4f89a35412326ee8a4efa4c68fc722357e
|
7
|
+
data.tar.gz: eab5793eed4a050c47462b257da4c96dab19ad1cd80ffaa5743d6d76d50e2497e8c29d743e8a77575bf0a49c1d85475f38386019d70b52a9e3da3843c9ce6f6b
|
data/lib/uc3-dmp-id/creator.rb
CHANGED
@@ -1,13 +1,86 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'securerandom'
|
4
|
+
|
3
5
|
module Uc3DmpId
|
4
|
-
class
|
6
|
+
class CreatorError < StandardError; end
|
5
7
|
|
6
8
|
class Creator
|
9
|
+
MSG_NO_BASE_URL = 'No base URL found for DMP ID (e.g. `doi.org`)'
|
10
|
+
MSG_NO_PROVENANCE_OWNER = 'No provenance system and/or owner defined.'
|
11
|
+
MSG_NO_SHOULDER = 'No DOI shoulder found. (e.g. `10.12345/`)'
|
12
|
+
MSG_UNABLE_TO_MINT = 'Unable to mint a unique DMP ID.'
|
13
|
+
|
7
14
|
class << self
|
15
|
+
def create(provenance:, owner_org:, json:, debug: false)
|
16
|
+
raise CreatorError, MSG_NO_SHOULDER if ENV['DMP_ID_SHOULDER'].nil?
|
17
|
+
raise CreatorError, MSG_NO_BASE_URL if ENV['DMP_ID_BASE_URL'].nil?
|
18
|
+
|
19
|
+
# Validate the incoming JSON first
|
20
|
+
errs = Validator.validate(mode: 'author', json: Helper.parse_json(json: json))&.fetch('dmp', {})
|
21
|
+
raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any?
|
22
|
+
|
23
|
+
# Fail if the provenance or owner affiliation are not defined
|
24
|
+
raise CreatorError, MSG_NO_PROVENANCE_OWNER if provenance.nil? || owner_org.nil?
|
25
|
+
|
26
|
+
# Try to find it first and Fail if found
|
27
|
+
result = Finder.by_json(json: json, debug: debug)
|
28
|
+
raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS if result.is_a?(Hash)
|
29
|
+
|
30
|
+
p_key = _preregister_dmp_id(json: json, debug: debug)
|
31
|
+
raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
|
32
|
+
|
33
|
+
# Add the DMPHub specific attributes and then save
|
34
|
+
annotated = Helper.annotate_dmp_json(provenance: provenance, owner_org: owner_org, p_key: p_key, json: json)
|
35
|
+
puts "CREATING DMP ID:" if debug
|
36
|
+
puts annotated if debug
|
37
|
+
|
38
|
+
# Create the item
|
39
|
+
client = Uc3DmpDynamo::Client.new(debug: debug)
|
40
|
+
resp = client.put_item(json: annotated, debug: debug)
|
41
|
+
raise CreatorError, Uc3DmpId::MSG_DMP_NO_DMP_ID if resp.nil?
|
42
|
+
|
43
|
+
# _post_process(json: annotated)
|
44
|
+
annotated
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def _preregister_dmp_id(json:, debug: false)
|
50
|
+
# Use the specified DMP ID if the provenance has permission
|
51
|
+
existing = json.fetch('dmp_id', {})
|
52
|
+
id = existing['identifier'].gsub(%r{https?://}, Helper::PK_DMP_PREFIX) if existing.is_a?(Hash) &&
|
53
|
+
!existing['identifier'].nil?
|
54
|
+
return id if existing['type'].downcase == 'doi' && !id.nil? && !Finder.exists?(p_key: id)
|
55
|
+
|
56
|
+
dmp_id = ''
|
57
|
+
counter = 0
|
58
|
+
while dmp_id == '' && counter <= 10
|
59
|
+
prefix = "#{ENV['DMP_ID_SHOULDER']}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
|
60
|
+
dmp_id = prefix unless Finder.exists?(p_key: prefix)
|
61
|
+
counter += 1
|
62
|
+
end
|
63
|
+
# Something went wrong and it was unable to identify a unique id
|
64
|
+
raise CreatorError, MSG_UNABLE_TO_MINT if counter >= 10
|
65
|
+
|
66
|
+
puts "Uc3DmpId::Creator._pregister_dmp_id - registering DMP ID: #{dmp_id}" if debug
|
67
|
+
url = ENV['DMP_ID_BASE_URL'].gsub(%r{https?://}, '')
|
68
|
+
"#{Helper::PK_DMP_PREFIX}#{url.end_with?('/') ? url : "#{url}/"}#{dmp_id}"
|
69
|
+
end
|
70
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
8
71
|
|
9
|
-
#
|
72
|
+
# Once the DMP has been created, we need to register its DMP ID and download any
|
73
|
+
# PDF if applicable
|
74
|
+
# -------------------------------------------------------------------------
|
75
|
+
def _post_process(json:)
|
76
|
+
return false unless json.is_a?(Hash)
|
10
77
|
|
78
|
+
# We are creating, so this is always true
|
79
|
+
json['dmphub_updater_is_provenance'] = true
|
80
|
+
# Publish the change to the EventBridge
|
81
|
+
EventPublisher.publish(source: 'DmpCreator', dmp: json, debug: @debug)
|
82
|
+
true
|
83
|
+
end
|
11
84
|
end
|
12
85
|
end
|
13
86
|
end
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'uc3-dmp-dynamo'
|
4
4
|
|
5
5
|
module Uc3DmpId
|
6
|
-
class
|
6
|
+
class FinderError < StandardError; end
|
7
7
|
|
8
8
|
# Methods to find/search for DMP IDs
|
9
9
|
class Finder
|
@@ -24,7 +24,7 @@ module Uc3DmpId
|
|
24
24
|
# Find the DMP's versions
|
25
25
|
# -------------------------------------------------------------------------
|
26
26
|
def versions(p_key:, client: nil, debug: false)
|
27
|
-
raise
|
27
|
+
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
28
28
|
|
29
29
|
args = {
|
30
30
|
key_conditions: {
|
@@ -41,7 +41,7 @@ module Uc3DmpId
|
|
41
41
|
# -------------------------------------------------------------------------
|
42
42
|
def by_json(json:, debug: false)
|
43
43
|
json = Validator.parse_json(json: json)&.fetch('dmp', {})
|
44
|
-
raise
|
44
|
+
raise FinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
|
45
45
|
|
46
46
|
p_key = json['PK']
|
47
47
|
# Translate the incoming :dmp_id into a PK
|
@@ -58,7 +58,7 @@ module Uc3DmpId
|
|
58
58
|
# Find the DMP by its PK and SK
|
59
59
|
# -------------------------------------------------------------------------
|
60
60
|
def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, debug: false)
|
61
|
-
raise
|
61
|
+
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
62
62
|
|
63
63
|
s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
|
64
64
|
client = client.nil? ? Uc3DmpDynamo::Client.new(debug: debug) : client
|
@@ -77,11 +77,26 @@ module Uc3DmpId
|
|
77
77
|
Helper.cleanse_dmp_json(json: dmp)
|
78
78
|
end
|
79
79
|
|
80
|
+
# Fetch just the PK to see if a record exists
|
81
|
+
# -------------------------------------------------------------------------
|
82
|
+
def exists?(p_key:)
|
83
|
+
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
84
|
+
|
85
|
+
resp = client.get_item(
|
86
|
+
key: {
|
87
|
+
PK: Helper.append_pk_prefix(p_key: p_key),
|
88
|
+
SK: s_key
|
89
|
+
},
|
90
|
+
projection_expression: 'PK'
|
91
|
+
)
|
92
|
+
resp.is_a?(Hash)
|
93
|
+
end
|
94
|
+
|
80
95
|
# Attempt to find the DMP item by the provenance system's identifier
|
81
96
|
# -------------------------------------------------------------------------
|
82
97
|
# rubocop:disable Metrics/AbcSize
|
83
98
|
def by_provenance_identifier(json:, client: nil, debug: false)
|
84
|
-
raise
|
99
|
+
raise FinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
|
85
100
|
|
86
101
|
args = {
|
87
102
|
key_conditions: {
|
data/lib/uc3-dmp-id/helper.rb
CHANGED
@@ -110,6 +110,41 @@ module Uc3DmpId
|
|
110
110
|
json.is_a?(String) ? JSON.parse(json) : nil
|
111
111
|
end
|
112
112
|
|
113
|
+
# Add DMPHub specific fields to the DMP ID JSON
|
114
|
+
def annotate_dmp_json(provenance:, owner_org:, p_key:, json:)
|
115
|
+
json = parse_json(json: json)
|
116
|
+
return json if provenance.nil? || owner_org.nil? || p_key.nil? || !json.is_a?(Hash)
|
117
|
+
|
118
|
+
# Fail the json as is if the :PK does not match the :dmp_id if the json has a :PK
|
119
|
+
id = dmp_id_to_pk(json: json.fetch('dmp_id', {}))
|
120
|
+
return json if id != p_key && !json['PK'].nil?
|
121
|
+
|
122
|
+
annotated = deep_copy_dmp(obj: json)
|
123
|
+
annotated['PK'] = json['PK'] || p_key
|
124
|
+
annotated['SK'] = DMP_LATEST_VERSION
|
125
|
+
|
126
|
+
# Ensure that the :dmp_id matches the :PK
|
127
|
+
annotated['dmp_id'] = pk_to_dmp_id(p_key: annotated['PK'])
|
128
|
+
|
129
|
+
# Update the modification timestamps
|
130
|
+
annotated['dmphub_modification_day'] = Time.now.strftime('%Y-%m-%d')
|
131
|
+
annotated['dmphub_owner_org'] = owner_org.to_s
|
132
|
+
annotated['dmphub_updated_at'] = Time.now.iso8601
|
133
|
+
# Only add the Creation date if it is blank
|
134
|
+
annotated['dmphub_created_at'] = Time.now.iso8601 if json['dmphub_created_at'].nil?
|
135
|
+
return annotated unless json['dmphub_provenance_id'].nil?
|
136
|
+
|
137
|
+
annotated['dmphub_provenance_id'] = provenance.fetch('PK', '')
|
138
|
+
return annotated if !annotated['dmphub_provenance_identifier'].nil? ||
|
139
|
+
json.fetch('dmp_id', {})['identifier'].nil?
|
140
|
+
|
141
|
+
# Record the original Provenance system's identifier
|
142
|
+
annotated['dmphub_provenance_identifier'] = format_provenance_id(
|
143
|
+
provenance: provenance, value: json.fetch('dmp_id', {})['identifier']
|
144
|
+
)
|
145
|
+
annotated
|
146
|
+
end
|
147
|
+
|
113
148
|
# Recursive method that strips out any DMPHub related metadata from a DMP record before sending
|
114
149
|
# it to the caller
|
115
150
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -741,7 +741,7 @@ module Uc3DmpId
|
|
741
741
|
"description": "Language of the metadata expressed using ISO 639-3.",
|
742
742
|
"examples": ["eng"]
|
743
743
|
},
|
744
|
-
"metadata_standard_id"
|
744
|
+
"metadata_standard_id": {
|
745
745
|
"$id": "#/properties/dmp/properties/dataset/items/properties/metadata/items/properties/metadata_standard_id",
|
746
746
|
"type": "object",
|
747
747
|
"title": "The Dataset Metadata Standard ID Schema",
|
@@ -852,7 +852,7 @@ module Uc3DmpId
|
|
852
852
|
"description": "Description of the technical resource",
|
853
853
|
"examples": ["Device needed to collect field data..."]
|
854
854
|
},
|
855
|
-
"dmproadmap_technical_resource_id"
|
855
|
+
"dmproadmap_technical_resource_id": {
|
856
856
|
"$id": "#/properties/dmp/properties/dataset/items/properties/technical_resource/items/dmproadmap_technical_resource_id",
|
857
857
|
"type": "object",
|
858
858
|
"title": "The Dataset Metadata Standard ID Schema",
|
data/lib/uc3-dmp-id/validator.rb
CHANGED
@@ -45,10 +45,10 @@ module Uc3DmpId
|
|
45
45
|
# ------------------------------------------------------------------------------------
|
46
46
|
def _load_schema(mode:)
|
47
47
|
|
48
|
-
puts "Loading schema --
|
48
|
+
puts "Loading schema -- Schemas::#{mode.to_s.downcase.capitalize}"
|
49
49
|
|
50
50
|
# Instantiate the matching schema
|
51
|
-
schema = "
|
51
|
+
schema = "Schemas::#{mode.to_s.downcase.capitalize}".split('::').inject(Object) { |o,c| o.const_get c }
|
52
52
|
schema.respond_to?(:load) ? schema.load : nil
|
53
53
|
end
|
54
54
|
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
@@ -127,7 +127,6 @@ files:
|
|
127
127
|
- lib/uc3-dmp-id/validator.rb
|
128
128
|
- lib/uc3-dmp-id/version.rb
|
129
129
|
- lib/uc3-dmp-id/versioner.rb
|
130
|
-
- lib/uc3-dmp-id/waf_analysis_2023-06-05.txt
|
131
130
|
homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-id
|
132
131
|
licenses:
|
133
132
|
- MIT
|
@@ -1,75 +0,0 @@
|
|
1
|
-
|
2
|
-
WAF analysis
|
3
|
-
===========================================================
|
4
|
-
|
5
|
-
Is there a rule to block incessant requests for the same bad URL? For example, `/nuclei.svg?fc9dz=x` was requested over 100 times!
|
6
|
-
|
7
|
-
AWS#AWSManagedRulesCommonRuleSet#NoUserAgent_HEADER
|
8
|
-
Action: REMOVE THIS RULE? I feel like this one will cause more harm than good
|
9
|
-
Blocking: /
|
10
|
-
/plans/98982
|
11
|
-
/nuclei.svg?fc9dz=x (about 100 times!)
|
12
|
-
/v1/metadata/private-networks
|
13
|
-
/latest/meta-data/
|
14
|
-
/computeMetadata/v1/project/
|
15
|
-
/dynamic/instance-identity/document
|
16
|
-
/openstack/latest
|
17
|
-
/metadata/v1.json
|
18
|
-
/opc/v1/instance
|
19
|
-
|
20
|
-
AWS#AWSManagedRulesCommonRuleSet#SizeRestrictions_BODY
|
21
|
-
Action: Add exclusion for this specific path!
|
22
|
-
Blocking: /Shibboleth.sso/SAML2/POST
|
23
|
-
|
24
|
-
AWS#AWSManagedRulesAdminProtectionRuleSet#AdminProtection_URIPATH
|
25
|
-
Action: Add exceptions for [/org/admin/, /org_admin/, /super_admin/, /paginable/plans/org_admin]. The others are
|
26
|
-
all illegitimate so we want to block them.
|
27
|
-
Blocking: /org/admin/users/98307/admin_update_permissions
|
28
|
-
/org/admin/users/admin_index
|
29
|
-
/paginable/plans/org_admin/[page]?[query_params]
|
30
|
-
/org_admin/plans
|
31
|
-
/org/admin/543/admin_edit
|
32
|
-
/org/admin/users/admin_index
|
33
|
-
/org/admin/guidance/2163/admin_update
|
34
|
-
/org_admin/templates/1967/phases/2144/sections/11313
|
35
|
-
/org/admin/17/admin_edit
|
36
|
-
/super_admin/users/77446/merge
|
37
|
-
|
38
|
-
/admin/
|
39
|
-
/admin/phpMyAdmin/server_import.php
|
40
|
-
/phpMyAdmin/server_import.php
|
41
|
-
/admin/pma/server_import.php
|
42
|
-
/miscadmin
|
43
|
-
/admin/server_import.php
|
44
|
-
/backend/admin/users?username=anonymous
|
45
|
-
/phpmyadmin/server_import.php
|
46
|
-
/admin/install.php
|
47
|
-
/admin/install/install.php
|
48
|
-
/wp-admin/install.php
|
49
|
-
/solr/admin/
|
50
|
-
/Admin/frmWelcome.aspx
|
51
|
-
/boaform/admin/formLogin?username=user&psd=user
|
52
|
-
|
53
|
-
|
54
|
-
AWS#AWSManagedRulesCommonRuleSet#UserAgent_BadBots_HEADER
|
55
|
-
Actions: Block them
|
56
|
-
Blocking: /public_templates?[query_params]
|
57
|
-
/robots.txt
|
58
|
-
/aab8
|
59
|
-
/aaa9
|
60
|
-
/dmptool-ui/SourceSans3VF-Italic.ttf.fd20af5b.woff2
|
61
|
-
/dmptool-ui/SourceSans3VF-Roman.ttf.99aa17fb.woff2
|
62
|
-
/assets/application-4551ebb71fffa2b6d576438af0e66620a4e84cb8431cdd25889e191eed0fae66.js
|
63
|
-
|
64
|
-
AWS#AWSManagedRulesAmazonIpReputationList#AWSManagedReconnaissanceList
|
65
|
-
Actions: Block them
|
66
|
-
Blocking: /
|
67
|
-
/.env
|
68
|
-
/header.php
|
69
|
-
|
70
|
-
AWS#AWSManagedRulesCommonRuleSet#CrossSiteScripting_BODY
|
71
|
-
Actions: Add exception for /answers/create_or_update (or better yet address it) BLOCK all others
|
72
|
-
Blocking: /answers/create_or_update?question_id=17592
|
73
|
-
/content/crx/de/setPreferences.jsp;%0A.html?keymap=<svg/onload=confirm(document.domain)>//a&language=en
|
74
|
-
/7/0/33/1d/www.citysearch.com/search?what=x&where=place%22%3E%3Csvg+onload=confirm(document.domain)%3E
|
75
|
-
/etc/designs/xh1x.childrenlist.json//%3Csvg%20onload=alert%28document.domain%29%3E.html
|