uc3-dmp-id 0.0.24 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f71bf88908949d2b8514ea8177c71e97334663740156b7b676368b5600e31f34
4
- data.tar.gz: a4a807c92d00c78f5bf9a5d3898d6cbdbab3fd05b95f97a4dfc7104ac5e12438
3
+ metadata.gz: 7725d7a42fc20d12dda5b52f697beedb5f945839e31b5b0ebfac1740a3ebf9a2
4
+ data.tar.gz: 68ac33ba9ed458bc7d399804290ef4cbfb4863145bb3b055e67f25c4dce90376
5
5
  SHA512:
6
- metadata.gz: 99d8b88c1b74b6e5cef633674c6487e3b5617e448f803664ecb07ff6077296b1ccff4969fc80d33dcadad03c9afa504fac217a718f138b392a540bdc20dcc000
7
- data.tar.gz: f2cfda840d062a017452e5372c882fad94b0196117ca35cbc74c790ce2ea54ce11f51c63ae3bb131b688733927fdddc7e18f88e72f3197a27f75507dde71c8dd
6
+ metadata.gz: 5be611a6931994acf746c509af01735dd0797bcd80ae9975fc5eaba3dafb1069785ad2a3df0b24beb0d29e7c699e1c4f89a35412326ee8a4efa4c68fc722357e
7
+ data.tar.gz: eab5793eed4a050c47462b257da4c96dab19ad1cd80ffaa5743d6d76d50e2497e8c29d743e8a77575bf0a49c1d85475f38386019d70b52a9e3da3843c9ce6f6b
@@ -1,13 +1,86 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'securerandom'
4
+
3
5
  module Uc3DmpId
4
- class Uc3DmpIdCreatorError < StandardError; end
6
+ class CreatorError < StandardError; end
5
7
 
6
8
  class Creator
9
+ MSG_NO_BASE_URL = 'No base URL found for DMP ID (e.g. `doi.org`)'
10
+ MSG_NO_PROVENANCE_OWNER = 'No provenance system and/or owner defined.'
11
+ MSG_NO_SHOULDER = 'No DOI shoulder found. (e.g. `10.12345/`)'
12
+ MSG_UNABLE_TO_MINT = 'Unable to mint a unique DMP ID.'
13
+
7
14
  class << self
15
# Create a new DMP ID record from the supplied JSON.
#
# Steps: check the minting configuration, validate the JSON, make sure the
# DMP does not already exist, reserve a DMP ID, add the DMPHub specific
# attributes and store the record.
#
# @param provenance [Hash] the provenance system creating the DMP ID
# @param owner_org [#to_s] identifier of the owning organization
# @param json [Hash, String] the DMP metadata
# @param debug [Boolean] when true, prints the record before saving it
# @return the annotated record that was handed to the database client
# @raise [CreatorError] when the shoulder or base URL is not configured, the
#   JSON is invalid, provenance/owner is missing, the DMP already exists, no
#   DMP ID could be minted, or the database client returned nil
def create(provenance:, owner_org:, json:, debug: false)
  # A blank value is as unusable as a missing one, so reject both
  raise CreatorError, MSG_NO_SHOULDER if ENV['DMP_ID_SHOULDER'].to_s.strip.empty?
  raise CreatorError, MSG_NO_BASE_URL if ENV['DMP_ID_BASE_URL'].to_s.strip.empty?

  # Parse once so that helpers which call Hash methods directly never see a String
  parsed = Helper.parse_json(json: json)

  # Validate the incoming JSON first.
  # NOTE(review): the original called `&.fetch('dmp', {})` on the validator's
  # result, which raises a TypeError when that result is an Array. Only unwrap
  # when a Hash comes back - confirm the Validator's actual return type.
  errs = Validator.validate(mode: 'author', json: parsed)
  errs = errs.fetch('dmp', {}) if errs.is_a?(Hash)
  raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any?

  # Fail if the provenance or owner affiliation are not defined
  raise CreatorError, MSG_NO_PROVENANCE_OWNER if provenance.nil? || owner_org.nil?

  # Try to find it first and fail if found (Finder.by_json parses the JSON itself)
  result = Finder.by_json(json: json, debug: debug)
  raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS if result.is_a?(Hash)

  p_key = _preregister_dmp_id(json: parsed, debug: debug)
  raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?

  # Add the DMPHub specific attributes and then save
  annotated = Helper.annotate_dmp_json(provenance: provenance, owner_org: owner_org, p_key: p_key, json: json)
  if debug
    puts "CREATING DMP ID:"
    puts annotated
  end

  # Create the item
  client = Uc3DmpDynamo::Client.new(debug: debug)
  resp = client.put_item(json: annotated, debug: debug)
  raise CreatorError, Uc3DmpId::MSG_DMP_NO_DMP_ID if resp.nil?

  # _post_process(json: annotated)
  annotated
end
46
+
47
+ private
48
+
49
# Work out the partition key (PK) for a new DMP ID.
#
# If the JSON already carries a `dmp_id` of type DOI whose key is not yet in
# use, that identifier is reused. Otherwise a random ID is generated from the
# DMP_ID_SHOULDER environment variable.
#
# @param json [Hash] the DMP metadata, either wrapped in a top level `dmp`
#   entry or already unwrapped
# @param debug [Boolean] when true, prints the ID that was generated
# @return [String] the PK for the DMP ID
# @raise [CreatorError] when no unused ID was found within the attempt limit
def _preregister_dmp_id(json:, debug: false)
  # Accept both `{ 'dmp' => { ... } }` and the unwrapped form
  dmp = json.is_a?(Hash) ? json.fetch('dmp', json) : {}
  existing = dmp.is_a?(Hash) ? dmp.fetch('dmp_id', {}) : {}

  # Use the specified DMP ID if it is a DOI that has not been registered yet.
  # `to_s` guards against a missing :type, which used to raise NoMethodError.
  if existing.is_a?(Hash) && existing['identifier'].is_a?(String) &&
     existing['type'].to_s.downcase == 'doi'
    id = existing['identifier'].gsub(%r{https?://}, Helper::PK_DMP_PREFIX)
    return id unless Finder.exists?(p_key: id)
  end

  max_attempts = 10
  dmp_id = nil
  max_attempts.times do
    candidate = "#{ENV['DMP_ID_SHOULDER']}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
    next if Finder.exists?(p_key: candidate)

    dmp_id = candidate
    break
  end
  # Fail only when every attempt collided. The old check compared the attempt
  # counter instead and so rejected an ID found on the final attempts.
  raise CreatorError, MSG_UNABLE_TO_MINT if dmp_id.nil?

  puts "Uc3DmpId::Creator._preregister_dmp_id - registering DMP ID: #{dmp_id}" if debug
  url = ENV['DMP_ID_BASE_URL'].gsub(%r{https?://}, '')
  "#{Helper::PK_DMP_PREFIX}#{url.end_with?('/') ? url : "#{url}/"}#{dmp_id}"
end
70
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
8
71
 
9
- # TODO: include new 'dmphub_owner_org' top level entry with the ROR of the 'contact'
72
# Once the DMP has been created, announce it so that follow-up work can run.
# NOTE(review): presumably event subscribers register its DMP ID and download
# any PDF - nothing in this method does that directly; confirm.
# -------------------------------------------------------------------------
#
# @param json [Hash] the record that was just created; a
#   'dmphub_updater_is_provenance' entry is added to it in place
# @param debug [Boolean] passed through to the event publisher
# @return [Boolean] false when json is not a Hash, otherwise true
def _post_process(json:, debug: false)
  return false unless json.is_a?(Hash)

  # We are creating, so this is always true
  json['dmphub_updater_is_provenance'] = true
  # Publish the change to the EventBridge. The flag is now an argument because
  # the `@debug` this used to read is never assigned in the visible code.
  EventPublisher.publish(source: 'DmpCreator', dmp: json, debug: debug)
  true
end
11
84
  end
12
85
  end
13
86
  end
@@ -3,7 +3,7 @@
3
3
  require 'uc3-dmp-dynamo'
4
4
 
5
5
  module Uc3DmpId
6
- class Uc3DmpIdFinderError < StandardError; end
6
+ class FinderError < StandardError; end
7
7
 
8
8
  # Methods to find/search for DMP IDs
9
9
  class Finder
@@ -24,7 +24,7 @@ module Uc3DmpId
24
24
  # Find the DMP's versions
25
25
  # -------------------------------------------------------------------------
26
26
  def versions(p_key:, client: nil, debug: false)
27
- raise Uc3DmpIdFinderError, MSG_MISSING_PK if p_key.nil?
27
+ raise FinderError, MSG_MISSING_PK if p_key.nil?
28
28
 
29
29
  args = {
30
30
  key_conditions: {
@@ -41,7 +41,7 @@ module Uc3DmpId
41
41
  # -------------------------------------------------------------------------
42
42
  def by_json(json:, debug: false)
43
43
  json = Validator.parse_json(json: json)&.fetch('dmp', {})
44
- raise Uc3DmpIdFinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
44
+ raise FinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)
45
45
 
46
46
  p_key = json['PK']
47
47
  # Translate the incoming :dmp_id into a PK
@@ -58,7 +58,7 @@ module Uc3DmpId
58
58
  # Find the DMP by its PK and SK
59
59
  # -------------------------------------------------------------------------
60
60
  def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, debug: false)
61
- raise Uc3DmpIdFinderError, MSG_MISSING_PK if p_key.nil?
61
+ raise FinderError, MSG_MISSING_PK if p_key.nil?
62
62
 
63
63
  s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
64
64
  client = client.nil? ? Uc3DmpDynamo::Client.new(debug: debug) : client
@@ -77,11 +77,26 @@ module Uc3DmpId
77
77
  Helper.cleanse_dmp_json(json: dmp)
78
78
  end
79
79
 
80
# Fetch just the PK to see if a record exists
# -------------------------------------------------------------------------
#
# @param p_key [String] the DMP ID's partition key; the PK prefix is added
#   through Helper.append_pk_prefix
# @param s_key [String, nil] the sort key; blank values fall back to
#   Helper::DMP_LATEST_VERSION
# @param client [Object, nil] a client responding to #get_item; a new
#   Uc3DmpDynamo::Client is built when omitted
# @param debug [Boolean] passed to the client when one has to be built
# @return [Boolean] true when the lookup returned a Hash
# @raise [FinderError] when p_key is nil
def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, debug: false)
  raise FinderError, MSG_MISSING_PK if p_key.nil?

  # `client` and `s_key` used to be referenced without ever being defined;
  # they are now optional arguments with the same defaults as `by_pk`.
  s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
  client = Uc3DmpDynamo::Client.new(debug: debug) if client.nil?

  resp = client.get_item(
    key: {
      PK: Helper.append_pk_prefix(p_key: p_key),
      SK: s_key
    },
    projection_expression: 'PK'
  )
  resp.is_a?(Hash)
end
94
+
80
95
  # Attempt to find the DMP item by the provenance system's identifier
81
96
  # -------------------------------------------------------------------------
82
97
  # rubocop:disable Metrics/AbcSize
83
98
  def by_provenance_identifier(json:, client: nil, debug: false)
84
- raise Uc3DmpIdFinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
99
+ raise FinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?
85
100
 
86
101
  args = {
87
102
  key_conditions: {
@@ -110,6 +110,41 @@ module Uc3DmpId
110
110
  json.is_a?(String) ? JSON.parse(json) : nil
111
111
  end
112
112
 
113
# Add DMPHub specific fields to the DMP ID JSON
#
# @param provenance [Hash] the provenance system; its 'PK' is recorded
# @param owner_org [#to_s] identifier of the owning organization
# @param p_key [String] the PK to use when the JSON does not carry one
# @param json [Hash, String] the DMP metadata
# @return the annotated copy, or the parsed JSON unchanged when an argument is
#   missing, the JSON is not a Hash, or the JSON has a :PK while its :dmp_id
#   does not translate to `p_key`
def annotate_dmp_json(provenance:, owner_org:, p_key:, json:)
  json = parse_json(json: json)
  return json if provenance.nil? || owner_org.nil? || p_key.nil? || !json.is_a?(Hash)

  # Return the json as is if the :PK does not match the :dmp_id if the json has a :PK
  id = dmp_id_to_pk(json: json.fetch('dmp_id', {}))
  return json if id != p_key && !json['PK'].nil?

  # Take the time once so the day, updated and created stamps always agree
  now = Time.now

  annotated = deep_copy_dmp(obj: json)
  annotated['PK'] = json['PK'] || p_key
  annotated['SK'] = DMP_LATEST_VERSION

  # Ensure that the :dmp_id matches the :PK
  annotated['dmp_id'] = pk_to_dmp_id(p_key: annotated['PK'])

  # Update the modification timestamps
  annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
  annotated['dmphub_owner_org'] = owner_org.to_s
  annotated['dmphub_updated_at'] = now.iso8601
  # Only add the Creation date if it is blank
  annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
  return annotated unless json['dmphub_provenance_id'].nil?

  annotated['dmphub_provenance_id'] = provenance.fetch('PK', '')
  original_id = json.fetch('dmp_id', {})['identifier']
  return annotated if !annotated['dmphub_provenance_identifier'].nil? || original_id.nil?

  # Record the original Provenance system's identifier
  annotated['dmphub_provenance_identifier'] = format_provenance_id(provenance: provenance, value: original_id)
  annotated
end
147
+
113
148
  # Recursive method that strips out any DMPHub related metadata from a DMP record before sending
114
149
  # it to the caller
115
150
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -741,7 +741,7 @@ module Uc3DmpId
741
741
  "description": "Language of the metadata expressed using ISO 639-3.",
742
742
  "examples": ["eng"]
743
743
  },
744
- "metadata_standard_id" : {
744
+ "metadata_standard_id": {
745
745
  "$id": "#/properties/dmp/properties/dataset/items/properties/metadata/items/properties/metadata_standard_id",
746
746
  "type": "object",
747
747
  "title": "The Dataset Metadata Standard ID Schema",
@@ -852,7 +852,7 @@ module Uc3DmpId
852
852
  "description": "Description of the technical resource",
853
853
  "examples": ["Device needed to collect field data..."]
854
854
  },
855
- "dmproadmap_technical_resource_id" : {
855
+ "dmproadmap_technical_resource_id": {
856
856
  "$id": "#/properties/dmp/properties/dataset/items/properties/technical_resource/items/dmproadmap_technical_resource_id",
857
857
  "type": "object",
858
858
  "title": "The Dataset Metadata Standard ID Schema",
@@ -45,10 +45,10 @@ module Uc3DmpId
45
45
  # ------------------------------------------------------------------------------------
46
46
  def _load_schema(mode:)
47
47
 
48
- puts "Loading schema -- Uc3DmpId::Schemas::#{mode.to_s.downcase.capitalize}"
48
+ puts "Loading schema -- Schemas::#{mode.to_s.downcase.capitalize}"
49
49
 
50
50
  # Instantiate the matching schema
51
- schema = "Uc3DmpId::Schemas::#{mode.to_s.downcase.capitalize}".split('::').inject(Object) { |o,c| o.const_get c }
51
+ schema = "Schemas::#{mode.to_s.downcase.capitalize}".split('::').inject(Object) { |o,c| o.const_get c }
52
52
  schema.respond_to?(:load) ? schema.load : nil
53
53
  end
54
54
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.0.24'
4
+ VERSION = '0.0.25'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
@@ -127,7 +127,6 @@ files:
127
127
  - lib/uc3-dmp-id/validator.rb
128
128
  - lib/uc3-dmp-id/version.rb
129
129
  - lib/uc3-dmp-id/versioner.rb
130
- - lib/uc3-dmp-id/waf_analysis_2023-06-05.txt
131
130
  homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-id
132
131
  licenses:
133
132
  - MIT
@@ -1,75 +0,0 @@
1
-
2
- WAF analysis
3
- ===========================================================
4
-
5
- Is there a rule to block incessant requests for the same bad URL? For example, `/nuclei.svg?fc9dz=x` was requested over 100 times!
6
-
7
- AWS#AWSManagedRulesCommonRuleSet#NoUserAgent_HEADER
8
- Action: REMOVE THIS RULE? I feel like this one will cause more harm than good
9
- Blocking: /
10
- /plans/98982
11
- /nuclei.svg?fc9dz=x (about 100 times!)
12
- /v1/metadata/private-networks
13
- /latest/meta-data/
14
- /computeMetadata/v1/project/
15
- /dynamic/instance-identity/document
16
- /openstack/latest
17
- /metadata/v1.json
18
- /opc/v1/instance
19
-
20
- AWS#AWSManagedRulesCommonRuleSet#SizeRestrictions_BODY
21
- Action: Add exclusion for this specific path!
22
- Blocking: /Shibboleth.sso/SAML2/POST
23
-
24
- AWS#AWSManagedRulesAdminProtectionRuleSet#AdminProtection_URIPATH
25
- Action: Add exceptions for [/org/admin/, /org_admin/, /super_admin/, /paginable/plans/org_admin]. The others are
26
- all illegitimate so we want to block them.
27
- Blocking: /org/admin/users/98307/admin_update_permissions
28
- /org/admin/users/admin_index
29
- /paginable/plans/org_admin/[page]?[query_params]
30
- /org_admin/plans
31
- /org/admin/543/admin_edit
32
- /org/admin/users/admin_index
33
- /org/admin/guidance/2163/admin_update
34
- /org_admin/templates/1967/phases/2144/sections/11313
35
- /org/admin/17/admin_edit
36
- /super_admin/users/77446/merge
37
-
38
- /admin/
39
- /admin/phpMyAdmin/server_import.php
40
- /phpMyAdmin/server_import.php
41
- /admin/pma/server_import.php
42
- /miscadmin
43
- /admin/server_import.php
44
- /backend/admin/users?username=anonymous
45
- /phpmyadmin/server_import.php
46
- /admin/install.php
47
- /admin/install/install.php
48
- /wp-admin/install.php
49
- /solr/admin/
50
- /Admin/frmWelcome.aspx
51
- /boaform/admin/formLogin?username=user&psd=user
52
-
53
-
54
- AWS#AWSManagedRulesCommonRuleSet#UserAgent_BadBots_HEADER
55
- Actions: Block them
56
- Blocking: /public_templates?[query_params]
57
- /robots.txt
58
- /aab8
59
- /aaa9
60
- /dmptool-ui/SourceSans3VF-Italic.ttf.fd20af5b.woff2
61
- /dmptool-ui/SourceSans3VF-Roman.ttf.99aa17fb.woff2
62
- /assets/application-4551ebb71fffa2b6d576438af0e66620a4e84cb8431cdd25889e191eed0fae66.js
63
-
64
- AWS#AWSManagedRulesAmazonIpReputationList#AWSManagedReconnaissanceList
65
- Actions: Block them
66
- Blocking: /
67
- /.env
68
- /header.php
69
-
70
- AWS#AWSManagedRulesCommonRuleSet#CrossSiteScripting_BODY
71
- Actions: Add exception for /answers/create_or_update (or better yet address it) BLOCK all others
72
- Blocking: /answers/create_or_update?question_id=17592
73
- /content/crx/de/setPreferences.jsp;%0A.html?keymap=<svg/onload=confirm(document.domain)>//a&language=en
74
- /7/0/33/1d/www.citysearch.com/search?what=x&where=place%22%3E%3Csvg+onload=confirm(document.domain)%3E
75
- /etc/designs/xh1x.childrenlist.json//%3Csvg%20onload=alert%28document.domain%29%3E.html