uc3-dmp-id 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # Error type reserved for failures in Creator (nothing in this file raises it yet).
  # Declared with `<` so that it actually inherits from StandardError.
  class Uc3DmpIdCreatorError < StandardError; end

  # Placeholder for the DMP ID creation logic; no class-level methods are defined yet
  class Creator
    class << self
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # Error type reserved for failures in Deleter (nothing in this file raises it yet).
  # Declared with `<` so that it actually inherits from StandardError.
  class Uc3DmpIdDeleterError < StandardError; end

  # Placeholder for the DMP ID deletion logic; no class-level methods are defined yet
  class Deleter
    class << self
    end
  end
end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uc3-dmp-dynamo'
4
+
5
module Uc3DmpId
  # Raised by Finder when it is called without the arguments it needs
  class Uc3DmpIdFinderError < StandardError; end

  # Methods to find/search for DMP IDs
  class Finder
    # Adjacent literals are concatenated at parse time, so the messages stay on one line
    # (a backslash inside a single-quoted literal would be kept verbatim, newline included).
    MSG_INVALID_ARGS = 'Expected JSON to be structured as `{ "dmp": { "PK": "value"} } OR ' \
                       '{ "dmp": { "dmp_id": { "identifier": "value", "type": "value" } } }`'
    MSG_MISSING_PK = 'No PK was provided'
    MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. ' \
                          'Expected: `{ "dmp_id": { "identifier": "value", "type": "value" } }`'

    class << self
      # TODO: Replace this with ElasticSearch
      #
      # Not implemented yet: accepts (and ignores) any keyword arguments and returns nil.
      def search_dmps(**_args)
        # TODO: Need to move this to ElasticSearch!!!
      end

      # Find the DMP's versions
      #
      # Queries the Dynamo client for every item stored under the PK, projecting only the
      # `modified` attribute and asking for reverse index order.
      #
      # @param p_key [String] the DMP's PK (the prefix is added by Helper.append_pk_prefix)
      # @return whatever Uc3DmpDynamo::Client#query returns
      # @raise [Uc3DmpIdFinderError] when :p_key is nil
      # -------------------------------------------------------------------------
      def versions(p_key:)
        raise Uc3DmpIdFinderError, MSG_MISSING_PK if p_key.nil?

        args = {
          key_conditions: {
            PK: { attribute_value_list: [Helper.append_pk_prefix(dmp: p_key)], comparison_operator: 'EQ' }
          },
          projection_expression: 'modified',
          scan_index_forward: false
        }
        Uc3DmpDynamo::Client.new.query(**args)
      end

      # Find a DMP based on the contents of the incoming JSON
      #
      # Looks the DMP up by its PK (or by the PK derived from its :dmp_id) first and falls
      # back to a lookup by the provenance system's identifier when that finds nothing.
      #
      # @param json [String, Hash] a document shaped like `{ "dmp": { ... } }`
      # @return the DMP that was found (see by_pk / by_dmphub_provenance_identifier)
      # @raise [Uc3DmpIdFinderError] when the JSON has neither a PK nor a dmp_id
      # -------------------------------------------------------------------------
      def by_json(json:)
        # parse_json is defined on Helper (Validator has no such method)
        json = Helper.parse_json(json: json)&.fetch('dmp', {})
        raise Uc3DmpIdFinderError, MSG_INVALID_ARGS if json.nil? || (json['PK'].nil? && json['dmp_id'].nil?)

        # Use the PK when present, otherwise translate the incoming :dmp_id into a PK
        p_key = json['PK'] || Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {}))

        # find_by_PK
        found = by_pk(p_key: p_key, s_key: json['SK']) unless p_key.nil?
        return found unless found.nil?

        # find_by_dmphub_provenance_id -> if no PK and no dmp_id result
        by_dmphub_provenance_identifier(json: json)
      end

      # Find the DMP by its PK and SK
      #
      # @param p_key [String] the DMP's PK
      # @param s_key [String, nil] the version's SK; nil or blank means the latest version
      # @return [Hash, nil] the item with its `dmphub_versions` attached, or nil when the
      #   client returns nothing usable (assumes the item is a Hash keyed by 'dmp')
      # @raise [Uc3DmpIdFinderError] when :p_key is nil
      # -------------------------------------------------------------------------
      def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION)
        raise Uc3DmpIdFinderError, MSG_MISSING_PK if p_key.nil?

        s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.strip.empty?

        resp = Uc3DmpDynamo::Client.new.get_item(
          key: { PK: Helper.append_pk_prefix(dmp: p_key), SK: s_key }
        )
        return nil if resp.nil? || resp.fetch('dmp', {})['PK'].nil?

        _append_versions(p_key: resp['dmp']['PK'], dmp: resp)
      end

      # Attempt to find the DMP item by the provenance system's identifier
      #
      # @param json [Hash] the DMP content; must contain `dmp_id.identifier`
      # @return the DMP fetched through by_pk, or the raw query response when it is nil or
      #   has no 'dmp' entry
      # @raise [Uc3DmpIdFinderError] when no identifier is present
      # -------------------------------------------------------------------------
      # rubocop:disable Metrics/AbcSize
      def by_dmphub_provenance_identifier(json:)
        raise Uc3DmpIdFinderError, MSG_MISSING_PROV_ID if json.nil? || json.fetch('dmp_id', {})['identifier'].nil?

        args = {
          key_conditions: {
            dmphub_provenance_identifier: {
              attribute_value_list: [json['dmp_id']['identifier']],
              comparison_operator: 'EQ'
            }
          },
          filter_expression: 'SK = :version',
          # The latest-version constant lives on Helper; there is no KeyHelper in this gem
          expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
        }
        resp = Uc3DmpDynamo::Client.new.query(**args)
        return resp if resp.nil? || resp['dmp'].nil?

        # If we got a hit, fetch the DMP and return it.
        by_pk(p_key: resp['dmp']['PK'], s_key: resp['dmp']['SK'])
      end
      # rubocop:enable Metrics/AbcSize

      private

      # Build the dmphub_versions array and attach it to the dmp
      #
      # The DMP is returned untouched unless more than one version exists. Versions that
      # have no `modified` timestamp are left out of the array.
      # rubocop:disable Metrics/AbcSize
      def _append_versions(p_key:, dmp:)
        return dmp if p_key.nil? || !dmp.is_a?(Hash) || dmp['dmp'].nil?

        results = versions(p_key: p_key)
        return dmp if results.nil? || results.length <= 1

        history = results.filter_map do |version|
          timestamp = version.fetch('dmp', {})['modified']
          next if timestamp.nil?

          {
            timestamp: timestamp,
            url: "#{Helper.api_base_url}dmps/#{Helper.remove_pk_prefix(dmp: p_key)}?version=#{timestamp}"
          }
        end
        # Round-trip through JSON so the entries use String keys like the rest of the item
        dmp['dmp']['dmphub_versions'] = JSON.parse(history.to_json)
        dmp
      end
      # rubocop:enable Metrics/AbcSize
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+
4
+ # TODO: Be sure to update the API functions so that they call cleanse_dmp_json before
5
+ # calling Uc3DmpApiCore::Responder.respond !!!!!!!!!!
6
+
7
+
8
require 'cgi'

module Uc3DmpId
  # Helper functions for working with DMP IDs
  class Helper
    PK_DMP_PREFIX = 'DMP#'
    PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}.freeze

    SK_DMP_PREFIX = 'VERSION#'
    SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/.freeze

    # TODO: Verify the assumed structure of the DOI is valid
    DOI_REGEX = %r{[0-9]{2}\.[0-9]{5}/[a-zA-Z0-9/_.-]+}.freeze
    URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}.freeze

    DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
    DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"

    class << self
      # Return the base URL for a DMP ID (always ends with a slash)
      def dmp_id_base_url
        url = ENV.fetch('DMP_ID_BASE_URL', 'https://dmptool-dev.cdlib.org/dmps/')
        url&.end_with?('/') ? url : "#{url}/"
      end

      # Return the base URL for the API (always ends with a slash)
      #
      # NOTE(review): this reads the same DMP_ID_BASE_URL variable as dmp_id_base_url, so the
      # default below only applies when that variable is unset - confirm whether a separate
      # environment variable was intended.
      def api_base_url
        url = ENV.fetch('DMP_ID_BASE_URL', 'https://api.dmptool-dev.cdlib.org/dmps/')
        url&.end_with?('/') ? url : "#{url}/"
      end

      # Add the PK prefix to the value (a prefix that is already present is not repeated)
      #
      # @param dmp [String, nil] the DMP ID, with or without the prefix
      # @return [String, nil] the prefixed value, or nil when :dmp is not a String
      def append_pk_prefix(dmp:)
        dmp.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(dmp: dmp)}" : nil
      end

      # Strip the PK prefix from the value
      #
      # @param dmp [String, nil] the PK
      # @return [String, nil] the value without its leading prefix (non-Strings pass through)
      def remove_pk_prefix(dmp:)
        dmp.is_a?(String) ? dmp.delete_prefix(PK_DMP_PREFIX) : dmp
      end

      # Format the DMP ID in the way we want it
      #
      # @param value [String] text that contains a DOI
      # @param with_protocol [Boolean] whether to keep the `http(s)://` part of the base URL
      # @return [String, nil] the base URL followed by the DOI, :value itself when it already
      #   starts with 'http', or nil when :value contains nothing that matches DOI_REGEX
      def format_dmp_id(value:, with_protocol: false)
        return nil unless value.is_a?(String)

        dmp_id = value[DOI_REGEX]
        return nil if dmp_id.nil?
        # If it's already a URL, return it as is
        return value if value.start_with?('http')

        base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
        "#{base_domain}#{dmp_id}"
      end

      # Convert an API PathParameter (DMP ID) into a PK
      #
      # @param param [String] the (possibly URL-encoded) path parameter
      # @return [String, nil] the PK, or nil when :param is blank or contains no DOI
      def path_parameter_to_pk(param:)
        return nil unless param.is_a?(String) && !param.strip.empty?

        append_pk_prefix(dmp: format_dmp_id(value: CGI.unescape(param)))
      end

      # Append the :PK prefix to the :dmp_id
      #
      # @param json [Hash, nil] the :dmp_id portion of a DMP, e.g. `{ "identifier": "..." }`
      # @return [String, nil] the PK, or nil when there is no usable identifier
      def dmp_id_to_pk(json:)
        return nil if json.nil? || json['identifier'].nil?

        # If it's a DOI format it correctly
        dmp_id = format_dmp_id(value: json['identifier'].to_s)
        return nil if dmp_id.nil? || dmp_id == ''

        append_pk_prefix(dmp: dmp_id)
      end

      # Derive the DMP ID by removing the :PK prefix
      #
      # @param p_key [String, nil] the PK
      # @return [Hash, nil] `{ type: 'doi', identifier: <url> }`, or nil when :p_key is nil
      def pk_to_dmp_id(p_key:)
        return nil if p_key.nil?

        {
          type: 'doi',
          identifier: format_dmp_id(value: remove_pk_prefix(dmp: p_key), with_protocol: true)
        }
      end

      # Parse the incoming JSON if necessary or return as is if it's already a Hash
      #
      # @return [Hash, nil] nil when :json is neither a Hash nor a String
      # @raise [JSON::ParserError] when :json is a String that is not valid JSON
      def parse_json(json:)
        return json if json.is_a?(Hash)

        json.is_a?(String) ? JSON.parse(json) : nil
      end

      # Recursive method that strips out any DMPHub related metadata from a DMP record before sending
      # it to the caller
      #
      # Drops the PK and SK entries and every entry whose name starts with `dmphub`, except
      # `dmphub_versions`. A Hash that ends up empty becomes nil.
      # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      def cleanse_dmp_json(json:)
        return json unless json.is_a?(Hash) || json.is_a?(Array)

        # If it's an array clean each of the objects individually
        return json.map { |obj| cleanse_dmp_json(json: obj) }.compact if json.is_a?(Array)

        allowable = %w[dmphub_versions]
        cleansed = json.each_with_object({}) do |(key, obj), out|
          # Compare by name so that Symbol keys are treated the same as String keys
          name = key.to_s
          next if %w[PK SK].include?(name) || (name.start_with?('dmphub') && !allowable.include?(name))

          # If this object is a Hash or Array then recursively cleanse it
          out[key] = obj.is_a?(Hash) || obj.is_a?(Array) ? cleanse_dmp_json(json: obj) : obj
        end
        cleansed.empty? ? nil : cleansed
      end
      # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # Error type reserved for failures in Updater (nothing in this file raises it yet).
  # Declared with `<` so that it actually inherits from StandardError.
  class Uc3DmpIdUpdaterError < StandardError; end

  # Placeholder for the DMP ID update logic; no class-level methods are defined yet
  class Updater
    class << self
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # Raised when the JSON schema validator itself fails
  class Uc3DmpIdValidatorError < StandardError; end

  # Validates DMP JSON against the JSON schema that belongs to a validation mode
  class Validator
    # Valid Validation modes are:
    #  - :author --> system of provenance is attempting to create or update
    #  - :delete --> system of provenance is attempting to delete/tombstone
    #  - :amend  --> a non-provenance system is attempting to update
    VALIDATION_MODES = %w[author amend delete].freeze

    MSG_EMPTY_JSON = 'JSON was empty or was not a valid JSON document!'
    MSG_INVALID_JSON = 'Invalid JSON.'
    MSG_NO_SCHEMA = 'No JSON schema available!'
    MSG_BAD_JSON = 'Fatal validation error: %{msg} - %{trace}'
    MSG_VALID_JSON = 'The JSON is valid.'

    class << self
      # Validate the specified DMP's :json against the schema for the specified :mode
      #
      # @param mode [String, Symbol] one of VALIDATION_MODES
      # @param json [String, Hash] the DMP document
      # @return [Array<String>] the validation errors (empty when the JSON is valid);
      #   [MSG_EMPTY_JSON] when the JSON is missing/unparseable or the mode is unknown;
      #   [MSG_NO_SCHEMA] when no schema file could be loaded for the mode
      # @raise [Uc3DmpIdValidatorError] when the schema validator raises a ValidationError
      # ------------------------------------------------------------------------------------
      def validate(mode:, json:)
        # The modes are documented as Symbols but stored as Strings, so accept both
        mode = mode.to_s
        json = begin
          Helper.parse_json(json: json)
        rescue JSON::ParserError
          # An unparseable document is reported through MSG_EMPTY_JSON below
          nil
        end
        return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)

        # Load the appropriate JSON schema for the mode
        schema = _load_schema(mode: mode)
        return [MSG_NO_SCHEMA] if schema.nil?

        # Validate the JSON
        errors = JSON::Validator.fully_validate(schema, json)
        errors = errors.map { |err| err.gsub('The property \'#/\' ', '') }
        errors = [MSG_INVALID_JSON, *errors].compact.uniq unless errors.empty?
        # Strip the schema identifier that the validator appends to each message
        errors.map { |err| err.gsub(/in schema [a-z0-9-]+/, '').strip }
      rescue JSON::Schema::ValidationError => e
        raise Uc3DmpIdValidatorError, format(MSG_BAD_JSON, msg: e.message, trace: e.backtrace)
      end

      # ------------------------------------------------------------------------------------
      # METHODS BELOW ARE ONLY MEANT TO BE INVOKED FROM WITHIN THIS MODULE
      # ------------------------------------------------------------------------------------

      # Load the JSON schema that corresponds with the mode
      #
      # @param mode [String, nil] the validation mode, used as the schema's file name
      # @return [Hash, nil] the parsed schema; nil when :mode is nil, the file does not exist
      #   or its content is not valid JSON
      # ------------------------------------------------------------------------------------
      def _load_schema(mode:)
        return nil if mode.nil?

        file = "#{_schema_dir}/schemas/#{mode}.json"
        return nil unless File.exist?(file)

        JSON.parse(File.read(file))
      rescue JSON::ParserError
        nil
      end

      # The location of the JSON schema files
      # ------------------------------------------------------------------------------------
      def _schema_dir
        # TODO: Switch this to the gem directory, not sure if this is the same as the Layer below
        '/opt/ruby'
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # The version number of this gem
  VERSION = '0.0.1'
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Uc3DmpId
  # Error type reserved for failures in Versioner (nothing in this file raises it yet).
  # Declared with `<` so that it actually inherits from StandardError.
  class Uc3DmpIdVersionerError < StandardError; end

  # Placeholder for the DMP ID versioning logic; no class-level methods are defined yet
  class Versioner
    class << self
    end
  end
end
data/lib/uc3-dmp-id.rb ADDED
@@ -0,0 +1,18 @@
1
+ # rubocop:disable Naming/FileName
2
+ # frozen_string_literal: true
3
+
4
+ require 'json'
5
+ require 'json-schema'
6
+
7
+ require 'uc3-dmp-id/creator'
8
+ require 'uc3-dmp-id/deleter'
9
+ require 'uc3-dmp-id/finder'
10
+ require 'uc3-dmp-id/helper'
11
+ require 'uc3-dmp-id/updater'
12
+ require 'uc3-dmp-id/validator'
13
+ require 'uc3-dmp-id/versioner'
14
+
15
module Uc3DmpId
  # Shared message text for a DMP ID that could not be found
  MSG_DMP_NOT_FOUND = 'The DMP ID does not exist'
end
18
+ # rubocop:enable Naming/FileName
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uc3-dmp-id
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brian Riley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-05-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json-schema
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: logger
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.4'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.4'
55
+ - !ruby/object:Gem::Dependency
56
+ name: uc3-dmp-dynamo
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.83'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.83'
69
+ - !ruby/object:Gem::Dependency
70
+ name: byebug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 11.1.3
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 11.1.3
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 3.9.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 3.9.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.50.2
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.50.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop-rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '='
116
+ - !ruby/object:Gem::Version
117
+ version: 2.20.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '='
123
+ - !ruby/object:Gem::Version
124
+ version: 2.20.0
125
+ description: Helpers for working with JSON that represents a DMP ID
126
+ email:
127
+ - brian.riley@ucop.edu
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - README.md
133
+ - lib/schemas/amend.json
134
+ - lib/schemas/author.json
135
+ - lib/uc3-dmp-id.rb
136
+ - lib/uc3-dmp-id/creator.rb
137
+ - lib/uc3-dmp-id/deleter.rb
138
+ - lib/uc3-dmp-id/finder.rb
139
+ - lib/uc3-dmp-id/helper.rb
140
+ - lib/uc3-dmp-id/updater.rb
141
+ - lib/uc3-dmp-id/validator.rb
142
+ - lib/uc3-dmp-id/version.rb
143
+ - lib/uc3-dmp-id/versioner.rb
144
+ homepage: https://github.com/CDLUC3/dmp-hub-cfn/blob/main/src/sam/gems/uc3-dmp-id
145
+ licenses:
146
+ - MIT
147
+ metadata:
148
+ rubygems_mfa_required: 'false'
149
+ post_install_message:
150
+ rdoc_options: []
151
+ require_paths:
152
+ - lib
153
+ required_ruby_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ">="
156
+ - !ruby/object:Gem::Version
157
+ version: '2.7'
158
+ required_rubygems_version: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: '0'
163
+ requirements: []
164
+ rubygems_version: 3.1.6
165
+ signing_key:
166
+ specification_version: 4
167
+ summary: DMPTool gem that provides support for DMP ID records
168
+ test_files: []