ndr_pseudonymise 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ require 'active_support'
2
+
3
+ require 'ndr_pseudonymise/version'
4
+ require 'ndr_pseudonymise/pseudonymisation_specification'
5
+
6
+ require 'ndr_pseudonymise/client'
7
+
8
+ require 'ndr_pseudonymise/demographics_only_pseudonymiser'
9
+ require 'ndr_pseudonymise/prescription_pseudonymiser'
10
+ require 'ndr_pseudonymise/progress_printer'
11
+ require 'ndr_pseudonymise/pseudonymisation_specification'
12
+ require 'ndr_pseudonymise/simple_pseudonymisation'
13
+
14
+ # Pseudonymise CSV data for matching purposes
15
+ module NdrPseudonymise
16
+ end
@@ -0,0 +1,114 @@
1
+ # From https://gist.github.com/Zapotek/981959/raw/d7353edad1bd88110dc1f03dcc47257219d1624e/rsa_aes_cbc.rb
2
+ # To generate a public / private key pair:
3
+ # require 'openssl'
4
+ # new_key = OpenSSL::PKey::RSA.generate( 15360 ) # 4096 probably OK
5
+ # File.open("./new_public.pem", "w") { |f| f.puts new_key.public_key }
6
+ # File.open("./new_private.pem", "w") { |f| f.puts new_key.to_pem }
7
+
8
+ require 'openssl'
9
+ require 'yaml'
10
+ require 'base64'
11
+
12
+ # SECURE: TVB Mon 14 Oct 2013 13:33:06 BST
13
+ # All the encryption keys are generated by standard libraries.
14
+ # The data is encrypted with AES symmetric key. The symmetric key
15
+ # is encrypted with RSA.
16
+ #
17
+
18
+ # KH: 20160914 - changed:
19
+ #
20
+ # Base64.encode64 to Base64.strict_encode64 in encrypt()
21
+ #
22
+ # to remove newlines, to be consistent with the rest of the ndr_pseudonymise code.
23
+ # Base64.decode64 is kept in decrypt(), as this (correctly) ignores newlines produced
24
+ # by existing encrypted data using encrypt().
25
+ #
26
+ # http://ruby-doc.org/stdlib-2.3.1/libdoc/base64/rdoc/Base64.html
27
+ # https://tools.ietf.org/html/rfc4648#section-3.1
28
+
29
+
30
+
31
+ # Simple hybrid crypto class using RSA for public key encryption and AES with CBC
32
+ # for bulk data encryption/decryption.
33
+ #
34
+ # RSA is used to encrypt the AES primitives which are used to encrypt the plaintext.
35
+ #
36
+ # @author: Tasos "Zapotek" Laskos
37
+ # <tasos.laskos@gmail.com>
38
+ # <zapotek@segfault.gr>
39
+ # @version: 0.1
40
class RSA_AES_CBC
  # Entries that every serialised payload must contain.
  PRIMITIVE_KEYS = %w[ciphertext key iv].freeze

  #
  # If only encryption is required the private key parameter can be omitted.
  #
  # Both arguments are handed directly to OpenSSL::PKey::RSA.new, so they must
  # be the key material itself (PEM text), not the name of a file holding it.
  #
  # @param [String] public_pem  Public key in PEM format
  # @param [String] private_pem Private key in PEM format
  #
  def initialize(public_pem, private_pem = nil)
    @public_pem  = public_pem
    @private_pem = private_pem
  end

  #
  # Encrypts data and returns a Base64 representation of the ciphertext
  # and AES CBC primitives encrypted using the public key.
  #
  # A fresh random AES key and IV are generated on every call, so encrypting
  # the same data twice gives different output.
  #
  # @param [String] data
  #
  # @return [String] Base64 (strict, i.e. without newlines) representation of
  #                  the ciphertext and AES CBC primitives encrypted using the
  #                  public key.
  #
  def encrypt(data)
    rsa = OpenSSL::PKey::RSA.new(@public_pem)

    # encrypt with 256 bit AES with CBC
    aes = OpenSSL::Cipher.new('aes-256-cbc')
    aes.encrypt

    # use random key and IV
    aes.key = key = aes.random_key
    aes.iv = iv = aes.random_iv

    # this will hold all primitives and ciphertext
    primitives = {}

    primitives['ciphertext'] = aes.update(data)
    primitives['ciphertext'] << aes.final

    # only the holder of the private key can recover the AES key and IV
    primitives['key'] = rsa.public_encrypt(key)
    primitives['iv'] = rsa.public_encrypt(iv)

    # serialize everything and base64 encode it
    Base64.strict_encode64(primitives.to_yaml)
  end

  #
  # Decrypts data.
  #
  # @param [String] data Base64 payload as produced by #encrypt (newlines in
  #                      the Base64 text are tolerated)
  #
  # @return [String] plaintext
  #
  # @raise [ArgumentError] if the payload does not decode to the expected
  #                        ciphertext / key / iv structure
  # @raise [Psych::DisallowedClass] if the payload tries to deserialise an
  #                                 object other than plain YAML data
  #
  def decrypt(data)
    rsa = OpenSSL::PKey::RSA.new(@private_pem)

    # decrypt with 256 bit AES with CBC
    aes = OpenSSL::Cipher.new('aes-256-cbc')
    aes.decrypt

    # unencode and unserialize to get the primitives and ciphertext
    primitives = load_primitives(data)

    aes.key = rsa.private_decrypt(primitives['key'])
    aes.iv = rsa.private_decrypt(primitives['iv'])

    plaintext = aes.update(primitives['ciphertext'])
    plaintext << aes.final

    plaintext
  end

  private

  # Decodes and deserialises a payload, checking it has the expected shape.
  #
  # SECURITY: the payload comes from outside this class, so it is parsed with
  # YAML.safe_load rather than YAML.load; the latter can instantiate arbitrary
  # Ruby objects on older Psych versions. A payload written by #encrypt only
  # contains a Hash of Strings, which safe_load accepts.
  def load_primitives(data)
    # Base64.decode64 (not strict) so that newline-wrapped legacy data still loads
    primitives = YAML.safe_load(Base64.decode64(data))

    well_formed = primitives.is_a?(Hash) &&
                  PRIMITIVE_KEYS.all? { |name| primitives[name].is_a?(String) }
    raise ArgumentError, 'data is not a valid RSA_AES_CBC payload' unless well_formed

    primitives
  end
end
111
+
112
+ # crypto = RSA_AES_CBC.new( File.read( 'public.pem' ), File.read( 'private.pem' ) )
113
+ # ciphered = crypto.encrypt( 'Foo' )
114
+ # puts crypto.decrypt( ciphered )
@@ -0,0 +1,36 @@
1
# Put lib/ on the load path so the version constant can be read from source.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'ndr_pseudonymise/version'

Gem::Specification.new do |spec|
  spec.name          = 'ndr_pseudonymise'
  spec.version       = NdrPseudonymise::VERSION
  spec.authors       = ['NCRS development team']
  spec.email         = []

  spec.summary       = 'Provide pseudonymisation facilities.'
  spec.description   = 'Provide pseudonymisation facilities.'
  spec.homepage      = 'https://github.com/NHSDigital/ndr_pseudonymise'
  spec.license       = 'MIT'

  # Files kept out of the packaged gem: CI / test directories (matched at the
  # start of the path), plus .travis.yml and code_safety.yml (matched anywhere
  # in the path). The dots are escaped so they only match a literal ".".
  ignore_files_re = %r{^(\.github|test|spec|features|gemfiles)/|\.travis\.yml|code_safety\.yml}
  spec.files         = `git ls-files -z`.split("\x0").
                       reject { |f| f.match(ignore_files_re) }
  spec.bindir        = 'exe'
  # Every file under exe/ is exposed as an executable, by basename.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Object methods like .blank?
  spec.add_dependency 'activesupport'

  spec.add_development_dependency 'activesupport'
  spec.add_development_dependency 'bundler'
  spec.add_development_dependency 'minitest', '>= 5.0'
  spec.add_development_dependency 'mocha'
  spec.add_development_dependency 'ndr_dev_support', '>= 6.0'
  spec.add_development_dependency 'ndr_import'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rake', '>= 12.3.3'

  spec.required_ruby_version = '>= 2.6.0'
end
@@ -0,0 +1,154 @@
1
+ # ndr_encrypt script
2
+
3
+ `ndr_encrypt` encrypts images and other files, allowing them to be hosted and
4
+ retrieved safely. The files still need to be hosted securely, but public /
5
+ private key encryption provides additional protection.
6
+
7
+ ## Overview
8
+
9
+ We define a simple suite of tools, `ndr_encrypt`, to generate the encrypted
10
+ image files, and the data that lets us access them. These work in a similar way
11
+ to git object storage, and require minimal software to run in a standard Linux
12
+ / macOS environment. (Related reading:
13
+ https://git-scm.com/book/en/v2/Git-Internals-Git-Objects)
14
+
15
+ With these tools, you can:
16
+ 1. transform a nested directory tree of files into an encrypted storage
17
+ representation + CSV file suitable for import to a SQL database
18
+ 2. identify and decrypt an image, using an entry from the CSV file
19
+ 3. recover the original contents of an unknown encrypted file (but not the
20
+ original filename), and use the CSV file to identify the original file
21
+ [TODO: not yet implemented]
22
+ 4. rewrite the encrypted files using a new encryption key [TODO: not yet
23
+ implemented]
24
+
25
+ ## Usage
26
+
27
+ ```
28
+ usage: ndr_encrypt [-v | --version] [-h | --help]
29
+ <command> [<args>]
30
+
31
+ These are common ndr_encrypt commands used in various situations:
32
+
33
+ start a working area
34
+ init Create an empty Git ndr_encrypt working copy
35
+
36
+ work with files
37
+ add Add file contents to the encrypted store and index
38
+
39
+ encryption key rotation and repository maintenance
40
+ gc Cleanup unnecessary index entries and optimize the encrypted store
41
+
42
+ decrypt data
43
+ cat-remote Retrieve remote file based on git_blobid
44
+ get Retrieve local file(s) based on path in CSV index
45
+
46
+ Low-level Commands / Interrogators
47
+
48
+ Low-level Commands / Manipulators
49
+
50
+ Additional options:
51
+ --base_url=URL Remote repository URL
52
+ --key_name=NAME Key name
53
+ --private_key=NAME Private key filename
54
+ --pub_key=NAME Public key filename
55
+ --passin=OPTIONS Pass in private key passphrase
56
+ -p Print downloaded object
57
+ ```
58
+
59
+ `ndr_encrypt` requires ruby 2.0 or later to be installed
60
+
61
+ ## Simple Usage Example
62
+
63
+ ``` shell
64
+ # Set up an image repository:
65
+ ndr_encrypt init images
66
+ cd images
67
+
68
+ # Set up encryption / decryption keys:
69
+ # Use a strong passphrase, e.g. by running openssl rand -hex 32
70
+ echo Use a strong passphrase, e.g. `openssl rand -hex 32`
71
+ keyname=ourkey1
72
+ openssl genpkey -algorithm RSA -out ourkey1.pem -aes-256-cbc -pkeyopt rsa_keygen_bits:4096
73
+ openssl rsa -in ourkey1.pem -out ourkey1.pub -outform PEM -pubout
74
+
75
+ # Create a sample .gif file "test/dir/ok.gif"
76
+ mkdir -p test/dir
77
+ base64 --decode > test/dir/ok.gif <<BASE64
78
+ R0lGODlhDAAIAPABAAAAAP///yH5BAAAAAAAIf8LSW1hZ2VNYWdpY2sOZ2Ft
79
+ bWE9MC40NTQ1NDUALAAAAAAMAAgAAAITjI8HC9GuTJvozRchVQz6BIZgAQA7
80
+ BASE64
81
+
82
+ # Add the object to the repository
83
+ ndr_encrypt add --key_name=ourkey1 --pub_key=ourkey1.pub test/dir/ok.gif
84
+
85
+ # Move aside the original file, to test recovery
86
+ mv test/dir/ok.gif{,.orig}
87
+
88
+ # Recover file from the repository, prompting for the passphrase
89
+ # (This uses the CSV index file ndr_encrypted/index.csv and
90
+ # the encrypted object store in ndr_encrypted/objects)
91
+ ndr_encrypt get --key_name=ourkey1 --private_key=ourkey1.pem test/dir/ok.gif
92
+
93
+ # Ensure the recovered file is identical
94
+ diff -s test/dir/ok.gif{.orig,}
95
+
96
+ # Check index contents (for hashes used in next example)
97
+ cat ndr_encrypted/index.csv
98
+ ```
99
+
100
+ ## Retrieving files hosted on a webserver
101
+
102
+ To retrieve files from a webserver, we assume that the contents of the index
103
+ file `ndr_encrypted/index.csv` have been moved to a table, and the object store
104
+ contents of `ndr_encrypted/objects/` have been hosted on a webserver or S3
105
+ buckets, e.g. inside `https://example.org/encrypted/storage/`
106
+
107
+ ### Using `ndr_pseudonymise` gem
108
+
109
+ ``` ruby
110
+ require 'ndr_pseudonymise/ndr_encrypt'
111
+
112
+ key_name = 'ourkey1'
113
+ private_key = 'ourkey1.pem'
114
+ private_passphrase = begin # Should be read from encrypted credential storage
115
+ require 'io/console'
116
+ IO::console.getpass("Enter decryption passphrase for #{private_key.inspect}: ")
117
+ end
118
+
119
+ base_url = 'https://example.org/encrypted/storage/'
120
+ git_blobid = 'f29bddf64c444f663d106568f4a81a22151ed3f97b0ec0c2a5ab25a0e8a02515'
121
+
122
+ remote_repo = NdrPseudonymise::NdrEncrypt::RemoteRepository.new(base_url: base_url)
123
+ decrypted_data = remote_repo.cat_remote(
124
+ git_blobid, key_name: key_name, private_key: private_key,
125
+ passin: "pass:#{private_passphrase}"
126
+ )
127
+ ```
128
+
129
+ ### Using `ndr_encrypt` command line inside a ruby application
130
+
131
+ ``` ruby
132
+ require 'open3'
133
+
134
+ key_name = 'ourkey1'
135
+ private_key = 'ourkey1.pem'
136
+ private_passphrase = begin # Should be read from encrypted credential storage
137
+ require 'io/console'
138
+ IO::console.getpass("Enter decryption passphrase for #{private_key.inspect}: ")
139
+ end
140
+
141
+ base_url = 'https://example.org/encrypted/storage/'
142
+ git_blobid = 'f29bddf64c444f663d106568f4a81a22151ed3f97b0ec0c2a5ab25a0e8a02515'
143
+
144
+ decrypted_data = Open3.capture2(
145
+ 'ndr_encrypt', 'cat-remote', '-p', "--key_name=#{key_name}",
146
+ "--private_key=#{private_key}", "--base_url=#{base_url}",
147
+ '--passin=stdin', git_blobid,
148
+ stdin_data: private_passphrase, binmode: true
149
+ )[0]
150
+ ```
151
+
152
+ ## Low-level object manipulation
153
+
154
+ TODO
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.join(__dir__, '../../lib'))
3
+ require 'ndr_pseudonymise/ndr_encrypt'
4
+ NdrPseudonymise::NdrEncrypt::CommandLine.run!
metadata ADDED
@@ -0,0 +1,197 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ndr_pseudonymise
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ platform: ruby
6
+ authors:
7
+ - NCRS development team
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-10-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mocha
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: ndr_dev_support
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '6.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '6.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ndr_import
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: 12.3.3
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 12.3.3
139
+ description: Provide pseudonymisation facilities.
140
+ email: []
141
+ executables: []
142
+ extensions: []
143
+ extra_rdoc_files: []
144
+ files:
145
+ - ".gitignore"
146
+ - ".rubocop.yml"
147
+ - CHANGELOG.md
148
+ - Gemfile
149
+ - LICENSE.txt
150
+ - README.md
151
+ - Rakefile
152
+ - bin/console
153
+ - bin/setup
154
+ - lib/ndr_pseudonymise.rb
155
+ - lib/ndr_pseudonymise/client.rb
156
+ - lib/ndr_pseudonymise/demographics_only_pseudonymiser.rb
157
+ - lib/ndr_pseudonymise/engine.rb
158
+ - lib/ndr_pseudonymise/ndr_encrypt.rb
159
+ - lib/ndr_pseudonymise/ndr_encrypt/command_line.rb
160
+ - lib/ndr_pseudonymise/ndr_encrypt/encrypted_object.rb
161
+ - lib/ndr_pseudonymise/ndr_encrypt/remote_repository.rb
162
+ - lib/ndr_pseudonymise/ndr_encrypt/repository.rb
163
+ - lib/ndr_pseudonymise/prescription_pseudonymiser.rb
164
+ - lib/ndr_pseudonymise/progress_printer.rb
165
+ - lib/ndr_pseudonymise/pseudonymisation_specification.rb
166
+ - lib/ndr_pseudonymise/pseudonymised_file_converter.rb
167
+ - lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb
168
+ - lib/ndr_pseudonymise/simple_pseudonymisation.rb
169
+ - lib/ndr_pseudonymise/version.rb
170
+ - lib/rsa_aes_cbc.rb
171
+ - ndr_pseudonymise.gemspec
172
+ - script/ndr_encrypt/README.md
173
+ - script/ndr_encrypt/ndr_encrypt
174
+ homepage: https://github.com/NHSDigital/ndr_pseudonymise
175
+ licenses:
176
+ - MIT
177
+ metadata: {}
178
+ post_install_message:
179
+ rdoc_options: []
180
+ require_paths:
181
+ - lib
182
+ required_ruby_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: 2.6.0
187
+ required_rubygems_version: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - ">="
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ requirements: []
193
+ rubygems_version: 3.2.33
194
+ signing_key:
195
+ specification_version: 4
196
+ summary: Provide pseudonymisation facilities.
197
+ test_files: []