sequel 5.40.0 → 5.45.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +52 -0
  3. data/MIT-LICENSE +1 -1
  4. data/doc/release_notes/5.41.0.txt +25 -0
  5. data/doc/release_notes/5.42.0.txt +136 -0
  6. data/doc/release_notes/5.43.0.txt +98 -0
  7. data/doc/release_notes/5.44.0.txt +32 -0
  8. data/doc/release_notes/5.45.0.txt +34 -0
  9. data/doc/sql.rdoc +1 -1
  10. data/doc/testing.rdoc +3 -0
  11. data/doc/virtual_rows.rdoc +1 -1
  12. data/lib/sequel/adapters/ado.rb +16 -16
  13. data/lib/sequel/adapters/odbc.rb +5 -1
  14. data/lib/sequel/adapters/shared/postgres.rb +4 -14
  15. data/lib/sequel/adapters/shared/sqlite.rb +8 -4
  16. data/lib/sequel/core.rb +11 -0
  17. data/lib/sequel/database/misc.rb +1 -2
  18. data/lib/sequel/database/schema_generator.rb +35 -47
  19. data/lib/sequel/database/schema_methods.rb +4 -0
  20. data/lib/sequel/dataset/query.rb +1 -3
  21. data/lib/sequel/dataset/sql.rb +7 -0
  22. data/lib/sequel/extensions/async_thread_pool.rb +438 -0
  23. data/lib/sequel/extensions/blank.rb +2 -0
  24. data/lib/sequel/extensions/date_arithmetic.rb +32 -23
  25. data/lib/sequel/extensions/inflector.rb +2 -0
  26. data/lib/sequel/extensions/named_timezones.rb +5 -1
  27. data/lib/sequel/extensions/pg_enum.rb +1 -1
  28. data/lib/sequel/extensions/pg_interval.rb +12 -2
  29. data/lib/sequel/extensions/pg_loose_count.rb +3 -1
  30. data/lib/sequel/model/associations.rb +70 -14
  31. data/lib/sequel/model/base.rb +2 -2
  32. data/lib/sequel/plugins/async_thread_pool.rb +39 -0
  33. data/lib/sequel/plugins/auto_validations.rb +15 -1
  34. data/lib/sequel/plugins/auto_validations_constraint_validations_presence_message.rb +68 -0
  35. data/lib/sequel/plugins/column_encryption.rb +728 -0
  36. data/lib/sequel/plugins/composition.rb +2 -1
  37. data/lib/sequel/plugins/concurrent_eager_loading.rb +174 -0
  38. data/lib/sequel/plugins/json_serializer.rb +37 -22
  39. data/lib/sequel/plugins/nested_attributes.rb +5 -2
  40. data/lib/sequel/plugins/pg_array_associations.rb +6 -4
  41. data/lib/sequel/plugins/rcte_tree.rb +27 -19
  42. data/lib/sequel/plugins/serialization.rb +8 -3
  43. data/lib/sequel/plugins/serialization_modification_detection.rb +1 -1
  44. data/lib/sequel/plugins/validation_helpers.rb +6 -2
  45. data/lib/sequel/version.rb +1 -1
  46. metadata +18 -3
@@ -0,0 +1,68 @@
1
+ # frozen-string-literal: true
2
+
3
+ module Sequel
4
+ module Plugins
5
+ # The auto_validations_constraint_validations_presence_message plugin provides
6
+ # integration for the auto_validations and constraint_validations plugins in
7
+ # the following situation:
8
+ #
9
+ # * A column has a NOT NULL constraint in the database
10
+ # * A constraint validation for presence exists on the column, with a :message
11
+ # option to set a column-specific message, and with the :allow_nil option set
12
+ # to true because the CHECK constraint doesn't need to check for NULL values
13
+ # as the column itself is NOT NULL
14
+ #
15
+ # In this case, by default the validation error message on the column will
16
+ # use the more specific constraint validation error message if the column
17
+ # has a non-NULL empty value, but will use the default auto_validations
18
+ # message if the column has a NULL value. With this plugin, the column-specific
19
+ # constraint validation error message will be used in both cases.
20
+ #
21
+ # Usage:
22
+ #
23
+ # # Make all model subclasses use this auto_validations/constraint_validations
24
+ # # integration (called before loading subclasses)
25
+ # Sequel::Model.plugin :auto_validations_constraint_validations_presence_message
26
+ #
27
+ # # Make the Album class use this auto_validations/constraint_validations integration
28
+ # Album.plugin :auto_validations_constraint_validations_presence_message
29
module AutoValidationsConstraintValidationsPresenceMessage
  # Load both plugins this integration builds upon.
  def self.apply(model)
    model.plugin(:auto_validations)
    model.plugin(:constraint_validations)
  end

  # Run the adjustment right away for the model loading the plugin.
  def self.configure(model, opts=OPTS)
    model.send(:_adjust_auto_validations_constraint_validations_presence_message)
  end

  module ClassMethods
    # Register the adjustment to also run after the model's dataset is set.
    Plugins.after_set_dataset(self, :_adjust_auto_validations_constraint_validations_presence_message)

    private

    # For each presence constraint validation that has a custom :message and
    # :allow_nil set, and whose column is also covered by the automatic
    # not-null validations, drop the column from the automatic not-null
    # lists and make the constraint validation reject nil, so the custom
    # message is the one reported.
    #
    # Nothing is changed when the model has no dataset, or when either the
    # :not_null or :explicit_not_null auto validation options already carry
    # a :message.
    def _adjust_auto_validations_constraint_validations_presence_message
      return unless @dataset
      return if @auto_validate_options[:not_null][:message]
      return if @auto_validate_options[:explicit_not_null][:message]

      @constraint_validations.each do |validation|
        validation_method, column, validation_opts = validation

        next unless validation_method == :validates_presence
        next unless validation_opts && validation_opts[:message] && validation_opts[:allow_nil]

        auto_validated = @auto_validate_not_null_columns.include?(column) ||
          @auto_validate_explicit_not_null_columns.include?(column)
        next unless auto_validated

        @auto_validate_not_null_columns.delete(column)
        @auto_validate_explicit_not_null_columns.delete(column)

        # Replace the options with a copy instead of mutating the existing hash.
        validation[2] = validation[2].merge(:allow_nil=>false)
      end
    end
  end
end
67
+ end
68
+ end
@@ -0,0 +1,728 @@
1
+ # frozen-string-literal: true
2
+
3
+ # :nocov:
4
+ raise(Sequel::Error, "Sequel column_encryption plugin requires ruby 2.3 or greater") unless RUBY_VERSION >= '2.3'
5
+ # :nocov:
6
+
7
+ require 'openssl'
8
+
9
begin
  # Load-time self check: encrypt a single byte with AES-256-GCM and make
  # sure it decrypts back to the same value, so that an OpenSSL build
  # without a usable aes-256-gcm cipher is detected when the plugin loads.
  encryptor = OpenSSL::Cipher.new("aes-256-gcm")
  encryptor.encrypt
  encryptor.key = '1'*32
  check_iv = encryptor.random_iv
  encryptor.auth_data = ''
  check_cipher_text = encryptor.update('2') << encryptor.final
  check_auth_tag = encryptor.auth_tag

  decryptor = OpenSSL::Cipher.new("aes-256-gcm")
  decryptor.decrypt
  decryptor.iv = check_iv
  decryptor.key = '1'*32
  decryptor.auth_data = ''
  decryptor.auth_tag = check_auth_tag
  # :nocov:
  round_trip = decryptor.update(check_cipher_text) << decryptor.final
  raise OpenSSL::Cipher::CipherError unless round_trip == '2'
rescue RuntimeError, OpenSSL::Cipher::CipherError
  # Report any cipher failure as a LoadError.
  raise LoadError, "Sequel column_encryption plugin requires a working aes-256-gcm cipher"
  # :nocov:
end
33
+
34
+ require 'base64'
35
+ require 'securerandom'
36
+
37
+ module Sequel
38
+ module Plugins
39
+ # The column_encryption plugin adds support for encrypting the content of individual
40
+ # columns in a table.
41
+ #
42
+ # Column values are encrypted with AES-256-GCM using a per-value cipher key derived from
43
+ # a key provided in the configuration using HMAC-SHA256.
44
+ #
45
+ # = Usage
46
+ #
47
+ # If you would like to support encryption of columns in more than one model, you should
48
+ # probably load the plugin into the parent class of your models and specify the keys:
49
+ #
50
+ # Sequel::Model.plugin :column_encryption do |enc|
51
+ # enc.key 0, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
52
+ # end
53
+ #
54
+ # This specifies a single master encryption key. Unless you are actively rotating keys,
55
+ # it is best to use a single master key. Rotation of encryption keys will be discussed
56
+ # in a later section.
57
+ #
58
+ # In the above call, <tt>0</tt> is the id of the key, and the
59
+ # <tt>ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]</tt> is the content of the key, which must be
60
+ # a string with exactly 32 bytes. As indicated, this key should not be hardcoded or
61
+ # otherwise committed to the source control repository.
62
+ #
63
+ # For models that need encrypted columns, you load the plugin again, but specify the
64
+ # columns to encrypt:
65
+ #
66
+ # ConfidentialModel.plugin :column_encryption do |enc|
67
+ # enc.column :encrypted_column_name
68
+ # enc.column :searchable_column_name, searchable: true
69
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
70
+ # end
71
+ #
72
+ # With this, all three specified columns (+encrypted_column_name+, +searchable_column_name+,
73
+ # and +ci_searchable_column_name+) will be marked as encrypted columns. When you run the
74
+ # following code:
75
+ #
76
+ # ConfidentialModel.create(
77
+ # encrypted_column_name: 'These',
78
+ # searchable_column_name: 'will be',
79
+ # ci_searchable_column_name: 'Encrypted'
80
+ # )
81
+ #
82
+ # It will save encrypted versions to the database. +encrypted_column_name+ will not be
83
+ # searchable, +searchable_column_name+ will be searchable with an exact match, and
84
+ # +ci_searchable_column_name+ will be searchable with a case insensitive match. See section
85
+ # below for details on searching.
86
+ #
87
+ # It is possible to have model-specific keys by specifying both the +key+ and +column+ methods
88
+ # in the model:
89
+ #
90
+ # ConfidentialModel.plugin :column_encryption do |enc|
91
+ # enc.key 0, ENV["SEQUEL_MODEL_SPECIFIC_ENCRYPTION_KEY"]
92
+ #
93
+ # enc.column :encrypted_column_name
94
+ # enc.column :searchable_column_name, searchable: true
95
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
96
+ # end
97
+ #
98
+ # When the +key+ method is called inside the plugin block, previous keys are ignored,
99
+ # and only the new keys specified will be used. This approach would allow the
100
+ # +ConfidentialModel+ to use the model specific encryption keys, and other models
101
+ # to use the default keys specified in the parent class.
102
+ #
103
+ # The +key+ and +column+ methods inside the plugin block support additional options.
104
+ # The +key+ method supports the following options:
105
+ #
106
+ # :auth_data :: The authentication data to use for the AES-256-GCM cipher. Defaults
107
+ # to the empty string.
108
+ # :padding :: The number of padding bytes to use. For security, data is padded so that
109
+ # a database administrator cannot determine the exact size of the
110
+ # unencrypted data. By default, this value is 8, which means that
111
+ # unencrypted data will be padded to a multiple of 8 bytes. Up to twice as
112
+ # much padding as specified will be used, as the number of padding bytes
113
+ # is partially randomized.
114
+ #
115
+ # The +column+ method supports the following options:
116
+ #
117
+ # :searchable :: Whether the column is searchable. This should not be used unless
118
+ # searchability is needed, as it can allow the database administrator
119
+ # to determine whether two distinct rows have the same unencrypted
120
+ # data (but not what that data is). This can be set to +true+ to allow
121
+ # searching with an exact match, or +:case_insensitive+ for a case
122
+ # insensitive match.
123
+ # :search_both :: This should only be used if you have previously switched the
124
+ # +:searchable+ option from +true+ to +:case_insensitive+ or vice-versa,
125
+ # and would like the search to return values that have not yet been
126
+ # reencrypted. Note that switching from +true+ to +:case_insensitive+
127
+ # isn't a problem, but switching from +:case_insensitive+ to +true+ and
128
+ # using this option can cause the search to return values that are
129
+ # not an exact match. You should manually filter those objects
130
+ # after decrypting if you want to ensure an exact match.
131
+ # :format :: The format of the column, if you want to perform serialization before
132
+ # encryption and deserialization after decryption. Can be either a
133
+ # symbol registered with the serialization plugin or an array of two
134
+ # callables, the first for serialization and the second for deserialization.
135
+ #
136
+ # The +column+ method also supports a block for column-specific keys:
137
+ #
138
+ # ConfidentialModel.plugin :column_encryption do |enc|
139
+ # enc.column :encrypted_column_name do |cenc|
140
+ # cenc.key 0, ENV["SEQUEL_COLUMN_SPECIFIC_ENCRYPTION_KEY"]
141
+ # end
142
+ #
143
+ # enc.column :searchable_column_name, searchable: true
144
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
145
+ # end
146
+ #
147
+ # In this case, the <tt>ENV["SEQUEL_COLUMN_SPECIFIC_ENCRYPTION_KEY"]</tt> key will
148
+ # only be used for the +:encrypted_column_name+ column, and not the other columns.
149
+ #
150
+ # Note that there isn't a security reason to prefer either model-specific or
151
+ # column-specific keys, as the actual cipher key used is unique per column value.
152
+ #
153
+ # Note that changing the key_id, key string, or auth_data for an existing key will
154
+ # break decryption of values encrypted with that key. If you would like to change
155
+ # any aspect of the key, add a new key, rotate to the new encryption key, and then
156
+ # remove the previous key, as described in the section below on key rotation.
157
+ #
158
+ # = Searching Encrypted Values
159
+ #
160
+ # To search searchable encrypted columns, use +with_encrypted_value+. This example
161
+ # code will return the model instance created in the code example in the previous
162
+ # section:
163
+ #
164
+ # ConfidentialModel.
165
+ # with_encrypted_value(:searchable_column_name, "will be").
166
+ # with_encrypted_value(:ci_searchable_column_name, "encrypted").
167
+ # first
168
+ #
169
+ # = Encryption Key Rotation
170
+ #
171
+ # To rotate encryption keys, add a new key above the existing key, with a new key ID:
172
+ #
173
+ # Sequel::Model.plugin :column_encryption do |enc|
174
+ # enc.key 1, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
175
+ # enc.key 0, ENV["SEQUEL_OLD_COLUMN_ENCRYPTION_KEY"]
176
+ # end
177
+ #
178
+ # Newly encrypted data will then use the new key. Records encrypted with the older key
179
+ # will still be decrypted correctly.
180
+ #
181
+ # To force reencryption for existing records that are using the older key, you can use
182
+ # the +needing_reencryption+ dataset method and the +reencrypt+ instance method. For a
183
+ # small number of records, you can probably do:
184
+ #
185
+ # ConfidentialModel.needing_reencryption.all(&:reencrypt)
186
+ #
187
+ # With more than a small number of records, you'll want to do this in batches. It's
188
+ # possible you could use an approach such as:
189
+ #
190
+ # ds = ConfidentialModel.needing_reencryption.limit(100)
191
+ # true until ds.all(&:reencrypt).empty?
192
+ #
193
+ # After all values have been reencrypted for all models, and no models use the older
194
+ # encryption key, you can remove it from the configuration:
195
+ #
196
+ # Sequel::Model.plugin :column_encryption do |enc|
197
+ # enc.key 1, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
198
+ # end
199
+ #
200
+ # Once an encryption key has been removed, after no data uses it, it is safe to reuse
201
+ # the same key id for a new key. This approach allows for up to 256 concurrent keys
202
+ # in the same configuration.
203
+ #
204
+ # = Encrypting Additional Formats
205
+ #
206
+ # By default, the column_encryption plugin assumes that the decrypted data should be
207
+ # returned as a string, and a string will be passed to encrypt. However, using the
208
+ # +:format+ option, you can specify an alternate format. For example, if you want to
209
+ # encrypt a JSON representation of the object, so that you can deal with an array/hash
210
+ # and automatically have it serialized with JSON and then encrypted when saving, and
211
+ # then deserialized with JSON after decryption when it is retrieved:
212
+ #
213
+ # require 'json'
214
+ # ConfidentialModel.plugin :column_encryption do |enc|
215
+ # enc.key 0, ENV["SEQUEL_MODEL_SPECIFIC_ENCRYPTION_KEY"]
216
+ #
217
+ # enc.column :encrypted_column_name
218
+ # enc.column :searchable_column_name, searchable: true
219
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
220
+ # enc.column :encrypted_json_column_name, format: :json
221
+ # end
222
+ #
223
+ # The values of the +:format+ are the same values you can pass as the first argument
224
+ # to +serialize_attributes+ (in the serialization plugin). You can pass an array
225
+ # with the serializer and deserializer for custom support.
226
+ #
227
+ # You can use both +:searchable+ and +:format+ together for searchable encrypted
228
+ # serialized columns. However, note that this allows only exact searches of the
229
+ # serialized version of the data. So for JSON, a search for <tt>{'a'=>1, 'b'=>2}</tt>
230
+ # would not match <tt>{'b'=>2, 'a'=>1}</tt> even though the objects are considered
231
+ # equal. If this is an issue, make sure you use a serialization format where all
232
+ # equal objects are serialized to the same string.
233
+ #
234
+ # = Enforcing Uniqueness
235
+ #
236
+ # You cannot enforce uniqueness of unencrypted data at the database level
237
+ # if you also want to support key rotation. However, absent key rotation, a
238
+ # unique index on the first 48 characters of the encrypted column can enforce uniqueness,
239
+ # as long as the column is searchable. If the encrypted column is case-insensitive
240
+ # searchable, the uniqueness is case insensitive as well.
241
+ #
242
+ # = Column Value Cryptography/Format
243
+ #
244
+ # Column values used by this plugin use the following format (+key+ is specified
245
+ # in the plugin configuration and must be exactly 32 bytes):
246
+ #
247
+ # column_value :: urlsafe_base64(flags + NUL + key_id + NUL + search_data + key_data +
248
+ # cipher_iv + cipher_auth_tag + encrypted_data)
249
+ # flags :: 1 byte, the type of record (0: not searchable, 1: searchable, 2: lowercase searchable)
250
+ # NUL :: 1 byte, ASCII NUL
251
+ # key_id :: 1 byte, the key id, supporting 256 concurrently active keys (0 - 255)
252
+ # search_data :: 0 bytes if flags is 0, 32 bytes if flags is 1 or 2.
253
+ # Format is HMAC-SHA256(key, unencrypted_data).
254
+ # Ignored on decryption, only used for searching.
255
+ # key_data :: 32 bytes random data used to construct cipher key
256
+ # cipher_iv :: 12 bytes, AES-256-GCM cipher random initialization vector
257
+ # cipher_auth_tag :: 16 bytes, AES-256-GCM cipher authentication tag
258
+ # encrypted_data :: AES-256-GCM(HMAC-SHA256(key, key_data),
259
+ # padding_size + padding + unencrypted_data)
260
+ # padding_size :: 1 byte, with the amount of padding (0-255 bytes of padding allowed)
261
+ # padding :: number of bytes specified by padding size, ignored on decryption
262
+ # unencrypted_data :: actual column value
263
+ #
264
+ # The reason for <tt>flags + NUL + key_id + NUL</tt> (4 bytes) as the header is to allow for
265
+ # an easy way to search for values needing reencryption using a database index. It takes
266
+ # the first three bytes and converts them to base64, and looks for values less than that value
267
+ # or greater than that value with 'B' appended. The NUL byte in the fourth byte of the header
268
+ # ensures that after base64 encoding, the fifth byte in the column will be 'A'.
269
+ #
270
+ # The reason for <tt>search_data</tt> (32 bytes) directly after is that for searchable values,
271
+ # after base64 encoding of the header and search data, it is 48 bytes and can be used directly
272
+ # as a prefix search on the column, which can be supported by the same database index. This is
273
+ # more efficient than a full column value search for large values, and allows for case-insensitive
274
+ # searching without a separate column, by having the search_data be based on the lowercase value
275
+ # while the unencrypted data is original case.
276
+ #
277
+ # The reason for the padding is so that a database administrator cannot be sure exactly how
278
+ # many bytes are in the column. It is stored encrypted because otherwise the database
279
+ # administrator could calculate it by decoding the base64 data.
280
+ #
281
+ # = Unsupported Features
282
+ #
283
+ # The following features are deliberately not supported:
284
+ #
285
+ # == Compression
286
+ #
287
+ # Allowing compression with encryption is inviting security issues later.
288
+ # While padding can reduce the risk of compression with encryption, it does not
289
+ # eliminate it entirely. Users that must have compression with encryption can use
290
+ # the +:format+ option with a serializer that compresses and a deserializer that
291
+ # decompresses.
292
+ #
293
+ # == Mixing Encrypted/Unencrypted Data
294
+ #
295
+ # Mixing encrypted and unencrypted data increases the complexity and security risk, since there
296
+ # is a chance unencrypted data could look like encrypted data in the pathologic case.
297
+ # If you have existing unencrypted data that you would like to encrypt, create a new column for
298
+ # the encrypted data, and then migrate the data from the unencrypted column to the encrypted
299
+ # column. After all unencrypted values have been migrated, drop the unencrypted column.
300
+ #
301
+ # == Arbitrary Encryption Schemes
302
+ #
303
+ # Supporting arbitrary encryption schemes increases the complexity risk.
304
+ # If in the future AES-256-GCM is not considered a secure enough cipher, it is possible to
305
+ # extend the current format using the reserved values in the first two bytes of the header.
306
+ #
307
+ # = Caveats
308
+ #
309
+ # As column_encryption is a model plugin, it only works with using model instance methods.
310
+ # If you directly modify the database using a dataset or an external program that modifies
311
+ # the contents of the encrypted columns, you will probably corrupt the data. To make data
312
+ # corruption less likely, it is best to have CHECK constraints on the encrypted column
313
+ # with a basic format and length check:
314
+ #
315
+ # DB.alter_table(:table_name) do
316
+ # c = Sequel[:encrypted_column_name]
317
+ # add_constraint(:encrypted_column_name_format,
318
+ # c.like('AA__A%') | c.like('Ag__A%') | c.like('AQ__A%'))
319
+ # add_constraint(:encrypted_column_name_length, Sequel.char_length(c) >= 88)
320
+ # end
321
+ #
322
+ # If possible, it's also best to check that the column is valid urlsafe base64 data of
323
+ # sufficient length. This can be done on PostgreSQL using a combination of octet_length,
324
+ # decode, and regexp_replace:
325
+ #
326
+ # DB.alter_table(:ce_test) do
327
+ # c = Sequel[:encrypted_column_name]
328
+ # add_constraint(:enc_base64) do
329
+ # octet_length(decode(regexp_replace(regexp_replace(c, '_', '/', 'g'), '-', '+', 'g'), 'base64')) >= 65
330
+ # end
331
+ # end
332
+ #
333
+ # Such constraints will probably be sufficient to protect against most unintentional corruption of
334
+ # encrypted columns.
335
+ #
336
+ # If the database supports transparent data encryption and you trust the database administrator,
337
+ # using the database support is probably a better approach.
338
+ #
339
+ # The column_encryption plugin is only supported on Ruby 2.3+ and when the Ruby openssl standard
340
+ # library supports the AES-256-GCM cipher.
341
+ module ColumnEncryption
342
+ # Cryptor handles the encryption and decryption of rows for a key set.
343
+ # It also provides methods that return search prefixes, which datasets
344
+ # use in queries.
345
+ #
346
+ # The same cryptor can support non-searchable, searchable, and case-insensitive
347
+ # searchable columns.
348
class Cryptor # :nodoc:
  # Flags stored in the first byte of the column value header.
  NOT_SEARCHABLE = 0
  SEARCHABLE = 1
  LOWERCASE_SEARCHABLE = 2

  # This is the default padding, but up to 2x the padding can be used for a record.
  DEFAULT_PADDING = 8

  # Keys should be an array of arrays containing key_id, key string, auth_data, and padding.
  # Raises Error if no keys are given. The cryptor is frozen after setup.
  def initialize(keys)
    if keys.empty?
      raise Error, "Cannot initialize encryptor without encryption key"
    end

    # First key is used for encryption
    @key_id, @key, @auth_data, @padding = keys[0]

    # All keys are candidates for decryption
    @key_map = {}
    keys.each do |key_id, key, auth_data, padding|
      @key_map[key_id] = [key, auth_data, padding].freeze
    end

    freeze
  end

  # Decrypt using any supported format and any available key.
  # Returns the unencrypted data with the padding removed.
  # Raises Error if the value is not valid base64, does not have the
  # expected header, references an unknown key id, is too short for its
  # format, or fails AES-256-GCM decryption/authentication.
  def decrypt(data)
    begin
      data = Base64.urlsafe_decode64(data)
    rescue ArgumentError
      raise Error, "Unable to decode encrypted column: invalid base64"
    end

    # Header is flags + NUL + key_id + NUL. getbyte returns nil for
    # values that are too short, which also fails this check.
    unless data.getbyte(1) == 0 && data.getbyte(3) == 0
      raise Error, "Unable to decode encrypted column: invalid format"
    end

    flags = data.getbyte(0)

    key, auth_data = @key_map[data.getbyte(2)]
    unless key
      raise Error, "Unable to decode encrypted column: invalid key id"
    end

    case flags
    when NOT_SEARCHABLE
      # 4 byte header + 32 byte key data + 12 byte IV + 16 byte auth tag +
      # 1 byte padding size
      if data.bytesize < 65
        raise Error, "Decoded encrypted column smaller than minimum size"
      end

      data.slice!(0, 4)
    when SEARCHABLE, LOWERCASE_SEARCHABLE
      # Same as above, plus 32 bytes of search data after the header
      if data.bytesize < 97
        raise Error, "Decoded encrypted column smaller than minimum size"
      end

      # Search data is only used for searching, so it is dropped with the header
      data.slice!(0, 36)
    else
      raise Error, "Unable to decode encrypted column: invalid flags"
    end

    key_part = data.slice!(0, 32)
    cipher_iv = data.slice!(0, 12)
    auth_tag = data.slice!(0, 16)

    cipher = OpenSSL::Cipher.new("aes-256-gcm")
    cipher.decrypt
    cipher.iv = cipher_iv
    # The per-value cipher key is derived from the configured key and the
    # random key data stored with the value.
    cipher.key = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, key, key_part)
    cipher.auth_data = auth_data
    cipher.auth_tag = auth_tag
    begin
      decrypted_data = cipher.update(data) << cipher.final
    rescue OpenSSL::Cipher::CipherError => e
      raise Error, "Unable to decrypt encrypted column: #{e.class} (probably due to encryption key or auth data mismatch or corrupt data)"
    end

    # Remove padding (first byte is the padding size, followed by the padding)
    decrypted_data.slice!(0, decrypted_data.getbyte(0) + 1)

    decrypted_data
  end

  # Encrypt in not searchable format with the first configured encryption key.
  def encrypt(data)
    _encrypt(data, "#{NOT_SEARCHABLE.chr}\0#{@key_id.chr}\0")
  end

  # Encrypt in searchable format with the first configured encryption key.
  def searchable_encrypt(data)
    _encrypt(data, _search_prefix(data, SEARCHABLE, @key_id, @key))
  end

  # Encrypt in case insensitive searchable format with the first configured encryption key.
  def case_insensitive_searchable_encrypt(data)
    _encrypt(data, _search_prefix(data.downcase, LOWERCASE_SEARCHABLE, @key_id, @key))
  end

  # The prefix string of columns for the given search type and the first configured encryption key.
  # Used to find values that do not use this prefix in order to perform reencryption.
  def current_key_prefix(search_type)
    Base64.urlsafe_encode64("#{search_type.chr}\0#{@key_id.chr}")
  end

  # The prefix values to search for the given data (an array of strings), assuming the column uses
  # the searchable format.
  def search_prefixes(data)
    _search_prefixes(data, SEARCHABLE)
  end

  # The prefix values to search for the given data (an array of strings), assuming the column uses
  # the case insensitive searchable format.
  def lowercase_search_prefixes(data)
    _search_prefixes(data.downcase, LOWERCASE_SEARCHABLE)
  end

  # The prefix values to search for the given data (an array of strings), assuming the column uses
  # either the searchable or the case insensitive searchable format. Should be used only when
  # transitioning between formats (used by the :search_both option when encrypting columns).
  def regular_and_lowercase_search_prefixes(data)
    search_prefixes(data) + lowercase_search_prefixes(data)
  end

  private

  # An array of strings, one for each configured encryption key, to find encrypted values matching
  # the given data and search format.
  def _search_prefixes(data, search_type)
    @key_map.map do |key_id, (key, _)|
      Base64.urlsafe_encode64(_search_prefix(data, search_type, key_id, key))
    end
  end

  # The prefix to use for searchable data, including the HMAC-SHA256(key, data).
  def _search_prefix(data, search_type, key_id, key)
    "#{search_type.chr}\0#{key_id.chr}\0#{OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, key, data)}"
  end

  # Encrypt the data using AES-256-GCM, with the given prefix.
  # Returns the urlsafe base64 encoding of the prefix, random key data,
  # cipher IV, cipher auth tag, and encrypted data.
  def _encrypt(data, prefix)
    padding = @padding
    random_data = SecureRandom.random_bytes(32)
    cipher = OpenSSL::Cipher.new("aes-256-gcm")
    cipher.encrypt
    cipher.key = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, @key, random_data)
    cipher_iv = cipher.random_iv
    cipher.auth_data = @auth_data

    cipher_text = String.new
    data_size = data.bytesize

    padding_size = if padding
      # Randomly add either zero or one extra multiple of the padding, so
      # that up to twice the configured padding is used. The multiplier
      # must be drawn from 0..1: rand(1) always returns 0, which would
      # make the padding size fully determined by the data size.
      (padding * SecureRandom.random_number(2)) + padding - (data_size % padding)
    else
      0
    end

    cipher_text << cipher.update(padding_size.chr)
    cipher_text << cipher.update(SecureRandom.random_bytes(padding_size)) if padding_size > 0
    cipher_text << cipher.update(data) if data_size > 0
    cipher_text << cipher.final

    Base64.urlsafe_encode64("#{prefix}#{random_data}#{cipher_iv}#{cipher.auth_tag}#{cipher_text}")
  end
end
515
+
516
+ # The object type yielded to blocks passed to the +column+ method inside
517
+ # <tt>plugin :column_encryption</tt> blocks. This is used to configure custom
518
+ # per-column keys.
519
class ColumnDSL # :nodoc:
  # An array of frozen [key_id, key, auth_data, padding] arrays, one per
  # key configured inside the block, in the order they were configured.
  attr_reader :keys

  def initialize
    @keys = []
  end

  # Verify that the key_id, key, and options are valid, then record the key.
  #
  # key_id :: Integer between 0 and 255.
  # key :: String with exactly 32 bytes.
  # opts :: Supports :auth_data (stored via +to_s+) and :padding. If :padding
  #         is given with a nil/false value, no padding is recorded; if it is
  #         not given at all, Cryptor::DEFAULT_PADDING is recorded.
  #
  # Raises Error if any argument is invalid.
  def key(key_id, key, opts=OPTS)
    valid_key_id = key_id.is_a?(Integer) && key_id.between?(0, 255)
    raise Error, "invalid key_id argument, must be integer between 0 and 255" unless valid_key_id

    valid_key = key.is_a?(String) && key.bytesize == 32
    raise Error, "invalid key argument, must be string with exactly 32 bytes" unless valid_key

    if opts.has_key?(:padding)
      padding = opts[:padding]

      # Only an explicitly given, truthy padding value is validated.
      if padding && !(padding.is_a?(Integer) && padding.between?(1, 120))
        raise Error, "invalid :padding option, must be between 1 and 120"
      end
    else
      padding = Cryptor::DEFAULT_PADDING
    end

    @keys << [key_id, key, opts[:auth_data].to_s, padding].freeze
  end
end
550
+
551
+ # The object type yielded to <tt>plugin :column_encryption</tt> blocks,
552
+ # used to configure encryption keys and encrypted columns.
553
class DSL < ColumnDSL # :nodoc:
  # An array of frozen [column, opts, block] arrays, one per column
  # configured inside the block, in the order they were configured.
  attr_reader :columns

  def initialize
    super
    @columns = []
  end

  # Record the column, its options, and the optional block. Nothing is
  # checked here; the information is only stored.
  def column(column, opts=OPTS, &block)
    column_info = [column, opts, block].freeze
    @columns << column_info
  end
end
567
+
568
# Load the serialization plugin into the model; the options are not used here.
def self.apply(model, opts=OPTS)
  model.plugin(:serialization)
end
571
+
572
# Yield a DSL object to the plugin block, then apply the keys and columns
# configured on it to the model.
def self.configure(model)
  dsl = DSL.new
  yield dsl

  configured_keys = dsl.keys
  configured_columns = dsl.columns

  model.instance_exec do
    # Only replace the keys (and reset the cached cryptor) when the block
    # configured at least one key; otherwise existing keys are kept.
    if !configured_keys.empty?
      @column_encryption_keys = configured_keys.freeze
      @column_encryption_cryptor = nil
    end

    # Work on a fresh hash so the existing metadata hash is not modified.
    @column_encryption_metadata = Hash[@column_encryption_metadata || {}]

    configured_columns.each do |column, opts, block|
      _encrypt_column(column, opts, &block)
    end

    @column_encryption_metadata.freeze
  end
end
591
+
592
+ # This stores four callables for handling encryption, decryption, data searching,
593
+ # and key searching. One of these is created for each encrypted column.
594
+ ColumnEncryptionMetadata = Struct.new(:encryptor, :decryptor, :data_searcher, :key_searcher) # :nodoc:
595
+
596
module ClassMethods
  private

  # A hash with column symbol keys and ColumnEncryptionMetadata values for each
  # encrypted column.
  attr_reader :column_encryption_metadata

  # The default Cryptor to use for encrypted columns.  This is only overridden if
  # per-column keys are used.  Built from @column_encryption_keys on first use
  # and memoized in @column_encryption_cryptor.
  def column_encryption_cryptor
    @column_encryption_cryptor ||= Cryptor.new(@column_encryption_keys)
  end

  # Setup encryption for the given column.
  #
  # column :: the column to encrypt
  # opts :: options hash; the keys read here are:
  #         :searchable :: nil/false (not searchable), true, or :case_insensitive
  #         :search_both :: if truthy and the column is searchable, search using
  #                         both regular and lowercase prefixes
  #         :format :: a Symbol naming a format registered in
  #                    Serialization::REGISTERED_FORMATS, or a
  #                    [serializer, deserializer] pair of callables
  #
  # If a block is given, it is yielded a ColumnDSL and the keys configured on it
  # are used for a column-specific Cryptor instead of the default one.
  #
  # Raises Error for an invalid :searchable value or an unregistered Symbol
  # :format.  Returns nil.
  def _encrypt_column(column, opts)
    cryptor = if block_given?
      dsl = ColumnDSL.new
      yield dsl
      Cryptor.new(dsl.keys)
    else
      column_encryption_cryptor
    end

    encrypt_method, search_prefixes_method, search_type = case searchable = opts[:searchable]
    when nil, false
      [:encrypt, nil, Cryptor::NOT_SEARCHABLE]
    when true
      [:searchable_encrypt, :search_prefixes, Cryptor::SEARCHABLE]
    when :case_insensitive
      [:case_insensitive_searchable_encrypt, :lowercase_search_prefixes, Cryptor::LOWERCASE_SEARCHABLE]
    else
      raise Error, "invalid :searchable option for encrypted column: #{searchable.inspect}"
    end

    if searchable && opts[:search_both]
      search_prefixes_method = :regular_and_lowercase_search_prefixes
    end

    # Setup the callables used in the metadata.  data_searcher stays nil for
    # columns that are not searchable.
    encryptor = cryptor.method(encrypt_method)
    decryptor = cryptor.method(:decrypt)
    data_searcher = cryptor.method(search_prefixes_method) if search_prefixes_method
    key_searcher = lambda{cryptor.current_key_prefix(search_type)}

    if format = opts[:format]
      if format.is_a?(Symbol)
        # Keep the requested name separately, so that the error message can
        # still report it after the lookup has returned nil.
        format_name = format
        format = Sequel.synchronize{Serialization::REGISTERED_FORMATS[format_name]}
        unless format
          raise(Error, "Unsupported serialization format: #{format_name} (valid formats: #{Sequel.synchronize{Serialization::REGISTERED_FORMATS.keys}.inspect})")
        end
      end

      # If a custom serialization format is used, override the
      # callables to handle serialization and deserialization.
      serializer, deserializer = format
      enc, dec, data_s = encryptor, decryptor, data_searcher
      encryptor = lambda do |data|
        enc.call(serializer.call(data))
      end
      decryptor = lambda do |data|
        deserializer.call(dec.call(data))
      end

      # Only wrap the searcher when the column is searchable.  Wrapping a nil
      # searcher would make the column look searchable and fail later with a
      # NoMethodError instead of the "not supported" Error.
      data_searcher = data_s && lambda do |data|
        data_s.call(serializer.call(data))
      end
    end

    # Setup the setter and getter methods to do encryption and decryption using
    # the serialization plugin.
    serialize_attributes([encryptor, decryptor], column)

    column_encryption_metadata[column] = ColumnEncryptionMetadata.new(encryptor, decryptor, data_searcher, key_searcher).freeze

    nil
  end
end
671
+
672
module ClassMethods
  # Register the plugin's two dataset methods through
  # Plugins.def_dataset_methods (presumably so they can also be called on the
  # model class itself; confirm against that helper).
  Plugins.def_dataset_methods(
    self,
    [:with_encrypted_value, :needing_reencryption]
  )

  # Register the class-level instance variables used by this plugin through
  # Plugins.inherited_instance_variables, each with a nil setting (presumably
  # so that subclasses receive the parent's values; confirm against that helper).
  Plugins.inherited_instance_variables(
    self,
    :@column_encryption_cryptor=>nil,
    :@column_encryption_keys=>nil,
    :@column_encryption_metadata=>nil
  )
end
681
+
682
module InstanceMethods
  # Reencrypt the model if needed.  Each of the model's encrypted columns is
  # checked: a non-nil stored value that does not start with the column's
  # current key prefix is decrypted and encrypted again, and the result is
  # written back into +values+.  If at least one column was rewritten the
  # object is saved and the result of +save+ is returned; otherwise nil is
  # returned and nothing is saved.
  def reencrypt
    needs_save = false

    model.send(:column_encryption_metadata).each do |col, meta|
      stored = values[col]
      next unless stored
      next if stored.start_with?(meta.key_searcher.call)

      needs_save = true
      plaintext = meta.decryptor.call(stored)
      values[col] = meta.encryptor.call(plaintext)
    end

    needs_save ? save : nil
  end
end
700
+
701
module DatasetMethods
  # Filter the dataset to rows where +column+ holds an encrypted version of
  # +value+.  The column's data searcher turns +value+ into a list of
  # prefixes, and a row matches if the column is LIKE any of those prefixes
  # followed by anything.
  #
  # Raises Error if the column has no encryption metadata or has no data
  # searcher (i.e. it is not a searchable encrypted column).
  def with_encrypted_value(column, value)
    metadata = model.send(:column_encryption_metadata)[column]
    searcher = metadata && metadata.data_searcher

    unless searcher
      raise Error, "lookup for encrypted column #{column.inspect} is not supported"
    end

    conditions = searcher.call(value).map do |prefix|
      Sequel.like(column, "#{escape_like(prefix)}%")
    end

    where(Sequel.|(*conditions))
  end

  # Filter the dataset to rows where at least one encrypted column sorts
  # outside the range from the column's current key prefix up to that prefix
  # followed by 'B' (presumably the range covering every value encrypted with
  # the current key and format; confirm against Cryptor's key prefix layout).
  def needing_reencryption
    stale_conditions = model.send(:column_encryption_metadata).map do |column, metadata|
      current_prefix = metadata.key_searcher.call
      below_range = Sequel[column] < current_prefix
      above_range = Sequel[column] > current_prefix + 'B'
      below_range | above_range
    end

    where(Sequel.|(*stale_conditions))
  end
end
726
+ end
727
+ end
728
+ end