sequel 5.39.0 → 5.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +52 -0
  3. data/MIT-LICENSE +1 -1
  4. data/doc/release_notes/5.40.0.txt +40 -0
  5. data/doc/release_notes/5.41.0.txt +25 -0
  6. data/doc/release_notes/5.42.0.txt +136 -0
  7. data/doc/release_notes/5.43.0.txt +98 -0
  8. data/doc/release_notes/5.44.0.txt +32 -0
  9. data/doc/sql.rdoc +1 -1
  10. data/doc/testing.rdoc +3 -0
  11. data/lib/sequel/adapters/ado.rb +16 -16
  12. data/lib/sequel/adapters/jdbc.rb +2 -2
  13. data/lib/sequel/adapters/shared/postgres.rb +4 -2
  14. data/lib/sequel/adapters/shared/sqlite.rb +37 -3
  15. data/lib/sequel/core.rb +11 -0
  16. data/lib/sequel/database/misc.rb +1 -2
  17. data/lib/sequel/database/schema_generator.rb +35 -47
  18. data/lib/sequel/database/schema_methods.rb +4 -0
  19. data/lib/sequel/dataset/features.rb +10 -0
  20. data/lib/sequel/dataset/prepared_statements.rb +2 -0
  21. data/lib/sequel/dataset/sql.rb +32 -10
  22. data/lib/sequel/extensions/async_thread_pool.rb +438 -0
  23. data/lib/sequel/extensions/blank.rb +8 -0
  24. data/lib/sequel/extensions/date_arithmetic.rb +36 -24
  25. data/lib/sequel/extensions/eval_inspect.rb +2 -0
  26. data/lib/sequel/extensions/inflector.rb +8 -0
  27. data/lib/sequel/extensions/migration.rb +2 -0
  28. data/lib/sequel/extensions/named_timezones.rb +5 -1
  29. data/lib/sequel/extensions/pg_array.rb +1 -0
  30. data/lib/sequel/extensions/pg_enum.rb +1 -1
  31. data/lib/sequel/extensions/pg_interval.rb +34 -8
  32. data/lib/sequel/extensions/pg_row.rb +1 -0
  33. data/lib/sequel/extensions/query.rb +2 -0
  34. data/lib/sequel/model/associations.rb +68 -13
  35. data/lib/sequel/model/base.rb +23 -6
  36. data/lib/sequel/model/plugins.rb +5 -0
  37. data/lib/sequel/plugins/association_proxies.rb +2 -0
  38. data/lib/sequel/plugins/async_thread_pool.rb +39 -0
  39. data/lib/sequel/plugins/auto_validations.rb +15 -1
  40. data/lib/sequel/plugins/column_encryption.rb +728 -0
  41. data/lib/sequel/plugins/composition.rb +7 -2
  42. data/lib/sequel/plugins/concurrent_eager_loading.rb +174 -0
  43. data/lib/sequel/plugins/constraint_validations.rb +2 -1
  44. data/lib/sequel/plugins/dataset_associations.rb +4 -1
  45. data/lib/sequel/plugins/json_serializer.rb +37 -22
  46. data/lib/sequel/plugins/nested_attributes.rb +8 -3
  47. data/lib/sequel/plugins/pg_array_associations.rb +10 -4
  48. data/lib/sequel/plugins/pg_auto_constraint_validations.rb +2 -0
  49. data/lib/sequel/plugins/rcte_tree.rb +27 -19
  50. data/lib/sequel/plugins/serialization.rb +8 -3
  51. data/lib/sequel/plugins/serialization_modification_detection.rb +1 -1
  52. data/lib/sequel/plugins/validation_helpers.rb +6 -2
  53. data/lib/sequel/version.rb +1 -1
  54. metadata +36 -22
@@ -508,7 +508,9 @@ module Sequel
508
508
 
509
509
  m.configure(self, *args, &block) if m.respond_to?(:configure)
510
510
  end
511
+ # :nocov:
511
512
  ruby2_keywords(:plugin) if respond_to?(:ruby2_keywords, true)
513
+ # :nocov:
512
514
 
513
515
  # Returns primary key attribute hash. If using a composite primary key
514
516
  # value should be an array with values for each primary key in the correct
@@ -727,8 +729,14 @@ module Sequel
727
729
  im = instance_methods
728
730
  overridable_methods_module.module_eval do
729
731
  meth = :"#{column}="
730
- define_method(column){self[column]} unless im.include?(column)
731
- define_method(meth){|v| self[column] = v} unless im.include?(meth)
732
+ unless im.include?(column)
733
+ define_method(column){self[column]}
734
+ alias_method(column, column)
735
+ end
736
+ unless im.include?(meth)
737
+ define_method(meth){|v| self[column] = v}
738
+ alias_method(meth, meth)
739
+ end
732
740
  end
733
741
  end
734
742
 
@@ -741,8 +749,14 @@ module Sequel
741
749
  im = instance_methods
742
750
  columns.each do |column|
743
751
  meth = :"#{column}="
744
- overridable_methods_module.module_eval("def #{column}; self[:#{column}] end", __FILE__, __LINE__) unless im.include?(column)
745
- overridable_methods_module.module_eval("def #{meth}(v); self[:#{column}] = v end", __FILE__, __LINE__) unless im.include?(meth)
752
+ unless im.include?(column)
753
+ overridable_methods_module.module_eval("def #{column}; self[:#{column}] end", __FILE__, __LINE__)
754
+ overridable_methods_module.send(:alias_method, column, column)
755
+ end
756
+ unless im.include?(meth)
757
+ overridable_methods_module.module_eval("def #{meth}(v); self[:#{column}] = v end", __FILE__, __LINE__)
758
+ overridable_methods_module.send(:alias_method, meth, meth)
759
+ end
746
760
  end
747
761
  end
748
762
 
@@ -757,7 +771,10 @@ module Sequel
757
771
  else
758
772
  define_singleton_method(meth){|*args, &block| dataset.public_send(meth, *args, &block)}
759
773
  end
774
+ singleton_class.send(:alias_method, meth, meth)
775
+ # :nocov:
760
776
  singleton_class.send(:ruby2_keywords, meth) if respond_to?(:ruby2_keywords, true)
777
+ # :nocov:
761
778
  end
762
779
 
763
780
  # Get the schema from the database, fall back on checking the columns
@@ -1243,12 +1260,12 @@ module Sequel
1243
1260
  # Once an object is frozen, you cannot modify its values, changed_columns,
1244
1261
  # errors, or dataset.
1245
1262
  def freeze
1246
- values.freeze
1247
- _changed_columns.freeze
1248
1263
  unless errors.frozen?
1249
1264
  validate
1250
1265
  errors.freeze
1251
1266
  end
1267
+ values.freeze
1268
+ _changed_columns.freeze
1252
1269
  this if !new? && model.primary_key
1253
1270
  super
1254
1271
  end
@@ -31,7 +31,9 @@ module Sequel
31
31
  def self.def_dataset_methods(mod, meths)
32
32
  Array(meths).each do |meth|
33
33
  mod.class_eval("def #{meth}(*args, &block); dataset.#{meth}(*args, &block) end", __FILE__, __LINE__)
34
+ # :nocov:
34
35
  mod.send(:ruby2_keywords, meth) if respond_to?(:ruby2_keywords, true)
36
+ # :nocov:
35
37
  end
36
38
  end
37
39
 
@@ -120,6 +122,7 @@ module Sequel
120
122
 
121
123
  model.send(:define_method, meth, &block)
122
124
  model.send(:private, meth)
125
+ model.send(:alias_method, meth, meth)
123
126
  call_meth
124
127
  end
125
128
 
@@ -141,6 +144,8 @@ module Sequel
141
144
  keyword = :required
142
145
  when :key, :keyrest
143
146
  keyword ||= true
147
+ else
148
+ raise Error, "invalid arg_type passed to _define_sequel_method_arg_numbers: #{arg_type}"
144
149
  end
145
150
  end
146
151
  arity = callable.arity
@@ -99,7 +99,9 @@ module Sequel
99
99
  end
100
100
  v.public_send(meth, *args, &block)
101
101
  end
102
+ # :nocov:
102
103
  ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)
104
+ # :nocov:
103
105
  end
104
106
 
105
107
  module ClassMethods
@@ -0,0 +1,39 @@
1
+ # frozen-string-literal: true
2
+
3
+ module Sequel
4
+ extension 'async_thread_pool'
5
+
6
+ module Plugins
7
+ # The async_thread_pool plugin makes it slightly easier to use the async_thread_pool
8
+ # Database extension with models. It makes Model.async return an async dataset for the
9
+ # model, and support async behavior for #destroy, #with_pk, and #with_pk! for model
10
+ # datasets:
11
+ #
12
+ # # Will load the artist with primary key 1 asynchronously
13
+ # artist = Artist.async.with_pk(1)
14
+ #
15
+ # You must load the async_thread_pool Database extension into the Database object the
16
+ # model class uses in order for async behavior to work.
17
+ #
18
+ # Usage:
19
+ #
20
+ # # Make all model subclass datasets support async class methods and additional
21
+ # # async dataset methods
22
+ # Sequel::Model.plugin :async_thread_pool
23
+ #
24
+ # # Make the Album class support async class method and additional async dataset methods
25
+ # Album.plugin :async_thread_pool
26
+ module AsyncThreadPool
27
+ module ClassMethods
28
+ Plugins.def_dataset_methods(self, :async)
29
+ end
30
+
31
+ module DatasetMethods
32
+ [:destroy, :with_pk, :with_pk!].each do |meth|
33
+ ::Sequel::Database::AsyncThreadPool::DatasetMethods.define_async_method(self, meth)
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+
@@ -14,7 +14,9 @@ module Sequel
14
14
  # the plugin looks at the database schema for the model's table. To determine
15
15
  # the unique validations, Sequel looks at the indexes on the table. In order
16
16
  # for this plugin to be fully functional, the underlying database adapter needs
17
- # to support both schema and index parsing.
17
+ # to support both schema and index parsing. Additionally, unique validations are
18
+ # only added for models that select from a simple table, they are not added for models
19
+ # that select from a subquery or joined dataset.
18
20
  #
19
21
  # This plugin uses the validation_helpers plugin underneath to implement the
20
22
  # validations. It does not allow for any per-column validation message
@@ -51,6 +53,11 @@ module Sequel
51
53
  # This works for unique_opts, max_length_opts, schema_types_opts,
52
54
  # explicit_not_null_opts, and not_null_opts.
53
55
  #
56
+ # If you only want auto_validations to add validations to columns that do not already
57
+ # have an error associated with them, you can use the skip_invalid option:
58
+ #
59
+ # Model.plugin :auto_validations, skip_invalid: true
60
+ #
54
61
  # Usage:
55
62
  #
56
63
  # # Make all model subclass use auto validations (called before loading subclasses)
@@ -100,6 +107,13 @@ module Sequel
100
107
  h[type] = h[type].merge(type_opts).freeze
101
108
  end
102
109
  end
110
+
111
+ if opts[:skip_invalid]
112
+ [:not_null, :explicit_not_null, :max_length, :schema_types].each do |type|
113
+ h[type] = h[type].merge(:skip_invalid=>true).freeze
114
+ end
115
+ end
116
+
103
117
  @auto_validate_options = h.freeze
104
118
  end
105
119
  end
@@ -0,0 +1,728 @@
1
+ # frozen-string-literal: true
2
+
3
+ # :nocov:
4
+ raise(Sequel::Error, "Sequel column_encryption plugin requires ruby 2.3 or greater") unless RUBY_VERSION >= '2.3'
5
+ # :nocov:
6
+
7
+ require 'openssl'
8
+
9
+ begin
10
+ # Test cipher actually works
11
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
12
+ cipher.encrypt
13
+ cipher.key = '1'*32
14
+ cipher_iv = cipher.random_iv
15
+ cipher.auth_data = ''
16
+ cipher_text = cipher.update('2') << cipher.final
17
+ auth_tag = cipher.auth_tag
18
+
19
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
20
+ cipher.decrypt
21
+ cipher.iv = cipher_iv
22
+ cipher.key = '1'*32
23
+ cipher.auth_data = ''
24
+ cipher.auth_tag = auth_tag
25
+ # :nocov:
26
+ unless (cipher.update(cipher_text) << cipher.final) == '2'
27
+ raise OpenSSL::Cipher::CipherError
28
+ end
29
+ rescue RuntimeError, OpenSSL::Cipher::CipherError
30
+ raise LoadError, "Sequel column_encryption plugin requires a working aes-256-gcm cipher"
31
+ # :nocov:
32
+ end
33
+
34
+ require 'base64'
35
+ require 'securerandom'
36
+
37
+ module Sequel
38
+ module Plugins
39
+ # The column_encryption plugin adds support for encrypting the content of individual
40
+ # columns in a table.
41
+ #
42
+ # Column values are encrypted with AES-256-GCM using a per-value cipher key derived from
43
+ # a key provided in the configuration using HMAC-SHA256.
44
+ #
45
+ # = Usage
46
+ #
47
+ # If you would like to support encryption of columns in more than one model, you should
48
+ # probably load the plugin into the parent class of your models and specify the keys:
49
+ #
50
+ # Sequel::Model.plugin :column_encryption do |enc|
51
+ # enc.key 0, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
52
+ # end
53
+ #
54
+ # This specifies a single master encryption key. Unless you are actively rotating keys,
55
+ # it is best to use a single master key. Rotation of encryption keys will be discussed
56
+ # in a later section.
57
+ #
58
+ # In the above call, <tt>0</tt> is the id of the key, and the
59
+ # <tt>ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]</tt> is the content of the key, which must be
60
+ # a string with exactly 32 bytes. As indicated, this key should not be hardcoded or
61
+ # otherwise committed to the source control repository.
62
+ #
63
+ # For models that need encrypted columns, you load the plugin again, but specify the
64
+ # columns to encrypt:
65
+ #
66
+ # ConfidentialModel.plugin :column_encryption do |enc|
67
+ # enc.column :encrypted_column_name
68
+ # enc.column :searchable_column_name, searchable: true
69
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
70
+ # end
71
+ #
72
+ # With this, all three specified columns (+encrypted_column_name+, +searchable_column_name+,
73
+ # and +ci_searchable_column_name+) will be marked as encrypted columns. When you run the
74
+ # following code:
75
+ #
76
+ # ConfidentialModel.create(
77
+ # encrypted_column_name: 'These',
78
+ # searchable_column_name: 'will be',
79
+ # ci_searchable_column_name: 'Encrypted'
80
+ # )
81
+ #
82
+ # It will save encrypted versions to the database. +encrypted_column_name+ will not be
83
+ # searchable, +searchable_column_name+ will be searchable with an exact match, and
84
+ # +ci_searchable_column_name+ will be searchable with a case insensitive match. See section
85
+ # below for details on searching.
86
+ #
87
+ # It is possible to have model-specific keys by specifying both the +key+ and +column+ methods
88
+ # in the model:
89
+ #
90
+ # ConfidentialModel.plugin :column_encryption do |enc|
91
+ # enc.key 0, ENV["SEQUEL_MODEL_SPECIFIC_ENCRYPTION_KEY"]
92
+ #
93
+ # enc.column :encrypted_column_name
94
+ # enc.column :searchable_column_name, searchable: true
95
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
96
+ # end
97
+ #
98
+ # When the +key+ method is called inside the plugin block, previous keys are ignored,
99
+ # and only the new keys specified will be used. This approach would allow the
100
+ # +ConfidentialModel+ to use the model specific encryption keys, and other models
101
+ # to use the default keys specified in the parent class.
102
+ #
103
+ # The +key+ and +column+ methods inside the plugin block support additional options.
104
+ # The +key+ method supports the following options:
105
+ #
106
+ # :auth_data :: The authentication data to use for the AES-256-GCM cipher. Defaults
107
+ # to the empty string.
108
+ # :padding :: The number of padding bytes to use. For security, data is padded so that
109
+ # a database administrator cannot determine the exact size of the
110
+ # unencrypted data. By default, this value is 8, which means that
111
+ # unencrypted data will be padded to a multiple of 8 bytes. Up to twice as
112
+ # much padding as specified will be used, as the number of padding bytes
113
+ # is partially randomized.
114
+ #
115
+ # The +column+ method supports the following options:
116
+ #
117
+ # :searchable :: Whether the column is searchable. This should not be used unless
118
+ # searchability is needed, as it can allow the database administrator
119
+ # to determine whether two distinct rows have the same unencrypted
120
+ # data (but not what that data is). This can be set to +true+ to allow
121
+ # searching with an exact match, or +:case_insensitive+ for a case
122
+ # insensitive match.
123
+ # :search_both :: This should only be used if you have previously switched the
124
+ # +:searchable+ option from +true+ to +:case_insensitive+ or vice-versa,
125
+ # and would like the search to return values that have not yet been
126
+ # reencrypted. Note that switching from +true+ to +:case_insensitive+
127
+ # isn't a problem, but switching from +:case_insensitive+ to +true+ and
128
+ # using this option can cause the search to return values that are
129
+ # not an exact match. You should manually filter those objects
130
+ # after decrypting if you want to ensure an exact match.
131
+ # :format :: The format of the column, if you want to perform serialization before
132
+ # encryption and deserialization after decryption. Can be either a
133
+ # symbol registered with the serialization plugin or an array of two
134
+ # callables, the first for serialization and the second for deserialization.
135
+ #
136
+ # The +column+ method also supports a block for column-specific keys:
137
+ #
138
+ # ConfidentialModel.plugin :column_encryption do |enc|
139
+ # enc.column :encrypted_column_name do |cenc|
140
+ # cenc.key 0, ENV["SEQUEL_COLUMN_SPECIFIC_ENCRYPTION_KEY"]
141
+ # end
142
+ #
143
+ # enc.column :searchable_column_name, searchable: true
144
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
145
+ # end
146
+ #
147
+ # In this case, the <tt>ENV["SEQUEL_COLUMN_SPECIFIC_ENCRYPTION_KEY"]</tt> key will
148
+ # only be used for the +:encrypted_column_name+ column, and not the other columns.
149
+ #
150
+ # Note that there isn't a security reason to prefer either model-specific or
151
+ # column-specific keys, as the actual cipher key used is unique per column value.
152
+ #
153
+ # Note that changing the key_id, key string, or auth_data for an existing key will
154
+ # break decryption of values encrypted with that key. If you would like to change
155
+ # any aspect of the key, add a new key, rotate to the new encryption key, and then
156
+ # remove the previous key, as described in the section below on key rotation.
157
+ #
158
+ # = Searching Encrypted Values
159
+ #
160
+ # To search searchable encrypted columns, use +with_encrypted_value+. This example
161
+ # code will return the model instance created in the code example in the previous
162
+ # section:
163
+ #
164
+ # ConfidentialModel.
165
+ # with_encrypted_value(:searchable_column_name, "will be").
166
+ # with_encrypted_value(:ci_searchable_column_name, "encrypted").
167
+ # first
168
+ #
169
+ # = Encryption Key Rotation
170
+ #
171
+ # To rotate encryption keys, add a new key above the existing key, with a new key ID:
172
+ #
173
+ # Sequel::Model.plugin :column_encryption do |enc|
174
+ # enc.key 1, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
175
+ # enc.key 0, ENV["SEQUEL_OLD_COLUMN_ENCRYPTION_KEY"]
176
+ # end
177
+ #
178
+ # Newly encrypted data will then use the new key. Records encrypted with the older key
179
+ # will still be decrypted correctly.
180
+ #
181
+ # To force reencryption for existing records that are using the older key, you can use
182
+ # the +needing_reencryption+ dataset method and the +reencrypt+ instance method. For a
183
+ # small number of records, you can probably do:
184
+ #
185
+ # ConfidentialModel.needing_reencryption.all(&:reencrypt)
186
+ #
187
+ # With more than a small number of records, you'll want to do this in batches. It's
188
+ # possible you could use an approach such as:
189
+ #
190
+ # ds = ConfidentialModel.needing_reencryption.limit(100)
191
+ # true until ds.all(&:reencrypt).empty?
192
+ #
193
+ # After all values have been reencrypted for all models, and no models use the older
194
+ # encryption key, you can remove it from the configuration:
195
+ #
196
+ # Sequel::Model.plugin :column_encryption do |enc|
197
+ # enc.key 1, ENV["SEQUEL_COLUMN_ENCRYPTION_KEY"]
198
+ # end
199
+ #
200
+ # Once an encryption key has been removed, after no data uses it, it is safe to reuse
201
+ # the same key id for a new key. This approach allows for up to 256 concurrent keys
202
+ # in the same configuration.
203
+ #
204
+ # = Encrypting Additional Formats
205
+ #
206
+ # By default, the column_encryption plugin assumes that the decrypted data should be
207
+ # returned as a string, and a string will be passed to encrypt. However, using the
208
+ # +:format+ option, you can specify an alternate format. For example, if you want to
209
+ # encrypt a JSON representation of the object, so that you can deal with an array/hash
210
+ # and automatically have it serialized with JSON and then encrypted when saving, and
211
+ # then deserialized with JSON after decryption when it is retrieved:
212
+ #
213
+ # require 'json'
214
+ # ConfidentialModel.plugin :column_encryption do |enc|
215
+ # enc.key 0, ENV["SEQUEL_MODEL_SPECIFIC_ENCRYPTION_KEY"]
216
+ #
217
+ # enc.column :encrypted_column_name
218
+ # enc.column :searchable_column_name, searchable: true
219
+ # enc.column :ci_searchable_column_name, searchable: :case_insensitive
220
+ # enc.column :encrypted_json_column_name, format: :json
221
+ # end
222
+ #
223
+ # The values of the +:format+ are the same values you can pass as the first argument
224
+ # to +serialize_attributes+ (in the serialization plugin). You can pass an array
225
+ # with the serializer and deserializer for custom support.
226
+ #
227
+ # You can use both +:searchable+ and +:format+ together for searchable encrypted
228
+ # serialized columns. However, note that this allows only exact searches of the
229
+ # serialized version of the data. So for JSON, a search for <tt>{'a'=>1, 'b'=>2}</tt>
230
+ # would not match <tt>{'b'=>2, 'a'=>1}</tt> even though the objects are considered
231
+ # equal. If this is an issue, make sure you use a serialization format where all
232
+ # equal objects are serialized to the same string.
233
+ #
234
+ # = Enforcing Uniqueness
235
+ #
236
+ # You cannot enforce uniqueness of unencrypted data at the database level
237
+ # if you also want to support key rotation. However, absent key rotation, a
238
+ # unique index on the first 48 characters of the encrypted column can enforce uniqueness,
239
+ # as long as the column is searchable. If the encrypted column is case-insensitive
240
+ # searchable, the uniqueness is case insensitive as well.
241
+ #
242
+ # = Column Value Cryptography/Format
243
+ #
244
+ # Column values used by this plugin use the following format (+key+ is specified
245
+ # in the plugin configuration and must be exactly 32 bytes):
246
+ #
247
+ # column_value :: urlsafe_base64(flags + NUL + key_id + NUL + search_data + key_data +
248
+ # cipher_iv + cipher_auth_tag + encrypted_data)
249
+ # flags :: 1 byte, the type of record (0: not searchable, 1: searchable, 2: lowercase searchable)
250
+ # NUL :: 1 byte, ASCII NUL
251
+ # key_id :: 1 byte, the key id, supporting 256 concurrently active keys (0 - 255)
252
+ # search_data :: 0 bytes if flags is 0, 32 bytes if flags is 1 or 2.
253
+ # Format is HMAC-SHA256(key, unencrypted_data).
254
+ # Ignored on decryption, only used for searching.
255
+ # key_data :: 32 bytes random data used to construct cipher key
256
+ # cipher_iv :: 12 bytes, AES-256-GCM cipher random initialization vector
257
+ # cipher_auth_tag :: 16 bytes, AES-256-GCM cipher authentication tag
258
+ # encrypted_data :: AES-256-GCM(HMAC-SHA256(key, key_data),
259
+ # padding_size + padding + unencrypted_data)
260
+ # padding_size :: 1 byte, with the amount of padding (0-255 bytes of padding allowed)
261
+ # padding :: number of bytes specified by padding size, ignored on decryption
262
+ # unencrypted_data :: actual column value
263
+ #
264
+ # The reason for <tt>flags + NUL + key_id + NUL</tt> (4 bytes) as the header is to allow for
265
+ # an easy way to search for values needing reencryption using a database index. It takes
266
+ # the first three bytes and converts them to base64, and looks for values less than that value
267
+ # or greater than that value with 'B' appended. The NUL byte in the fourth byte of the header
268
+ # ensures that after base64 encoding, the fifth byte in the column will be 'A'.
269
+ #
270
+ # The reason for <tt>search_data</tt> (32 bytes) directly after is that for searchable values,
271
+ # after base64 encoding of the header and search data, it is 48 bytes and can be used directly
272
+ # as a prefix search on the column, which can be supported by the same database index. This is
273
+ # more efficient than a full column value search for large values, and allows for case-insensitive
274
+ # searching without a separate column, by having the search_data be based on the lowercase value
275
+ # while the unencrypted data is original case.
276
+ #
277
+ # The reason for the padding is so that a database administrator cannot be sure exactly how
278
+ # many bytes are in the column. It is stored encrypted because otherwise the database
279
+ # administrator could calculate it by decoding the base64 data.
280
+ #
281
+ # = Unsupported Features
282
+ #
283
+ # The following features are deliberately not supported:
284
+ #
285
+ # == Compression
286
+ #
287
+ # Allowing compression with encryption is inviting security issues later.
288
+ # While padding can reduce the risk of compression with encryption, it does not
289
+ # eliminate it entirely. Users that must have compression with encryption can use
290
+ # the +:format+ option with a serializer that compresses and a deserializer that
291
+ # decompresses.
292
+ #
293
+ # == Mixing Encrypted/Unencrypted Data
294
+ #
295
+ # Mixing encrypted and unencrypted data increases the complexity and security risk, since there
296
+ # is a chance unencrypted data could look like encrypted data in the pathologic case.
297
+ # If you have existing unencrypted data that you would like to encrypt, create a new column for
298
+ # the encrypted data, and then migrate the data from the unencrypted column to the encrypted
299
+ # column. After all unencrypted values have been migrated, drop the unencrypted column.
300
+ #
301
+ # == Arbitrary Encryption Schemes
302
+ #
303
+ # Supporting arbitrary encryption schemes increases the complexity risk.
304
+ # If in the future AES-256-GCM is not considered a secure enough cipher, it is possible to
305
+ # extend the current format using the reserved values in the first two bytes of the header.
306
+ #
307
+ # = Caveats
308
+ #
309
+ # As column_encryption is a model plugin, it only works with using model instance methods.
310
+ # If you directly modify the database using a dataset or an external program that modifies
311
+ # the contents of the encrypted columns, you will probably corrupt the data. To make data
312
+ # corruption less likely, it is best to have CHECK constraints on the encrypted column
313
+ # with a basic format and length check:
314
+ #
315
+ # DB.alter_table(:table_name) do
316
+ # c = Sequel[:encrypted_column_name]
317
+ # add_constraint(:encrypted_column_name_format,
318
+ # c.like('AA__A%') | c.like('Ag__A%') | c.like('AQ__A%'))
319
+ # add_constraint(:encrypted_column_name_length, Sequel.char_length(c) >= 88)
320
+ # end
321
+ #
322
+ # If possible, it's also best to check that the column is valid urlsafe base64 data of
323
+ # sufficient length. This can be done on PostgreSQL using a combination of octet_length,
324
+ # decode, and regexp_replace:
325
+ #
326
+ # DB.alter_table(:ce_test) do
327
+ # c = Sequel[:encrypted_column_name]
328
+ # add_constraint(:enc_base64) do
329
+ # octet_length(decode(regexp_replace(regexp_replace(c, '_', '/', 'g'), '-', '+', 'g'), 'base64')) >= 65
330
+ # end
331
+ # end
332
+ #
333
+ # Such constraints will probably be sufficient to protect against most unintentional corruption of
334
+ # encrypted columns.
335
+ #
336
+ # If the database supports transparent data encryption and you trust the database administrator,
337
+ # using the database support is probably a better approach.
338
+ #
339
+ # The column_encryption plugin is only supported on Ruby 2.3+ and when the Ruby openssl standard
340
+ # library supports the AES-256-GCM cipher.
341
+ module ColumnEncryption
342
+ # Cryptor handles the encryption and decryption of rows for a key set.
343
+ # It also provides methods that return search prefixes, which datasets
344
+ # use in queries.
345
+ #
346
+ # The same cryptor can support non-searchable, searchable, and case-insensitive
347
+ # searchable columns.
348
+ class Cryptor # :nodoc:
349
+ # Flags
350
+ NOT_SEARCHABLE = 0
351
+ SEARCHABLE = 1
352
+ LOWERCASE_SEARCHABLE = 2
353
+
354
+ # This is the default padding, but up to 2x the padding can be used for a record.
355
+ DEFAULT_PADDING = 8
356
+
357
+ # Keys should be an array of arrays containing key_id, key string, auth_data, and padding.
358
+ def initialize(keys)
359
+ if keys.empty?
360
+ raise Error, "Cannot initialize encryptor without encryption key"
361
+ end
362
+
363
+ # First key is used for encryption
364
+ @key_id, @key, @auth_data, @padding = keys[0]
365
+
366
+ # All keys are candidates for decryption
367
+ @key_map = {}
368
+ keys.each do |key_id, key, auth_data, padding|
369
+ @key_map[key_id] = [key, auth_data, padding].freeze
370
+ end
371
+
372
+ freeze
373
+ end
374
+
375
+ # Decrypt using any supported format and any available key.
376
+ def decrypt(data)
377
+ begin
378
+ data = Base64.urlsafe_decode64(data)
379
+ rescue ArgumentError
380
+ raise Error, "Unable to decode encrypted column: invalid base64"
381
+ end
382
+
383
+ unless data.getbyte(1) == 0 && data.getbyte(3) == 0
384
+ raise Error, "Unable to decode encrypted column: invalid format"
385
+ end
386
+
387
+ flags = data.getbyte(0)
388
+
389
+ key, auth_data = @key_map[data.getbyte(2)]
390
+ unless key
391
+ raise Error, "Unable to decode encrypted column: invalid key id"
392
+ end
393
+
394
+ case flags
395
+ when NOT_SEARCHABLE
396
+ if data.bytesize < 65
397
+ raise Error, "Decoded encrypted column smaller than minimum size"
398
+ end
399
+
400
+ data.slice!(0, 4)
401
+ when SEARCHABLE, LOWERCASE_SEARCHABLE
402
+ if data.bytesize < 97
403
+ raise Error, "Decoded encrypted column smaller than minimum size"
404
+ end
405
+
406
+ data.slice!(0, 36)
407
+ else
408
+ raise Error, "Unable to decode encrypted column: invalid flags"
409
+ end
410
+
411
+ key_part = data.slice!(0, 32)
412
+ cipher_iv = data.slice!(0, 12)
413
+ auth_tag = data.slice!(0, 16)
414
+
415
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
416
+ cipher.decrypt
417
+ cipher.iv = cipher_iv
418
+ cipher.key = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, key, key_part)
419
+ cipher.auth_data = auth_data
420
+ cipher.auth_tag = auth_tag
421
+ begin
422
+ decrypted_data = cipher.update(data) << cipher.final
423
+ rescue OpenSSL::Cipher::CipherError => e
424
+ raise Error, "Unable to decrypt encrypted column: #{e.class} (probably due to encryption key or auth data mismatch or corrupt data)"
425
+ end
426
+
427
+ # Remove padding
428
+ decrypted_data.slice!(0, decrypted_data.getbyte(0) + 1)
429
+
430
+ decrypted_data
431
+ end
432
+
433
+ # Encrypt in not searchable format with the first configured encryption key.
434
+ def encrypt(data)
435
+ _encrypt(data, "#{NOT_SEARCHABLE.chr}\0#{@key_id.chr}\0")
436
+ end
437
+
438
+ # Encrypt in searchable format with the first configured encryption key.
439
+ def searchable_encrypt(data)
440
+ _encrypt(data, _search_prefix(data, SEARCHABLE, @key_id, @key))
441
+ end
442
+
443
+ # Encrypt in case insensitive searchable format with the first configured encryption key.
444
+ def case_insensitive_searchable_encrypt(data)
445
+ _encrypt(data, _search_prefix(data.downcase, LOWERCASE_SEARCHABLE, @key_id, @key))
446
+ end
447
+
448
+ # The prefix string of columns for the given search type and the first configured encryption key.
449
+ # Used to find values that do not use this prefix in order to perform reencryption.
450
+ def current_key_prefix(search_type)
451
+ Base64.urlsafe_encode64("#{search_type.chr}\0#{@key_id.chr}")
452
+ end
453
+
454
+ # The prefix values to search for the given data (an array of strings), assuming the column uses
455
+ # the searchable format.
456
+ def search_prefixes(data)
457
+ _search_prefixes(data, SEARCHABLE)
458
+ end
459
+
460
+ # The prefix values to search for the given data (an array of strings), assuming the column uses
461
+ # the case insensitive searchable format.
462
+ def lowercase_search_prefixes(data)
463
+ _search_prefixes(data.downcase, LOWERCASE_SEARCHABLE)
464
+ end
465
+
466
+ # The prefix values to search for the given data (an array of strings), assuming the column uses
467
+ # either the searchable or the case insensitive searchable format. Should be used only when
468
+ # transitioning between formats (used by the :search_both option when encrypting columns).
469
+ def regular_and_lowercase_search_prefixes(data)
470
+ search_prefixes(data) + lowercase_search_prefixes(data)
471
+ end
472
+
473
+ private
474
+
475
+ # An array of strings, one for each configured encryption key, to find encrypted values matching
476
+ # the given data and search format.
477
+ def _search_prefixes(data, search_type)
478
+ @key_map.map do |key_id, (key, _)|
479
+ Base64.urlsafe_encode64(_search_prefix(data, search_type, key_id, key))
480
+ end
481
+ end
482
+
483
+ # The prefix to use for searchable data, including the HMAC-SHA256(key, data).
484
+ def _search_prefix(data, search_type, key_id, key)
485
+ "#{search_type.chr}\0#{key_id.chr}\0#{OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, key, data)}"
486
+ end
487
+
488
+ # Encrypt the data using AES-256-GCM, with the given prefix.
489
+ def _encrypt(data, prefix)
490
+ padding = @padding
491
+ random_data = SecureRandom.random_bytes(32)
492
+ cipher = OpenSSL::Cipher.new("aes-256-gcm")
493
+ cipher.encrypt
494
+ cipher.key = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA256.new, @key, random_data)
495
+ cipher_iv = cipher.random_iv
496
+ cipher.auth_data = @auth_data
497
+
498
+ cipher_text = String.new
499
+ data_size = data.bytesize
500
+
501
+ padding_size = if padding
502
+ (padding * rand(1)) + padding - (data.bytesize % padding)
503
+ else
504
+ 0
505
+ end
506
+
507
+ cipher_text << cipher.update(padding_size.chr)
508
+ cipher_text << cipher.update(SecureRandom.random_bytes(padding_size)) if padding_size > 0
509
+ cipher_text << cipher.update(data) if data_size > 0
510
+ cipher_text << cipher.final
511
+
512
+ Base64.urlsafe_encode64("#{prefix}#{random_data}#{cipher_iv}#{cipher.auth_tag}#{cipher_text}")
513
+ end
514
+ end
515
+
516
+ # The object type yielded to blocks passed to the +column+ method inside
517
+ # <tt>plugin :column_encryption</tt> blocks. This is used to configure custom
518
+ # per-column keys.
519
+ class ColumnDSL # :nodoc:
520
+ # An array of arrays for the data for the keys configured inside the block.
521
+ attr_reader :keys
522
+
523
+ def initialize
524
+ @keys = []
525
+ end
526
+
527
+ # Verify that the key_id, key, and options are valid, then record the key.
528
+ def key(key_id, key, opts=OPTS)
529
+ unless key_id.is_a?(Integer) && key_id >= 0 && key_id <= 255
530
+ raise Error, "invalid key_id argument, must be integer between 0 and 255"
531
+ end
532
+
533
+ unless key.is_a?(String) && key.bytesize == 32
534
+ raise Error, "invalid key argument, must be string with exactly 32 bytes"
535
+ end
536
+
537
+ if opts.has_key?(:padding)
538
+ if padding = opts[:padding]
539
+ unless padding.is_a?(Integer) && padding >= 1 && padding <= 120
540
+ raise Error, "invalid :padding option, must be between 1 and 120"
541
+ end
542
+ end
543
+ else
544
+ padding = Cryptor::DEFAULT_PADDING
545
+ end
546
+
547
+ @keys << [key_id, key, opts[:auth_data].to_s, padding].freeze
548
+ end
549
+ end
550
+
551
+ # The object type yielded to <tt>plugin :column_encryption</tt> blocks,
552
+ # used to configure encryption keys and encrypted columns.
553
+ class DSL < ColumnDSL # :nodoc:
554
+ # An array of arrays of data for the columns configured inside the block.
555
+ attr_reader :columns
556
+
557
+ def initialize
558
+ super
559
+ @columns = []
560
+ end
561
+
562
+ # Store the column information.
563
+ def column(column, opts=OPTS, &block)
564
+ @columns << [column, opts, block].freeze
565
+ end
566
+ end
567
+
568
+ def self.apply(model, opts=OPTS)
569
+ model.plugin :serialization
570
+ end
571
+
572
+ def self.configure(model)
573
+ dsl = DSL.new
574
+ yield dsl
575
+
576
+ model.instance_exec do
577
+ unless dsl.keys.empty?
578
+ @column_encryption_keys = dsl.keys.freeze
579
+ @column_encryption_cryptor = nil
580
+ end
581
+
582
+ @column_encryption_metadata = Hash[@column_encryption_metadata || {}]
583
+
584
+ dsl.columns.each do |column, opts, block|
585
+ _encrypt_column(column, opts, &block)
586
+ end
587
+
588
+ @column_encryption_metadata.freeze
589
+ end
590
+ end
591
+
592
+ # This stores four callables for handling encryption, decryption, data searching,
593
+ # and key searching. One of these is created for each encrypted column.
594
+ ColumnEncryptionMetadata = Struct.new(:encryptor, :decryptor, :data_searcher, :key_searcher) # :nodoc:
595
+
596
+ module ClassMethods
597
+ private
598
+
599
+ # A hash with column symbol keys and ColumnEncryptionMetadata values for each
600
+ # encrypted column.
601
+ attr_reader :column_encryption_metadata
602
+
603
+ # The default Cryptor to use for encrypted columns. This is only overridden if
604
+ # per-column keys are used.
605
+ def column_encryption_cryptor
606
+ @column_encryption_cryptor ||= Cryptor.new(@column_encryption_keys)
607
+ end
608
+
609
+ # Setup encryption for the given column.
610
+ def _encrypt_column(column, opts)
611
+ cryptor ||= if block_given?
612
+ dsl = ColumnDSL.new
613
+ yield dsl
614
+ Cryptor.new(dsl.keys)
615
+ else
616
+ column_encryption_cryptor
617
+ end
618
+
619
+ encrypt_method, search_prefixes_method, search_type = case searchable = opts[:searchable]
620
+ when nil, false
621
+ [:encrypt, nil, Cryptor::NOT_SEARCHABLE]
622
+ when true
623
+ [:searchable_encrypt, :search_prefixes, Cryptor::SEARCHABLE]
624
+ when :case_insensitive
625
+ [:case_insensitive_searchable_encrypt, :lowercase_search_prefixes, Cryptor::LOWERCASE_SEARCHABLE]
626
+ else
627
+ raise Error, "invalid :searchable option for encrypted column: #{searchable.inspect}"
628
+ end
629
+
630
+ if searchable && opts[:search_both]
631
+ search_prefixes_method = :regular_and_lowercase_search_prefixes
632
+ end
633
+
634
+ # Setup the callables used in the metadata.
635
+ encryptor = cryptor.method(encrypt_method)
636
+ decryptor = cryptor.method(:decrypt)
637
+ data_searcher = cryptor.method(search_prefixes_method) if search_prefixes_method
638
+ key_searcher = lambda{cryptor.current_key_prefix(search_type)}
639
+
640
+ if format = opts[:format]
641
+ if format.is_a?(Symbol)
642
+ unless format = Sequel.synchronize{Serialization::REGISTERED_FORMATS[format]}
643
+ raise(Error, "Unsupported serialization format: #{format} (valid formats: #{Sequel.synchronize{Serialization::REGISTERED_FORMATS.keys}.inspect})")
644
+ end
645
+ end
646
+
647
+ # If a custom serialization format is used, override the
648
+ # callables to handle serialization and deserialization.
649
+ serializer, deserializer = format
650
+ enc, dec, data_s = encryptor, decryptor, data_searcher
651
+ encryptor = lambda do |data|
652
+ enc.call(serializer.call(data))
653
+ end
654
+ decryptor = lambda do |data|
655
+ deserializer.call(dec.call(data))
656
+ end
657
+ data_searcher = lambda do |data|
658
+ data_s.call(serializer.call(data))
659
+ end
660
+ end
661
+
662
+ # Setup the setter and getter methods to do encryption and decryption using
663
+ # the serialization plugin.
664
+ serialize_attributes([encryptor, decryptor], column)
665
+
666
+ column_encryption_metadata[column] = ColumnEncryptionMetadata.new(encryptor, decryptor, data_searcher, key_searcher).freeze
667
+
668
+ nil
669
+ end
670
+ end
671
+
672
+ module ClassMethods
673
+ Plugins.def_dataset_methods(self, [:with_encrypted_value, :needing_reencryption])
674
+
675
+ Plugins.inherited_instance_variables(self,
676
+ :@column_encryption_cryptor=>nil,
677
+ :@column_encryption_keys=>nil,
678
+ :@column_encryption_metadata=>nil,
679
+ )
680
+ end
681
+
682
+ module InstanceMethods
683
+ # Reencrypt the model if needed. Looks at all of the model's encrypted columns
684
+ # and if any were encrypted with older keys or a different format, reencrypt
685
+ # with the current key and format and save the object. Returns the object
686
+ # if reencryption was needed, or nil if reencryption was not needed.
687
+ def reencrypt
688
+ do_save = false
689
+
690
+ model.send(:column_encryption_metadata).each do |column, metadata|
691
+ if (value = values[column]) && !value.start_with?(metadata.key_searcher.call)
692
+ do_save = true
693
+ values[column] = metadata.encryptor.call(metadata.decryptor.call(value))
694
+ end
695
+ end
696
+
697
+ save if do_save
698
+ end
699
+ end
700
+
701
+ module DatasetMethods
702
+ # Filter the dataset to only match rows where the column contains an encrypted version
703
+ # of value. Only works on searchable encrypted columns.
704
+ def with_encrypted_value(column, value)
705
+ metadata = model.send(:column_encryption_metadata)[column]
706
+
707
+ unless metadata && metadata.data_searcher
708
+ raise Error, "lookup for encrypted column #{column.inspect} is not supported"
709
+ end
710
+
711
+ prefixes = metadata.data_searcher.call(value)
712
+ where(Sequel.|(*prefixes.map{|v| Sequel.like(column, "#{escape_like(v)}%")}))
713
+ end
714
+
715
+ # Filter the dataset to exclude rows where all encrypted columns are already encrypted
716
+ # with the current key and format.
717
+ def needing_reencryption
718
+ incorrect_column_prefixes = model.send(:column_encryption_metadata).map do |column, metadata|
719
+ prefix = metadata.key_searcher.call
720
+ (Sequel[column] < prefix) | (Sequel[column] > prefix + 'B')
721
+ end
722
+
723
+ where(Sequel.|(*incorrect_column_prefixes))
724
+ end
725
+ end
726
+ end
727
+ end
728
+ end