sequel 5.41.0 → 5.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +46 -0
  3. data/README.rdoc +1 -2
  4. data/doc/association_basics.rdoc +22 -3
  5. data/doc/release_notes/5.42.0.txt +136 -0
  6. data/doc/release_notes/5.43.0.txt +98 -0
  7. data/doc/release_notes/5.44.0.txt +32 -0
  8. data/doc/release_notes/5.45.0.txt +34 -0
  9. data/doc/release_notes/5.46.0.txt +87 -0
  10. data/doc/testing.rdoc +3 -0
  11. data/doc/virtual_rows.rdoc +1 -1
  12. data/lib/sequel/adapters/ado.rb +16 -16
  13. data/lib/sequel/adapters/odbc.rb +5 -1
  14. data/lib/sequel/adapters/shared/postgres.rb +0 -12
  15. data/lib/sequel/adapters/shared/sqlite.rb +8 -4
  16. data/lib/sequel/core.rb +11 -0
  17. data/lib/sequel/database/misc.rb +1 -2
  18. data/lib/sequel/database/schema_generator.rb +35 -47
  19. data/lib/sequel/database/schema_methods.rb +4 -0
  20. data/lib/sequel/dataset/query.rb +1 -3
  21. data/lib/sequel/dataset/sql.rb +7 -0
  22. data/lib/sequel/extensions/async_thread_pool.rb +438 -0
  23. data/lib/sequel/extensions/date_arithmetic.rb +29 -16
  24. data/lib/sequel/extensions/pg_enum.rb +1 -1
  25. data/lib/sequel/extensions/pg_loose_count.rb +3 -1
  26. data/lib/sequel/model/associations.rb +146 -75
  27. data/lib/sequel/model/base.rb +2 -2
  28. data/lib/sequel/plugins/async_thread_pool.rb +39 -0
  29. data/lib/sequel/plugins/auto_validations_constraint_validations_presence_message.rb +68 -0
  30. data/lib/sequel/plugins/column_encryption.rb +728 -0
  31. data/lib/sequel/plugins/composition.rb +2 -1
  32. data/lib/sequel/plugins/concurrent_eager_loading.rb +174 -0
  33. data/lib/sequel/plugins/json_serializer.rb +37 -22
  34. data/lib/sequel/plugins/nested_attributes.rb +5 -2
  35. data/lib/sequel/plugins/pg_array_associations.rb +52 -38
  36. data/lib/sequel/plugins/rcte_tree.rb +27 -19
  37. data/lib/sequel/plugins/serialization.rb +8 -3
  38. data/lib/sequel/plugins/serialization_modification_detection.rb +1 -1
  39. data/lib/sequel/plugins/unused_associations.rb +500 -0
  40. data/lib/sequel/version.rb +1 -1
  41. metadata +19 -3
data/doc/testing.rdoc CHANGED
@@ -157,9 +157,12 @@ The SEQUEL_INTEGRATION_URL environment variable specifies the Database connectio
157
157
 
158
158
  === Other
159
159
 
160
+ SEQUEL_ASYNC_THREAD_POOL :: Use the async_thread_pool extension when running the specs
161
+ SEQUEL_ASYNC_THREAD_POOL_PREEMPT :: Use the async_thread_pool extension when running the specs, with the :preempt_async_thread option
160
162
  SEQUEL_COLUMNS_INTROSPECTION :: Use the columns_introspection extension when running the specs
161
163
  SEQUEL_CONNECTION_VALIDATOR :: Use the connection validator extension when running the specs
162
164
  SEQUEL_DUPLICATE_COLUMNS_HANDLER :: Use the duplicate columns handler extension with value given when running the specs
165
+ SEQUEL_CONCURRENT_EAGER_LOADING :: Use the async_thread_pool extension and concurrent_eager_loading plugin when running the specs
163
166
  SEQUEL_ERROR_SQL :: Use the error_sql extension when running the specs
164
167
  SEQUEL_INDEX_CACHING :: Use the index_caching extension when running the specs
165
168
  SEQUEL_FIBER_CONCURRENCY :: Use the fiber_concurrency extension when running the adapter and integration specs
@@ -54,7 +54,7 @@ methods in the surrounding scope. For example:
54
54
 
55
55
  # Regular block
56
56
  ds.where{|o| o.c > a - b + @d}
57
- # WHERE (c > 100)
57
+ # WHERE (c > 110)
58
58
 
59
59
  # Instance-evaled block
60
60
  ds.where{c > a - b + @d}
@@ -195,10 +195,25 @@ module Sequel
195
195
  end
196
196
 
197
197
  @conversion_procs = CONVERSION_PROCS.dup
198
+ @conversion_procs[AdDBTimeStamp] = method(:adb_timestamp_to_application_timestamp)
198
199
 
199
200
  super
200
201
  end
201
202
 
203
+ def adb_timestamp_to_application_timestamp(v)
204
+ # This hard codes a timestamp_precision of 6 when converting.
205
+ # That is the default timestamp_precision, but the ado/mssql adapter uses a timestamp_precision
206
+ # of 3. However, timestamps returned by ado/mssql have nsec values that end up rounding to
207
+ # the same value as if a timestamp_precision of 3 was hard coded (either xxx999yzz, where y is
208
+ # 5-9 or xxx000yzz where y is 0-4).
209
+ #
210
+ # ADO subadapters should override this if they would like a different timestamp precision and
211
+ # this code does not work for them (for example, if they provide full nsec precision).
212
+ #
213
+ # Note that fractional second handling for WIN32OLE objects is not correct on ruby <2.2
214
+ to_application_timestamp([v.year, v.month, v.day, v.hour, v.min, v.sec, (v.nsec/1000.0).round * 1000])
215
+ end
216
+
202
217
  def dataset_class_default
203
218
  Dataset
204
219
  end
@@ -233,23 +248,8 @@ module Sequel
233
248
  cols = []
234
249
  conversion_procs = db.conversion_procs
235
250
 
236
- ts_cp = nil
237
251
  recordset.Fields.each do |field|
238
- type = field.Type
239
- cp = if type == AdDBTimeStamp
240
- ts_cp ||= begin
241
- nsec_div = 1000000000.0/(10**(timestamp_precision))
242
- nsec_mul = 10**(timestamp_precision+3)
243
- meth = db.method(:to_application_timestamp)
244
- lambda do |v|
245
- # Fractional second handling is not correct on ruby <2.2
246
- meth.call([v.year, v.month, v.day, v.hour, v.min, v.sec, (v.nsec/nsec_div).round * nsec_mul])
247
- end
248
- end
249
- else
250
- conversion_procs[type]
251
- end
252
- cols << [output_identifier(field.Name), cp]
252
+ cols << [output_identifier(field.Name), conversion_procs[field.Type]]
253
253
  end
254
254
 
255
255
  self.columns = cols.map(&:first)
@@ -94,7 +94,11 @@ module Sequel
94
94
  self.columns = columns
95
95
  s.each do |row|
96
96
  hash = {}
97
- cols.each{|n,t,j| hash[n] = convert_odbc_value(row[j], t)}
97
+ cols.each do |n,t,j|
98
+ v = row[j]
99
+ # We can assume v is not false, so this shouldn't convert false to nil.
100
+ hash[n] = (convert_odbc_value(v, t) if v)
101
+ end
98
102
  yield hash
99
103
  end
100
104
  end
@@ -2141,18 +2141,6 @@ module Sequel
2141
2141
  opts[:with].any?{|w| w[:recursive]} ? "WITH RECURSIVE " : super
2142
2142
  end
2143
2143
 
2144
- # Support WITH AS [NOT] MATERIALIZED if :materialized option is used.
2145
- def select_with_sql_prefix(sql, w)
2146
- super
2147
-
2148
- case w[:materialized]
2149
- when true
2150
- sql << "MATERIALIZED "
2151
- when false
2152
- sql << "NOT MATERIALIZED "
2153
- end
2154
- end
2155
-
2156
2144
  # The version of the database server
2157
2145
  def server_version
2158
2146
  db.server_version(@opts[:server])
@@ -239,8 +239,12 @@ module Sequel
239
239
  super
240
240
  end
241
241
  when :drop_column
242
- ocp = lambda{|oc| oc.delete_if{|c| c.to_s == op[:name].to_s}}
243
- duplicate_table(table, :old_columns_proc=>ocp){|columns| columns.delete_if{|s| s[:name].to_s == op[:name].to_s}}
242
+ if sqlite_version >= 33500
243
+ super
244
+ else
245
+ ocp = lambda{|oc| oc.delete_if{|c| c.to_s == op[:name].to_s}}
246
+ duplicate_table(table, :old_columns_proc=>ocp){|columns| columns.delete_if{|s| s[:name].to_s == op[:name].to_s}}
247
+ end
244
248
  when :rename_column
245
249
  if sqlite_version >= 32500
246
250
  super
@@ -424,10 +428,10 @@ module Sequel
424
428
  skip_indexes = []
425
429
  indexes(table, :only_autocreated=>true).each do |name, h|
426
430
  skip_indexes << name
427
- if h[:unique]
431
+ if h[:unique] && !opts[:no_unique]
428
432
  if h[:columns].length == 1
429
433
  unique_columns.concat(h[:columns])
430
- elsif h[:columns].map(&:to_s) != pks && !opts[:no_unique]
434
+ elsif h[:columns].map(&:to_s) != pks
431
435
  constraints << {:type=>:unique, :columns=>h[:columns]}
432
436
  end
433
437
  end
data/lib/sequel/core.rb CHANGED
@@ -176,6 +176,17 @@ module Sequel
176
176
  JSON.parse(json, :create_additions=>false)
177
177
  end
178
178
 
179
+ # If a mutex is given, synchronize access using it. If nil is given, just
180
+ # yield to the block. This is designed for cases where a mutex may or may
181
+ # not be provided.
182
+ def synchronize_with(mutex)
183
+ if mutex
184
+ mutex.synchronize{yield}
185
+ else
186
+ yield
187
+ end
188
+ end
189
+
179
190
  # Convert each item in the array to the correct type, handling multi-dimensional
180
191
  # arrays. For each element in the array or subarrays, call the converter,
181
192
  # unless the value is nil.
@@ -213,8 +213,7 @@ module Sequel
213
213
  Sequel.extension(*exts)
214
214
  exts.each do |ext|
215
215
  if pr = Sequel.synchronize{EXTENSIONS[ext]}
216
- unless Sequel.synchronize{@loaded_extensions.include?(ext)}
217
- Sequel.synchronize{@loaded_extensions << ext}
216
+ if Sequel.synchronize{@loaded_extensions.include?(ext) ? false : (@loaded_extensions << ext)}
218
217
  pr.call(self)
219
218
  end
220
219
  else
@@ -159,7 +159,7 @@ module Sequel
159
159
  nil
160
160
  end
161
161
 
162
- # Adds a named constraint (or unnamed if name is nil),
162
+ # Adds a named CHECK constraint (or unnamed if name is nil),
163
163
  # with the given block or args. To provide options for the constraint, pass
164
164
  # a hash as the first argument.
165
165
  #
@@ -167,6 +167,15 @@ module Sequel
167
167
  # # CONSTRAINT blah CHECK num >= 1 AND num <= 5
168
168
  # constraint({name: :blah, deferrable: true}, num: 1..5)
169
169
  # # CONSTRAINT blah CHECK num >= 1 AND num <= 5 DEFERRABLE INITIALLY DEFERRED
170
+ #
171
+ # If the first argument is a hash, the following options are supported:
172
+ #
173
+ # Options:
174
+ # :name :: The name of the CHECK constraint
175
+ # :deferrable :: Whether the CHECK constraint should be marked DEFERRABLE.
176
+ #
177
+ # PostgreSQL specific options:
178
+ # :not_valid :: Whether the CHECK constraint should be marked NOT VALID.
170
179
  def constraint(name, *args, &block)
171
180
  opts = name.is_a?(Hash) ? name : {:name=>name}
172
181
  constraints << opts.merge(:type=>:check, :check=>block || args)
@@ -205,14 +214,12 @@ module Sequel
205
214
  end
206
215
 
207
216
  # Add a full text index on the given columns.
217
+ # See #index for additional options.
208
218
  #
209
219
  # PostgreSQL specific options:
210
220
  # :index_type :: Can be set to :gist to use a GIST index instead of the
211
221
  # default GIN index.
212
222
  # :language :: Set a language to use for the index (default: simple).
213
- #
214
- # Microsoft SQL Server specific options:
215
- # :key_index :: The KEY INDEX to use for the full text index.
216
223
  def full_text_index(columns, opts = OPTS)
217
224
  index(columns, opts.merge(:type => :full_text))
218
225
  end
@@ -222,35 +229,43 @@ module Sequel
222
229
  columns.any?{|c| c[:name] == name}
223
230
  end
224
231
 
225
- # Add an index on the given column(s) with the given options.
232
+ # Add an index on the given column(s) with the given options. Examples:
233
+ #
234
+ # index :name
235
+ # # CREATE INDEX table_name_index ON table (name)
236
+ #
237
+ # index [:artist_id, :name]
238
+ # # CREATE INDEX table_artist_id_name_index ON table (artist_id, name)
239
+ #
240
+ # index [:artist_id, :name], name: :foo
241
+ # # CREATE INDEX foo ON table (artist_id, name)
242
+ #
226
243
  # General options:
227
244
  #
245
+ # :include :: Include additional column values in the index, without
246
+ # actually indexing on those values (only supported by
247
+ # some databases).
228
248
  # :name :: The name to use for the index. If not given, a default name
229
249
  # based on the table and columns is used.
230
- # :type :: The type of index to use (only supported by some databases)
250
+ # :type :: The type of index to use (only supported by some databases,
251
+ # :full_text and :spatial values are handled specially).
231
252
  # :unique :: Make the index unique, so duplicate values are not allowed.
232
- # :where :: Create a partial index (only supported by some databases)
253
+ # :where :: A filter expression, used to create a partial index (only
254
+ # supported by some databases).
233
255
  #
234
256
  # PostgreSQL specific options:
235
257
  #
236
258
  # :concurrently :: Create the index concurrently, so it doesn't block
237
259
  # operations on the table while the index is being
238
260
  # built.
239
- # :opclass :: Use a specific operator class in the index.
240
- # :include :: Include additional column values in the index, without
241
- # actually indexing on those values (PostgreSQL 11+).
261
+ # :if_not_exists :: Only create the index if an index of the same name doesn't already exist.
262
+ # :opclass :: Set an opclass to use for all columns (per-column opclasses require
263
+ # custom SQL).
242
264
  # :tablespace :: Specify tablespace for index.
243
265
  #
244
266
  # Microsoft SQL Server specific options:
245
267
  #
246
- # :include :: Include additional column values in the index, without
247
- # actually indexing on those values.
248
- #
249
- # index :name
250
- # # CREATE INDEX table_name_index ON table (name)
251
- #
252
- # index [:artist_id, :name]
253
- # # CREATE INDEX table_artist_id_name_index ON table (artist_id, name)
268
+ # :key_index :: Sets the KEY INDEX to the given value.
254
269
  def index(columns, opts = OPTS)
255
270
  indexes << {:columns => Array(columns)}.merge!(opts)
256
271
  nil
@@ -316,6 +331,7 @@ module Sequel
316
331
  end
317
332
 
318
333
  # Add a spatial index on the given columns.
334
+ # See #index for additional options.
319
335
  def spatial_index(columns, opts = OPTS)
320
336
  index(columns, opts.merge(:type => :spatial))
321
337
  end
@@ -442,7 +458,7 @@ module Sequel
442
458
  end
443
459
 
444
460
  # Add a full text index on the given columns.
445
- # See CreateTableGenerator#index for available options.
461
+ # See CreateTableGenerator#full_text_index for available options.
446
462
  def add_full_text_index(columns, opts = OPTS)
447
463
  add_index(columns, {:type=>:full_text}.merge!(opts))
448
464
  end
@@ -451,34 +467,6 @@ module Sequel
451
467
  # CreateTableGenerator#index for available options.
452
468
  #
453
469
  # add_index(:artist_id) # CREATE INDEX table_artist_id_index ON table (artist_id)
454
- #
455
- # Options:
456
- #
457
- # :name :: Give a specific name for the index. Highly recommended if you plan on
458
- # dropping the index later.
459
- # :where :: A filter expression, used to setup a partial index (if supported).
460
- # :unique :: Create a unique index.
461
- #
462
- # PostgreSQL specific options:
463
- #
464
- # :concurrently :: Create the index concurrently, so it doesn't require an exclusive lock
465
- # on the table.
466
- # :index_type :: The underlying index type to use for a full_text index, gin by default).
467
- # :language :: The language to use for a full text index (simple by default).
468
- # :opclass :: Set an opclass to use for all columns (per-column opclasses require
469
- # custom SQL).
470
- # :type :: Set the index type (e.g. full_text, spatial, hash, gin, gist, btree).
471
- # :if_not_exists :: Only create the index if an index of the same name doesn't already exists
472
- #
473
- # MySQL specific options:
474
- #
475
- # :type :: Set the index type, with full_text and spatial indexes handled specially.
476
- #
477
- # Microsoft SQL Server specific options:
478
- #
479
- # :include :: Includes additional columns in the index.
480
- # :key_index :: Sets the KEY INDEX to the given value.
481
- # :type :: clustered uses a clustered index, full_text uses a full text index.
482
470
  def add_index(columns, opts = OPTS)
483
471
  @operations << {:op => :add_index, :columns => Array(columns)}.merge!(opts)
484
472
  nil
@@ -262,6 +262,10 @@ module Sequel
262
262
  # # SELECT * FROM items WHERE foo
263
263
  # # WITH CHECK OPTION
264
264
  #
265
+ # DB.create_view(:bar_items, DB[:items].select(:foo), columns: [:bar])
266
+ # # CREATE VIEW bar_items (bar) AS
267
+ # # SELECT foo FROM items
268
+ #
265
269
  # Options:
266
270
  # :columns :: The column names to use for the view. If not given,
267
271
  # automatically determined based on the input dataset.
@@ -1062,10 +1062,8 @@ module Sequel
1062
1062
  # Options:
1063
1063
  # :args :: Specify the arguments/columns for the CTE, should be an array of symbols.
1064
1064
  # :recursive :: Specify that this is a recursive CTE
1065
- #
1066
- # PostgreSQL Specific Options:
1067
1065
  # :materialized :: Set to false to force inlining of the CTE, or true to force not inlining
1068
- # the CTE (PostgreSQL 12+).
1066
+ # the CTE (PostgreSQL 12+/SQLite 3.35+).
1069
1067
  #
1070
1068
  # DB[:items].with(:items, DB[:syx].where(Sequel[:name].like('A%')))
1071
1069
  # # WITH items AS (SELECT * FROM syx WHERE (name LIKE 'A%' ESCAPE '\')) SELECT * FROM items
@@ -1567,6 +1567,13 @@ module Sequel
1567
1567
  sql << ')'
1568
1568
  end
1569
1569
  sql << ' AS '
1570
+
1571
+ case w[:materialized]
1572
+ when true
1573
+ sql << "MATERIALIZED "
1574
+ when false
1575
+ sql << "NOT MATERIALIZED "
1576
+ end
1570
1577
  end
1571
1578
 
1572
1579
  # Whether the symbol cache should be skipped when literalizing the dataset
@@ -0,0 +1,438 @@
1
+ # frozen-string-literal: true
2
+ #
3
+ # The async_thread_pool extension adds support for running database
4
+ # queries in a separate threads using a thread pool. With the following
5
+ # code
6
+ #
7
+ # DB.extension :async_thread_pool
8
+ # foos = DB[:foos].async.where(:name=>'A'..'M').all
9
+ # bar_names = DB[:bars].async.select_order_map(:name)
10
+ # baz_1 = DB[:bazes].async.first(:id=>1)
11
+ #
12
+ # All 3 queries will be run in separate threads. +foos+, +bar_names+
13
+ # and +baz_1+ will be proxy objects. Calling a method on the proxy
14
+ # object will wait for the query to be run, and will return the result
15
+ # of calling that method on the result of the query method. For example,
16
+ # if you run:
17
+ #
18
+ # foos = DB[:foos].async.where(:name=>'A'..'M').all
19
+ # bar_names = DB[:bars].async.select_order_map(:name)
20
+ # baz_1 = DB[:bazes].async.first(:id=>1)
21
+ # sleep(1)
22
+ # foos.size
23
+ # bar_names.first
24
+ # baz_1.name
25
+ #
26
+ # These three queries will generally be run concurrently in separate
27
+ # threads. If you instead run:
28
+ #
29
+ # DB[:foos].async.where(:name=>'A'..'M').all.size
30
+ # DB[:bars].async.select_order_map(:name).first
31
+ # DB[:bazes].async.first(:id=>1).name
32
+ #
33
+ # Then it will run each query sequentially, since you need the result of
34
+ # one query before running the next query. The queries will still be
35
+ # run in separate threads (by default).
36
+ #
37
+ # What is run in the separate thread is the entire method call that
38
+ # returns results. So with the original example:
39
+ #
40
+ # foos = DB[:foos].async.where(:name=>'A'..'M').all
41
+ # bar_names = DB[:bars].async.select_order_map(:name)
42
+ # baz_1 = DB[:bazes].async.first(:id=>1)
43
+ #
44
+ # The +all+, <tt>select_order_map(:name)</tt>, and <tt>first(:id=>1)</tt>
45
+ # calls are run in separate threads. If a block is passed to a method
46
+ # such as +all+ or +each+, the block is also run in that thread. If you
47
+ # have code such as:
48
+ #
49
+ # h = {}
50
+ # DB[:foos].async.each{|row| h[row[:id]] = row}
51
+ # bar_names = DB[:bars].async.select_order_map(:name)
52
+ # p h
53
+ #
54
+ # You may end up with it printing an empty hash or partial hash, because the
55
+ # async +each+ call will not have run or finished running. Since the
56
+ # <tt>p h</tt> code relies on a side-effect of the +each+ block and not the
57
+ # return value of the +each+ call, it will not wait for the loading.
58
+ #
59
+ # You should avoid using +async+ for any queries where you are ignoring the
60
+ # return value, as otherwise you have no way to wait for the query to be run.
61
+ #
62
+ # Datasets that use async will use async threads to load data for the majority
63
+ # of methods that can return data. However, dataset methods that return
64
+ # enumerators will not use an async thread (e.g. calling Dataset#map
65
+ # without a block or arguments does not use an async thread or return a
66
+ # proxy object).
67
+ #
68
+ # Because async methods (including their blocks) run in a separate thread, you
69
+ # should not use control flow modifiers such as +return+ or +break+ in async
70
+ # queries. Doing so will result in an error.
71
+ #
72
+ # Because async results are returned as proxy objects, it's a bad idea
73
+ # to use them in a boolean setting:
74
+ #
75
+ # result = DB[:foo].async.get(:boolean_column)
76
+ # # or:
77
+ # result = DB[:foo].async.first
78
+ #
79
+ # # ...
80
+ # if result
81
+ # # will always execute this branch, since result is a proxy object
82
+ # end
83
+ #
84
+ # In this case, you can call the +__value+ method to return the actual
85
+ # result:
86
+ #
87
+ # if result.__value
88
+ # # will not execute this branch if the dataset method returned nil or false
89
+ # end
90
+ #
91
+ # Similarly, because a proxy object is used, you should be careful using the
92
+ # result in a case statement or an argument to <tt>Class#===</tt>:
93
+ #
94
+ # # ...
95
+ # case result
96
+ # when Hash, true, false
97
+ # # will never take this branch, since result is a proxy object
98
+ # end
99
+ #
100
+ # Similar to usage in an +if+ statement, you should use +__value+:
101
+ #
102
+ # case result.__value
103
+ # when Hash, true, false
104
+ # # will take this branch if the dataset method returned a hash, true, or false
105
+ # end
106
+ #
107
+ # On Ruby 2.2+, you can use +itself+ instead of +__value+. It's preferable to
108
+ # use +itself+ if you can, as that will allow code to work with both proxy
109
+ # objects and regular objects.
110
+ #
111
+ # Because separate threads and connections are used for async queries,
112
+ # they do not use any state on the current connection/thread. So if
113
+ # you do:
114
+ #
115
+ # DB.transaction{DB[:table].async.all}
116
+ #
117
+ # Be aware that the transaction runs on one connection, and the SELECT
118
+ # query on a different connection. If you are currently using
119
+ # transactional testing (running each test inside a transaction/savepoint),
120
+ # and want to start using this extension, you should first switch to
121
+ # non-transactional testing of the code that will use the async thread
122
+ # pool before using this extension, as otherwise the use of
123
+ # <tt>Dataset#async</tt> will likely break your tests.
124
+ #
125
+ # If you are using Database#synchronize to checkout a connection, the
126
+ # same issue applies, where the async query runs on a different
127
+ # connection:
128
+ #
129
+ # DB.synchronize{DB[:table].async.all}
130
+ #
131
+ # Similarly, if you are using the server_block extension, any async
132
+ # queries inside with_server blocks will not use the server specified:
133
+ #
134
+ # DB.with_server(:shard1) do
135
+ # DB[:a].all # Uses shard1
136
+ # DB[:a].async.all # Uses default shard
137
+ # end
138
+ #
139
+ # You need to manually specify the shard for any dataset using an async
140
+ # query:
141
+ #
142
+ # DB.with_server(:shard1) do
143
+ # DB[:a].all # Uses shard1
144
+ # DB[:a].async.server(:shard1).all # Uses shard1
145
+ # end
146
+ #
147
+ # When using the async_thread_pool extension, the size of the async thread pool
148
+ # can be set by using the +:num_async_threads+ Database option, which must
149
+ # be set before loading the async_thread_pool extension. This defaults
150
+ # to the size of the Database object's connection pool.
151
+ #
152
+ # By default, for consistent behavior, the async_thread_pool extension
153
+ # will always run the query in a separate thread. However, in some cases,
154
+ # such as when the async thread pool is busy and the results of a query
155
+ # are needed right away, it can improve performance to allow preemption,
156
+ # so that the query will run in the current thread instead of waiting
157
+ # for an async thread to become available. With the following code:
158
+ #
159
+ # foos = DB[:foos].async.where(:name=>'A'..'M').all
160
+ # bar_names = DB[:bars].async.select_order_map(:name)
161
+ # if foos.length > 4
162
+ # baz_1 = DB[:bazes].async.first(:id=>1)
163
+ # end
164
+ #
165
+ # Whether you need the +baz_1+ variable depends on the value of foos.
166
+ # If the async thread pool is busy, and by the time the +foos.length+
167
+ # call is made, the async thread pool has not started the processing
168
+ # to get the +foos+ value, it can improve performance to start that
169
+ # processing in the current thread, since it is needed immediately to
170
+ # determine whether to schedule the query to get the +baz_1+ variable.
171
+ # The default is to not allow preemption, because if the current
172
+ # thread is used, it may have already checked out a connection that
173
+ # could be used, and that connection could be inside a transaction or
174
+ # have some other manner of connection-specific state applied to it.
175
+ # If you want to allow preemption, you can set the
176
+ # +:preempt_async_thread+ Database option before loading the
177
+ # async_thread_pool extension.
178
+ #
179
+ # Related module: Sequel::Database::AsyncThreadPool::DatasetMethods
180
+
181
+
182
+ #
183
+ module Sequel
184
+ module Database::AsyncThreadPool
185
+ # JobProcessor is a wrapper around a single thread, that will
186
+ # process a queue of jobs until it is shut down.
187
+ class JobProcessor # :nodoc:
188
+ def self.create_finalizer(queue, pool)
189
+ proc{run_finalizer(queue, pool)}
190
+ end
191
+
192
+ def self.run_finalizer(queue, pool)
193
+ # Push a nil for each thread using the queue, signalling
194
+ # that thread to close.
195
+ pool.each{queue.push(nil)}
196
+
197
+ # Join each of the closed threads.
198
+ pool.each(&:join)
199
+
200
+ # Clear the thread pool. Probably not necessary, but this allows
201
+ # for a simple way to check whether this finalizer has been run.
202
+ pool.clear
203
+
204
+ nil
205
+ end
206
+ private_class_method :run_finalizer
207
+
208
+ def initialize(queue)
209
+ @thread = ::Thread.new do
210
+ while proxy = queue.pop
211
+ proxy.__send__(:__run)
212
+ end
213
+ end
214
+ end
215
+
216
+ # Join the thread, should only be called by the related finalizer.
217
+ def join
218
+ @thread.join
219
+ end
220
+ end
221
+
222
+ # Wrapper for exception instances raised by async jobs. The
223
+ # wrapped exception will be raised by the code getting the value
224
+ # of the job.
225
+ WrappedException = Struct.new(:exception)
226
+
227
+ # Base proxy object class for jobs processed by async threads and
228
+ # the returned result.
229
+ class BaseProxy < BasicObject
230
+ # Store a block that returns the result when called.
231
+ def initialize(&block)
232
+ ::Kernel.raise Error, "must provide block for an async job" unless block
233
+ @block = block
234
+ end
235
+
236
+ # Pass all method calls to the returned result.
237
+ def method_missing(*args, &block)
238
+ __value.public_send(*args, &block)
239
+ end
240
+ # :nocov:
241
+ ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)
242
+ # :nocov:
243
+
244
+ # Delegate respond_to? calls to the returned result.
245
+ def respond_to_missing?(*args)
246
+ __value.respond_to?(*args)
247
+ end
248
+
249
+ # Override some methods defined by default so they apply to the
250
+ # returned result and not the current object.
251
+ [:!, :==, :!=, :instance_eval, :instance_exec].each do |method|
252
+ define_method(method) do |*args, &block|
253
+ __value.public_send(method, *args, &block)
254
+ end
255
+ end
256
+
257
+ # Wait for the value to be loaded if it hasn't already been loaded.
258
+ # If the code to load the return value raised an exception that was
259
+ # wrapped, reraise the exception.
260
+ def __value
261
+ unless defined?(@value)
262
+ __get_value
263
+ end
264
+
265
+ if @value.is_a?(WrappedException)
266
+ ::Kernel.raise @value
267
+ end
268
+
269
+ @value
270
+ end
271
+
272
+ private
273
+
274
+ # Run the block and return the block value. If the block call raises
275
+ # an exception, wrap the exception.
276
+ def __run_block
277
+ # This may not catch concurrent calls (unless surrounded by a mutex), but
278
+ # it's not worth trying to protect against that. It's enough to just check for
279
+ # multiple non-concurrent calls.
280
+ ::Kernel.raise Error, "Cannot run async block multiple times" unless block = @block
281
+
282
+ @block = nil
283
+
284
+ begin
285
+ block.call
286
+ rescue ::Exception => e
287
+ WrappedException.new(e)
288
+ end
289
+ end
290
+ end
291
+
292
+ # Default object class for async job/proxy result. This uses a queue for
293
+ # synchronization. The JobProcessor will push a result onto the queue,
294
+ # and the code to get the value will pop the result from that queue (and
295
+ # repush the result to handle thread safety).
296
+ class Proxy < BaseProxy
297
+ def initialize
298
+ super
299
+ @queue = ::Queue.new
300
+ end
301
+
302
+ private
303
+
304
+ def __run
305
+ @queue.push(__run_block)
306
+ end
307
+
308
+ def __get_value
309
+ @value = @queue.pop
310
+
311
+ # Handle thread-safety by repushing the popped value, so that
312
+ # concurrent calls will receive the same value
313
+ @queue.push(@value)
314
+ end
315
+ end
316
+
317
+ # Object class for async job/proxy result when the :preempt_async_thread
318
+ # Database option is used. Uses a mutex for synchronization, and either
319
+ # the JobProcessor or the calling thread can run code to get the value.
320
+ class PreemptableProxy < BaseProxy
321
+ def initialize
322
+ super
323
+ @mutex = ::Mutex.new
324
+ end
325
+
326
+ private
327
+
328
+ def __get_value
329
+ @mutex.synchronize do
330
+ unless defined?(@value)
331
+ @value = __run_block
332
+ end
333
+ end
334
+ end
335
+ alias __run __get_value
336
+ end
337
+
338
+ module DatabaseMethods
339
+ def self.extended(db)
340
+ db.instance_exec do
341
+ unless pool.pool_type == :threaded || pool.pool_type == :sharded_threaded
342
+ raise Error, "can only load async_thread_pool extension if using threaded or sharded_threaded connection pool"
343
+ end
344
+
345
+ num_async_threads = opts[:num_async_threads] ? typecast_value_integer(opts[:num_async_threads]) : (Integer(opts[:max_connections] || 4))
346
+ raise Error, "must have positive number for num_async_threads" if num_async_threads <= 0
347
+
348
+ proxy_klass = typecast_value_boolean(opts[:preempt_async_thread]) ? PreemptableProxy : Proxy
349
+ define_singleton_method(:async_job_class){proxy_klass}
350
+
351
+ queue = @async_thread_queue = Queue.new
352
+ pool = @async_thread_pool = num_async_threads.times.map{JobProcessor.new(queue)}
353
+ ObjectSpace.define_finalizer(db, JobProcessor.create_finalizer(queue, pool))
354
+
355
+ extend_datasets(DatasetMethods)
356
+ end
357
+ end
358
+
359
+ private
360
+
361
+ # Wrap the block in a job/proxy object and schedule it to run using the async thread pool.
362
+ def async_run(&block)
363
+ proxy = async_job_class.new(&block)
364
+ @async_thread_queue.push(proxy)
365
+ proxy
366
+ end
367
+ end
368
+
369
+ ASYNC_METHODS = ([:all?, :any?, :drop, :entries, :grep_v, :include?, :inject, :member?, :minmax, :none?, :one?, :reduce, :sort, :take, :tally, :to_a, :to_h, :uniq, :zip] & Enumerable.instance_methods) + (Dataset::ACTION_METHODS - [:map, :paged_each])
370
+ ASYNC_BLOCK_METHODS = ([:collect, :collect_concat, :detect, :drop_while, :each_cons, :each_entry, :each_slice, :each_with_index, :each_with_object, :filter_map, :find, :find_all, :find_index, :flat_map, :max_by, :min_by, :minmax_by, :partition, :reject, :reverse_each, :sort_by, :take_while] & Enumerable.instance_methods) + [:paged_each]
371
+ ASYNC_ARGS_OR_BLOCK_METHODS = [:map]
372
+
373
+ module DatasetMethods
374
+ # Define a method in the given module that will run the given method using an async thread
375
+ # if the current dataset is async.
376
+ def self.define_async_method(mod, method)
377
+ mod.send(:define_method, method) do |*args, &block|
378
+ if @opts[:async]
379
+ ds = sync
380
+ db.send(:async_run){ds.send(method, *args, &block)}
381
+ else
382
+ super(*args, &block)
383
+ end
384
+ end
385
+ end
386
+
387
+ # Define a method in the given module that will run the given method using an async thread
388
+ # if the current dataset is async and a block is provided.
389
+ def self.define_async_block_method(mod, method)
390
+ mod.send(:define_method, method) do |*args, &block|
391
+ if block && @opts[:async]
392
+ ds = sync
393
+ db.send(:async_run){ds.send(method, *args, &block)}
394
+ else
395
+ super(*args, &block)
396
+ end
397
+ end
398
+ end
399
+
400
+ # Define a method in the given module that will run the given method using an async thread
401
+ # if the current dataset is async and arguments or a block is provided.
402
+ def self.define_async_args_or_block_method(mod, method)
403
+ mod.send(:define_method, method) do |*args, &block|
404
+ if (block || !args.empty?) && @opts[:async]
405
+ ds = sync
406
+ db.send(:async_run){ds.send(method, *args, &block)}
407
+ else
408
+ super(*args, &block)
409
+ end
410
+ end
411
+ end
412
+
413
+ # Override all of the methods that return results to do the processing in an async thread
414
+ # if they have been marked to run async and should run async (i.e. they don't return an
415
+ # Enumerator).
416
+ ASYNC_METHODS.each{|m| define_async_method(self, m)}
417
+ ASYNC_BLOCK_METHODS.each{|m| define_async_block_method(self, m)}
418
+ ASYNC_ARGS_OR_BLOCK_METHODS.each{|m| define_async_args_or_block_method(self, m)}
419
+
420
+ # Return a cloned dataset that will load results using the async thread pool.
421
+ def async
422
+ cached_dataset(:_async) do
423
+ clone(:async=>true)
424
+ end
425
+ end
426
+
427
+ # Return a cloned dataset that will not load results using the async thread pool.
428
+ # Only used if the current dataset has been marked as using the async thread pool.
429
+ def sync
430
+ cached_dataset(:_sync) do
431
+ clone(:async=>false)
432
+ end
433
+ end
434
+ end
435
+ end
436
+
437
+ Database.register_extension(:async_thread_pool, Database::AsyncThreadPool::DatabaseMethods)
438
+ end