switchman 1.14.4 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,719 +0,0 @@
1
- require 'switchman/database_server'
2
- require 'switchman/default_shard'
3
- require 'switchman/environment'
4
- require 'switchman/errors'
5
-
6
- module Switchman
7
- class Shard < ::ActiveRecord::Base
8
# Size of the id space reserved for each shard (ten trillion ids).
IDS_PER_SHARD = 10_000_000_000_000

# Shard categories, keyed by category name.
CATEGORIES = {
  # special cased to mean all other models
  primary: nil,
  # special cased to not allow activating a shard other than the default
  unsharded: [Shard]
}
private_constant :CATEGORIES
@connection_specification_name = @shard_category = :unsharded

attr_accessible :default, :name, :database_server if defined?(::ProtectedAttributes)

# at most one shard may be flagged as the default
validates_uniqueness_of :default, if: lambda { |s| s.default? }

after_save :clear_cache
after_destroy :clear_cache

after_rollback :on_rollback

scope :primary, -> { where(name: nil).order(:database_server_id, :id).distinct_on(:database_server_id) }
34
-
35
- class << self
36
# Returns the names of all known shard categories.
def categories
  CATEGORIES.each_key.to_a
end
39
-
40
# Returns the default shard, resolving and memoizing it in @default on first
# use or whenever a reload is requested.
#
# * +reload_deprecated+ - positional form of +reload+; a truthy value forces a reload
# * +reload+ - when truthy, look the default shard up again
# * +with_fallback+ - when resolving, fall back to the previously memoized
#   default (if there is one) instead of DefaultShard.instance
def default(reload_deprecated = false, reload: false, with_fallback: false)
  return @default if @default && !reload && !reload_deprecated

  # Have to use a dummy object so that several key methods still work while
  # the real record is being resolved (this includes looking up the default
  # shard itself). It must be a local so that this method can be re-entrant.
  placeholder = DefaultShard.instance

  # the caller prefers an already-known default shard over the dummy instance
  placeholder = @default if with_fallback && @default

  # the first time through, park the placeholder so re-entrant calls don't loop
  @default ||= placeholder

  # Find the actual record, if it exists; any error (e.g. the table doesn't
  # exist) falls back to the placeholder.
  @default = begin
    find_cached("default_shard") { Shard.where(default: true).take } || placeholder
  rescue
    placeholder
  end

  # rebuild current shard activations - they might hold "another" default shard
  refreshed = active_shards.map do |category, shard|
    lookup_key = (!shard || shard.default?) ? 'default' : shard.id
    [category, Shard.lookup(lookup_key)]
  end
  active_shards.replace(refreshed.to_h)

  activate!(primary: @default) if active_shards.empty?

  # make sure the primary shard is not erroneously cached on the database server
  server = @default.database_server
  server.remove_instance_variable(:@primary_shard) if server.instance_variable_defined?(:@primary_shard)

  # finally, replace a cached reference to the default shard on the existing connection
  if ::ActiveRecord::Base.connected? && ::ActiveRecord::Base.connection.shard.default?
    ::ActiveRecord::Base.connection.shard = @default
  end

  @default
end
86
-
87
# Returns the shard that is active for +category+, or the default shard when
# nothing is active for that category.
def current(category = :primary)
  activated = active_shards[category]
  activated || Shard.default
end
90
-
91
# Activates +shards+ (a hash of category => shard) while the block runs, then
# puts back whichever activations were displaced. Returns the block's value.
def activate(shards)
  displaced = activate!(shards)
  yield
ensure
  active_shards.merge!(displaced) if displaced
end
97
-
98
# Makes +shards+ (a hash of category => shard) active without restoring
# anything afterwards. Entries for the :unsharded category are skipped.
#
# Returns a hash of the activations that were replaced (category => previous
# shard), or nil if nothing changed.
def activate!(shards)
  active = active_shards
  replaced = nil
  shards.each do |category, shard|
    next if category == :unsharded
    next if active[category] == shard

    (replaced ||= {})[category] = active[category]
    active[category] = shard
  end
  replaced
end
111
-
112
# Resolves an id-ish value to a shard.
#
# 'self' (or the current shard's id) gives the current shard; nil, 'default'
# (or the default shard's id) gives the default shard. Any other id is looked
# up once via find_cached with the default shard active and the outcome
# (including nil) is remembered in the in-process cache.
#
# Raises ArgumentError when the id converts to 0.
def lookup(id)
  numeric_id = id.to_i
  return current if numeric_id == current.id || id == 'self'
  return default if numeric_id == default.id || id.nil? || id == 'default'
  raise ArgumentError if numeric_id == 0

  cached_shards.fetch(numeric_id) do
    cached_shards[numeric_id] = Shard.default.activate do
      find_cached(['shard', numeric_id]) { find_by(id: numeric_id) }
    end
  end
end
126
-
127
# Empties the in-process shard cache.
def clear_cache
  cached_shards.clear
end
130
-
131
# Runs the block once per shard with that shard activated and returns the
# concatenated (Array.wrap'd) block results. If the default shard is not a
# Shard record, the block is run exactly once in place instead.
#
# ==== Parameters
#
# * +shards+ - an array or relation of Shards to iterate over
# * +categories+ - an array of categories to activate
# * +options+ -
#    :parallel - true/false to execute in parallel, or a integer of how many
#                sub-processes per database server. Note that parallel
#                invocation currently uses forking, so should be used sparingly
#                because errors are not raised, and you cannot get results back
#    :max_procs - only run this many parallel processes at a time
#    :exception - :ignore, :raise, :defer (wait until the end and raise the first
#                error), or a proc
#
# A parallel invocation returns nil. It re-raises the first exception that a
# subprocess reported, and raises ParallelShardExecError when a subprocess
# exited uncleanly without reporting one.
#
# Raises ArgumentError when more than three arguments are passed.
def with_each_shard(*args)
  raise ArgumentError, "wrong number of arguments (#{args.length} for 0...3)" if args.length > 3

  return Array.wrap(yield) unless default.is_a?(Shard)

  options = args.extract_options!
  if args.length == 1
    # a lone array starting with a symbol is a category list; anything else is the shards
    if Array === args.first && args.first.first.is_a?(Symbol)
      categories = args.first
    else
      scope = args.first
    end
  else
    scope, categories = args
  end

  parallel = case options[:parallel]
             when true
               1
             when false, nil
               0
             else
               options[:parallel]
             end
  options.delete(:parallel)

  scope ||= Shard.all
  if ::ActiveRecord::Relation === scope && scope.order_values.empty?
    scope = scope.order(::Arel.sql("database_server_id IS NOT NULL, database_server_id, id"))
  end

  if parallel > 0
    max_procs = determine_max_procs(options.delete(:max_procs), parallel)
    if ::ActiveRecord::Relation === scope
      # still need a post-uniq, cause the default database server could be NULL or Rails.env in the db
      database_servers = scope.reorder('database_server_id').select(:database_server_id).distinct.
        map(&:database_server).compact.uniq
      # with no servers the division below would be by zero (Infinity.ceil raises)
      if max_procs && !database_servers.empty?
        parallel = [(max_procs.to_f / database_servers.count).ceil, parallel].min
      end

      scopes = Hash[database_servers.map do |server|
        server_scope = server.shards.merge(scope)
        if parallel == 1
          subscopes = [server_scope]
        else
          subscopes = []
          total = server_scope.count
          ranges = []
          # nothing to split when the server has no shards in scope
          if total > 0
            server_scope.find_ids_in_ranges(:batch_size => (total.to_f / parallel).ceil) do |min, max|
              ranges << [min, max]
            end
          end
          # create a half-open range on the last one (if any ranges were produced)
          ranges.last[1] = nil unless ranges.empty?
          ranges.each do |min, max|
            subscope = server_scope.where("id>=?", min)
            subscope = subscope.where("id<=?", max) if max
            subscopes << subscope
          end
        end
        [server, subscopes]
      end]
    else
      scopes = scope.group_by(&:database_server)
      if parallel > 1
        # same division-by-zero guard as above for an empty list of shards
        if max_procs && !scopes.empty?
          parallel = [(max_procs.to_f / scopes.count).ceil, parallel].min
        end
        scopes = Hash[scopes.map do |(server, shards)|
          [server, shards.in_groups(parallel, false).compact]
        end]
      else
        scopes = Hash[scopes.map { |(server, shards)| [server, [shards]] }]
      end
    end

    exception_pipes = []
    pids = []
    out_fds = []
    err_fds = []
    pid_to_name_map = {}
    fd_to_name_map = {}
    errors = []

    # Blocks until some child stdout/stderr is readable, then echoes one line
    # per ready descriptor (prefixed with the subprocess name); descriptors at
    # EOF are closed and dropped from both lists.
    wait_for_output = lambda do |outs, errs, names|
      ready, _ = IO.select(outs + errs)
      ready.each do |fd|
        if fd.eof?
          fd.close
          outs.delete(fd)
          errs.delete(fd)
          next
        end
        line = fd.readline
        puts "#{names[fd]}: #{line}"
      end
    end

    # only one process; don't bother forking
    if scopes.length == 1 && parallel == 1
      return with_each_shard(scopes.first.last.first, categories, options) { yield }
    end

    # clear connections prior to forking (no more queries will be executed in the parent,
    # and we want them gone so that we don't accidentally use them post-fork doing something
    # silly like dealloc'ing prepared statements)
    ::ActiveRecord::Base.clear_all_connections!

    scopes.each do |server, subscopes|
      subscopes.each_with_index do |subscope, idx|
        if subscopes.length > 1
          name = "#{server.id} #{idx + 1}"
        else
          name = server.id
        end

        exception_pipe = IO.pipe
        exception_pipes << exception_pipe
        pid, io_in, io_out, io_err = Open4.pfork4(lambda do
          begin
            Switchman.config[:on_fork_proc]&.call

            # set a pretty name for the process title, up to 128 characters
            # (we don't actually know the limit, depending on how the process
            # was started)
            # first, simplify the binary name by stripping directories,
            # then truncate arguments as necessary
            bin = File.basename($0) # Process.argv0 doesn't work on Ruby 2.5 (https://bugs.ruby-lang.org/issues/15887)
            max_length = 128 - bin.length - name.length - 3
            args = ARGV.join(" ")
            if max_length >= 0
              args = args[0..max_length]
            end
            new_title = [bin, args, name].join(" ")
            Process.setproctitle(new_title)

            with_each_shard(subscope, categories, options) { yield }
            exception_pipe.last.close
          rescue => e
            begin
              dumped = Marshal.dump(e)
            rescue
              # couldn't dump the exception; create a copy with just
              # the message and the backtrace
              e2 = e.class.new(e.message)
              e2.set_backtrace(e.backtrace)
              e2.instance_variable_set(:@active_shards, e.instance_variable_get(:@active_shards))
              dumped = Marshal.dump(e2)
            end
            exception_pipe.last.set_encoding(dumped.encoding)
            exception_pipe.last.write(dumped)
            exception_pipe.last.flush
            exception_pipe.last.close
            exit! 1
          end
        end)
        # the parent only reads from the exception pipe
        exception_pipe.last.close
        pids << pid
        io_in.close # don't care about writing to stdin
        out_fds << io_out
        err_fds << io_err
        pid_to_name_map[pid] = name
        fd_to_name_map[io_out] = name
        fd_to_name_map[io_err] = name

        while max_procs && pids.count >= max_procs
          while max_procs && out_fds.count >= max_procs
            # wait for output if we've hit the max_procs limit
            wait_for_output.call(out_fds, err_fds, fd_to_name_map)
          end
          # we've gotten all the output from one fd so wait for its child process to exit
          found_pid, status = Process.wait2
          pids.delete(found_pid)
          errors << pid_to_name_map[found_pid] if status.exitstatus != 0
        end
      end
    end

    # drain the remaining output, then reap the remaining children
    while out_fds.any? || err_fds.any?
      wait_for_output.call(out_fds, err_fds, fd_to_name_map)
    end
    pids.each do |pid|
      _, status = Process.waitpid2(pid)
      errors << pid_to_name_map[pid] if status.exitstatus != 0
    end

    # check for an exception; we only re-raise the first one
    exception_pipes.each do |exception_pipe|
      begin
        serialized_exception = exception_pipe.first.read
        next if serialized_exception.empty?
        exception = Marshal.load(serialized_exception)
        raise exception
      ensure
        exception_pipe.first.close
      end
    end

    unless errors.empty?
      raise ParallelShardExecError, "The following subprocesses did not exit cleanly: #{errors.sort.join(", ")}"
    end
    return
  end

  categories ||= []

  previous_shard = nil
  close_connections_if_needed = lambda do |shard|
    # prune the prior connection unless it happened to be the same
    if previous_shard && shard != previous_shard && !previous_shard.database_server.shareable?
      previous_shard.activate do
        ::Shackles.activated_environments.each do |env|
          ::Shackles.activate(env) do
            if ::ActiveRecord::Base.connected? && ::ActiveRecord::Base.connection.open_transactions == 0
              ::ActiveRecord::Base.connection_pool.current_pool.disconnect!
            end
          end
        end
      end
    end
  end

  result = []
  exception = nil
  scope.each do |shard|
    # shard references a database server that isn't configured in this environment
    next unless shard.database_server
    close_connections_if_needed.call(shard)
    shard.activate(*categories) do
      begin
        result.concat Array.wrap(yield)
      rescue
        case options[:exception]
        when :ignore
          # swallow the error and move on to the next shard
        when :defer
          # remember only the first error; it is raised after all shards ran
          exception ||= $!
        when Proc
          options[:exception].call
        else
          # :raise and anything unrecognized re-raise immediately
          raise
        end
      end
    end
    previous_shard = shard
  end
  close_connections_if_needed.call(Shard.current)
  raise exception if exception
  result
end
392
-
393
# Groups the elements of +array+ by shard, then yields each group with its
# shard activated and returns the concatenated (Array.wrap'd) block results.
#
# The value used to pick the shard is the element itself, or the result of
# +partition_proc+ when one is given:
# * a Shard is used directly
# * an ActiveRecord object goes to each of its associated_shards when it
#   responds to that, otherwise to its own shard
# * an Integer, digit string or "<shard>~<id>" string is resolved through
#   Shard.local_id_for; without a partition_proc the element passed to the
#   block is replaced by its local id (or left as is if none was found)
# * anything else (or an unresolved shard) goes to the current shard
def partition_by_shard(array, partition_proc = nil)
  grouped = {}
  array.each do |object|
    key = partition_proc ? partition_proc.call(object) : object
    case key
    when Shard
      shard = key
    when ::ActiveRecord::Base
      if key.respond_to?(:associated_shards)
        key.associated_shards.each { |a_shard| (grouped[a_shard] ||= []) << object }
        next
      end
      shard = key.shard
    when Integer, /^\d+$/, /^(\d+)~(\d+)$/
      local_id, shard = Shard.local_id_for(key)
      local_id ||= key
      object = local_id unless partition_proc
    end
    shard ||= Shard.current
    (grouped[shard] ||= []) << object
  end
  # TODO: use with_each_shard (or vice versa) to get
  # connection management and parallelism benefits
  grouped.each_with_object([]) do |(shard, objects), results|
    results.concat(shard.activate { Array.wrap(yield objects) })
  end
end
425
-
426
# Converts an AR object, integral id, string id, or string short-global-id
# ("<shard id>~<local id>") to an integral id. Returns nil if it can't be
# interpreted.
#
# Arel Casted nodes (and BindParam nodes on Rails >= 5.2) are unwrapped to
# their underlying value first.
def integral_id_for(any_id)
  if any_id.is_a?(::Arel::Nodes::Casted)
    any_id = any_id.val
  elsif any_id.is_a?(::Arel::Nodes::BindParam) && ::Rails.version >= "5.2"
    any_id = any_id.value.value_before_type_cast
  end

  # \A/\Z anchor the whole string (a single trailing newline is still
  # tolerated); ^/$ would match any one line of a multi-line string, and
  # to_i would then turn e.g. "abc\n12" into 0.
  case any_id
  when ::ActiveRecord::Base
    any_id.id
  when /\A(\d+)~(\d+)\Z/
    shard_id = Regexp.last_match(1).to_i
    local_id = Regexp.last_match(2).to_i
    # doesn't make sense to have a double-global id: the local part must be
    # strictly below IDS_PER_SHARD, otherwise it is itself a global id
    return nil if local_id >= IDS_PER_SHARD
    shard_id * IDS_PER_SHARD + local_id
  when Integer, /\A\d+\Z/
    any_id.to_i
  end
end
449
-
450
# Splits an id-ish into [local id, shard it is local to].
# * [nil, nil] if the id can't be interpreted
# * [id, nil] if it is already a local id
# * [nil, nil] if it is well formed but refers to a shard that does not exist
NIL_NIL_ID = [nil, nil].freeze
def local_id_for(any_id)
  id = integral_id_for(any_id)
  return NIL_NIL_ID unless id
  return [id, nil] if id < IDS_PER_SHARD

  shard = lookup(id / IDS_PER_SHARD)
  shard ? [id % IDS_PER_SHARD, shard] : NIL_NIL_ID
end
466
-
467
# Translates an id-ish into an integral id relative to +target_shard+.
# A local id is taken to belong to +source_shard+. Returns nil if the id
# can't be interpreted, or the plain integral id if it refers to a shard
# that does not exist.
def relative_id_for(any_id, source_shard, target_shard)
  integral_id = integral_id_for(any_id)
  local_id, owner = local_id_for(integral_id)
  return integral_id unless local_id

  owner ||= source_shard
  return local_id if owner == target_shard

  owner.global_id_for(local_id)
end
479
-
480
# Shortens an id-ish: a global id becomes the string "<shard id>~<local id>",
# a local id is returned as its integral value, and anything that can't be
# interpreted is returned untouched.
def short_id_for(any_id)
  local_id, shard = local_id_for(any_id)
  return any_id unless local_id

  shard ? "#{shard.id}~#{local_id}" : local_id
end
489
-
490
# Takes an id-ish and returns an integral global id. A local id is made
# global relative to +source_shard+ (the current shard when not given).
# An id that can't be interpreted is returned as it was passed in.
def global_id_for(any_id, source_shard = nil)
  id = integral_id_for(any_id)
  return any_id unless id
  return id if id >= IDS_PER_SHARD

  source_shard ||= Shard.current
  source_shard.global_id_for(id)
end
502
-
503
# Returns the shard an id-ish lives on: an ActiveRecord object's own shard,
# the shard encoded in a global id, or else +source_shard+ (falling back to
# the current shard).
def shard_for(any_id, source_shard = nil)
  return any_id.shard if any_id.is_a?(::ActiveRecord::Base)

  _local_id, owner = local_id_for(any_id)
  owner || source_shard || Shard.current
end
508
-
509
# Works out the process limit for a parallel run.
#
# * an explicit +max_procs_input+ wins; it is converted with to_i and a
#   result of 0 means "no limit" (nil)
# * otherwise 1 is returned when +parallel_input+ is nil or below 1
# * otherwise the limit is Environment.cpu_count * parallel_input, or nil
#   when no positive CPU count is available
def determine_max_procs(max_procs_input, parallel_input = 2)
  if max_procs_input
    explicit = max_procs_input.to_i
    return explicit == 0 ? nil : explicit
  end
  return 1 if parallel_input.nil? || parallel_input < 1

  cpus = Environment.cpu_count
  cpus && cpus > 0 ? cpus * parallel_input : nil
end
526
-
527
- private
528
# In-process cache of looked-up shards; a lazily created hash whose keys are
# compared by identity.
def cached_shards
  @cached_shards ||= Hash.new.compare_by_identity
end
532
-
533
# Stores +shard+ in the in-process cache under its id.
def add_to_cache(shard)
  cached_shards.store(shard.id, shard)
end
536
-
537
# Drops the in-process cache entry stored under +shard+'s id.
def remove_from_cache(shard)
  shard_id = shard.id
  cached_shards.delete(shard_id)
end
540
-
541
# Fetches a shard's attributes from Switchman.cache under +key+ (running the
# block on a miss and caching its result's attributes) and rebuilds a
# persisted-looking Shard from them.
#
# Returns nil when there are no attributes, or when the rebuilt shard has no
# database server.
def find_cached(key)
  # can't simply cache the AR object since Shard has a custom serializer
  # that calls this method
  attributes = Switchman.cache.fetch(key) { yield&.attributes }
  return nil unless attributes

  shard = Shard.new
  attributes.each do |attr, value|
    setter = :"#{attr}="
    shard.send(setter, value) if shard.respond_to?(setter)
  end
  shard.clear_changes_information
  shard.instance_variable_set(:@new_record, false)
  # connection info doesn't exist in database.yml;
  # pretend the shard doesn't exist either
  shard.database_server ? shard : nil
end
558
-
559
# Per-thread hash of category => active shard, created on first use with
# keys compared by identity.
def active_shards
  Thread.current[:active_shards] ||= Hash.new.compare_by_identity
end
562
- end
563
-
564
# The shard's name: the stored name attribute, or default_name when none is
# stored. The result is memoized in @name.
def name
  return @name if instance_variable_defined?(:@name)

  # protect against re-entrancy
  @name = nil
  @name = read_attribute(:name) || default_name
end
572
-
573
# Writes the name attribute and memoizes it; assigning nil clears the
# memoized value so the name is recomputed on the next read.
def name=(name)
  @name = name
  write_attribute(:name, name)
  remove_instance_variable(:@name) if name.nil?
end
577
-
578
# The DatabaseServer found for this shard's database_server_id (memoized
# once a truthy result is found).
def database_server
  @database_server ||= DatabaseServer.find(database_server_id)
end
581
-
582
# Points this shard at +database_server+: records its id in
# database_server_id and memoizes the server object itself.
def database_server=(database_server)
  server_id = database_server.id
  self.database_server_id = server_id
  @database_server = database_server
end
586
-
587
# True when this shard is its database server's primary shard.
def primary?
  server = database_server
  self == server.primary_shard
end
590
-
591
# "<database server id>:<name>", leaving out whichever part is nil.
def description
  parts = [database_server.id, name]
  parts.compact.join(':')
end
594
-
595
# Shard records themselves always live on the default shard.
def shard
  Shard.default
end
599
-
600
# Activates this shard for the given categories (:primary when none are
# given) while the block runs.
def activate(*categories)
  Shard.activate(hashify_categories(categories)) { yield }
end
606
-
607
# for use from console ONLY
#
# Activates this shard for the given categories (:primary when none are
# given) without restoring the previous activations. Always returns nil.
def activate!(*categories)
  Shard.activate!(hashify_categories(categories))
  nil
end
613
-
614
# custom serialization, since shard is self-referential:
# only the id (as a string) is dumped
def _dump(_depth)
  id.to_s
end
618
-
619
# Counterpart of _dump: looks the shard up again from its dumped id.
def self._load(str)
  shard_id = str.to_i
  lookup(shard_id)
end
622
-
623
# Drops the storage behind this shard.
#
# Does nothing when the shard has no stored name attribute. The statements
# come from Switchman.config (an adapter-specific :drop_statement first, then
# the global one, with '%{name}' replaced by the shard name); without any
# configured statement, mysql/mysql2 runs "DROP DATABASE <name>" and
# postgresql runs "DROP SCHEMA <name> CASCADE". Statements run with this
# shard and the :deploy Shackles environment activated.
#
# Raises if this is the default shard. Any error while dropping is logged at
# info level and swallowed.
def drop_database
  raise("Cannot drop the database of the default shard") if default?
  return unless read_attribute(:name)

  begin
    adapter = database_server.config[:adapter]
    sharding_config = Switchman.config || {}
    configured = sharding_config[adapter]&.[](:drop_statement)
    configured ||= sharding_config[:drop_statement]
    configured = Array(configured).dup.map { |statement| statement.gsub('%{name}', name) } if configured

    run_all = lambda do |statements|
      Array(statements).each { |stmt| ::ActiveRecord::Base.connection.execute(stmt) }
    end

    case adapter
    when 'mysql', 'mysql2'
      activate do
        ::Shackles.activate(:deploy) do
          configured ||= "DROP DATABASE #{name}"
          run_all.call(configured)
        end
      end
    when 'postgresql'
      activate do
        ::Shackles.activate(:deploy) do
          # Shut up, Postgres! (notices are silenced while dropping)
          conn = ::ActiveRecord::Base.connection
          old_proc = conn.raw_connection.set_notice_processor {}
          begin
            configured ||= "DROP SCHEMA #{name} CASCADE"
            run_all.call(configured)
          ensure
            conn.raw_connection.set_notice_processor(&old_proc) if old_proc
          end
        end
      end
    end
  rescue => e
    logger.info "Drop failed: #{e}"
  end
end
668
-
669
# Turns an id local to this shard into a global id by adding this shard's
# id times IDS_PER_SHARD. Returns nil when +local_id+ is nil or false.
def global_id_for(local_id)
  return nil unless local_id

  shard_offset = id * IDS_PER_SHARD
  local_id + shard_offset
end
674
-
675
- # skip global_id.hash
676
- def hash
677
- id.hash
678
- end
679
-
680
# Destroys the shard record; raises instead when this is the default shard.
def destroy
  raise "Cannot destroy the default shard" if default?

  super
end
684
-
685
- private
686
-
687
# Removes this shard's entry from Switchman.cache (and the "default_shard"
# entry when this is the default shard), with the default shard activated.
def clear_cache
  Shard.default.activate do
    Switchman.cache.delete("shard/#{id}")
    Switchman.cache.delete("default_shard") if default?
  end
end
693
-
694
# The name the database server assigns to this shard when it has no stored
# name of its own.
def default_name
  server = database_server
  server.shard_name(self)
end
697
-
698
# After a rollback, touch current_pool on every ConnectionPoolProxy in each
# activated Shackles environment.
def on_rollback
  # make sure all connection pool proxies are referencing valid pools
  ::ActiveRecord::Base.connection_handler.connection_pools.each do |pool|
    next unless pool.is_a?(ConnectionPoolProxy)

    ::Shackles.activated_environments.each do |env|
      ::Shackles.activate(env) { pool.current_pool }
    end
  end
end
709
-
710
# Builds the category => shard hash used for activation: every given
# category maps to this shard, and :primary is used when none are given.
def hashify_categories(categories)
  return { :primary => self } if categories.empty?

  categories.each_with_object({}) { |category, shards| shards[category] = self }
end
717
-
718
- end
719
- end