switchman 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,653 @@
1
+ require 'switchman/database_server'
2
+ require 'switchman/default_shard'
3
+ require 'switchman/environment'
4
+
5
+ module Switchman
6
+ class Shard < ::ActiveRecord::Base
7
# Size of the id space reserved for each shard (ten trillion). A global id is
# built as shard.id * IDS_PER_SHARD + local id.
IDS_PER_SHARD = 10_000_000_000_000

# Shard category name => list of models in that category. Intentionally left
# unfrozen.
CATEGORIES = {
  # special cased to mean all other models
  default: nil,
  # special cased to not allow activating a shard other than the default
  unsharded: [Shard]
}
private_constant :CATEGORIES
@shard_category = :unsharded

# only whitelist attributes when the protected_attributes gem is loaded
attr_accessible :default, :name, :database_server if defined?(::ProtectedAttributes)

# only allow one default
validates_uniqueness_of :default, if: ->(s) { s.default? }

after_save :clear_cache

scope :primary, -> { where(name: nil).order(:database_server_id, :id).distinct_on(:database_server_id) }
30
+
31
+ class << self
32
# Returns the names of all known shard categories (the keys of CATEGORIES).
def categories
  CATEGORIES.keys
end
35
+
36
# Returns the default shard, memoized in @default.
#
# * +reload_deprecated+ - positional form of +reload+, kept for old callers
# * +reload+ - when truthy, look the default shard up again
# * +with_fallback+ - when truthy and a default is already memoized, keep
#   using it (instead of the DefaultShard placeholder) if the lookup finds
#   nothing or raises
def default(reload_deprecated = false, reload: false, with_fallback: false)
  reload ||= reload_deprecated
  return @default if @default && !reload

  # A placeholder object lets several key methods keep working while sharding
  # is not (yet) available, including while looking up the default shard
  # itself. It is a local so that this method stays re-entrant.
  placeholder = DefaultShard.instance
  # prefer the shard we already know about when the caller asked for that
  placeholder = @default if with_fallback && @default

  # on the very first call, park the placeholder in @default so that
  # re-entrant calls return instead of looping
  @default ||= placeholder
  # forget our current shard activations - they might hold "another" default shard
  active_shards.clear

  # find the real record if there is one; any error (e.g. the table does not
  # exist yet) falls back to the placeholder
  @default =
    begin
      Shard.where(default: true).first || placeholder
    rescue
      placeholder
    end
  activate!(default: @default)
  @default
end
67
+
68
# Returns the shard currently active for +category+ on this thread, or the
# default shard when none is active.
def current(category = :default)
  active = active_shards[category]
  active || Shard.default
end
71
+
72
# Activates +shards+ (a hash of category => shard) for the duration of the
# block, then restores whatever was active before. Returns the block's value.
def activate(shards)
  previous = activate!(shards)
  begin
    yield
  ensure
    # activate! returns nil when nothing changed, so there is nothing to restore
    active_shards.merge!(previous) if previous
  end
end
78
+
79
# Activates +shards+ (a hash of category => shard) without restoring them
# afterwards. The :unsharded category is never switched.
#
# Returns a hash of the activations that were replaced (category => previous
# shard), or nil if nothing changed.
def activate!(shards)
  active = active_shards
  replaced = nil
  shards.each do |category, shard|
    next if category == :unsharded
    next if active[category] == shard

    (replaced ||= {})[category] = active[category]
    active[category] = shard
  end
  replaced
end
92
+
93
# Finds a shard by id.
#
# +id+ may be an integer, a numeric string, 'self' (the current shard),
# or nil / 'default' (the default shard). Raises ArgumentError when the id
# converts to 0. Returns nil when no such shard exists or when its database
# server is not configured. Results are memoized per process in
# cached_shards; the attributes are also stored in Switchman.cache.
def lookup(id)
  id_i = id.to_i
  return current if id_i == current.id || id == 'self'
  return default if id_i == default.id || id.nil? || id == 'default'
  id = id_i
  raise ArgumentError if id == 0

  return cached_shards[id] if cached_shards.key?(id)

  cached_shards[id] = Shard.default.activate do
    # can't simply cache the AR object since Shard has a custom serializer
    # that calls this method; cache its attributes instead (:nil marks a miss)
    cached = Switchman.cache.fetch("shard/#{id}") do
      record = find_by_id(id)
      if record
        attrs = record.attributes
        if ::Rails.version < '4.2'
          attrs.each_key do |key|
            attrs[key] = attrs[key].unserialize if attrs[key].is_a?(::ActiveRecord::AttributeMethods::Serialization::Attribute)
          end
        end
        attrs
      else
        :nil
      end
    end
    next if cached == :nil

    # rebuild a persisted-looking Shard from the cached attributes
    shard = Shard.new
    cached.each { |attr, value| shard.send(:"#{attr}=", value) }
    shard.instance_variable_set(:@new_record, false)
    # connection info doesn't exist in database.yml;
    # pretend the shard doesn't exist either
    shard.database_server ? shard : nil
  end
end
135
+
136
# Empties the per-process shard lookup cache.
def clear_cache
  cached_shards.clear
end
139
+
140
# Runs the block once per shard with that shard activated.
#
# ==== Parameters
#
# * +shards+ - an array or relation of Shards to iterate over
#   (all shards when omitted)
# * +categories+ - an array of categories to activate
# * +options+ -
#    :parallel - true/false to execute in parallel, or a integer of how many
#                sub-processes per database server. Note that parallel
#                invocation currently uses forking, so should be used sparingly
#                because errors are not raised, and you cannot get results back
#    :max_procs - passed to determine_max_procs to cap the number of
#                 concurrently running sub-processes
#    :exception - :ignore, :raise, :defer (wait until the end and raise the first
#                 error), or a proc
#
# Note that :parallel and :max_procs are deleted from the options hash passed in.
#
# ==== Returns
#
# An array of the block's results when run in-process; nil when the work was
# forked out to sub-processes.
#
# ==== Raises
#
# ArgumentError when more than three arguments are given; whatever the block
# raised, subject to :exception; in parallel mode, an exception marshalled
# back from a sub-process.
def with_each_shard(*args)
  raise ArgumentError, "wrong number of arguments (#{args.length} for 0...3)" if args.length > 3

  # the default is not a real Shard record; just run the block once in place
  unless default.is_a?(Shard)
    return Array.wrap(yield)
  end

  options = args.extract_options!
  if args.length == 1
    # a lone array of symbols is a category list, anything else is the scope
    if Array === args.first && args.first.first.is_a?(Symbol)
      categories = args.first
    else
      scope = args.first
    end
  else
    scope, categories = args
  end

  # normalize :parallel to a number of sub-processes per database server
  parallel = case options[:parallel]
             when true
               1
             when false, nil
               0
             else
               options[:parallel]
             end
  options.delete(:parallel)

  scope ||= Shard.all
  if ::ActiveRecord::Relation === scope && scope.order_values.empty?
    scope = scope.order("database_server_id IS NOT NULL, database_server_id, id")
  end

  if parallel > 0
    max_procs = determine_max_procs(options.delete(:max_procs), parallel)
    if ::ActiveRecord::Relation === scope
      # still need a post-uniq, cause the default database server could be NULL or Rails.env in the db
      database_servers = scope.reorder('database_server_id').select(:database_server_id).uniq.
        map(&:database_server).compact.uniq
      # with no servers the division yields Infinity and #ceil raises
      # FloatDomainError, so only rebalance when there is something to divide by
      if max_procs && !database_servers.empty?
        parallel = [(max_procs.to_f / database_servers.count).ceil, parallel].min
      end

      scopes = Hash[database_servers.map do |server|
        server_scope = server.shards.merge(scope)
        if parallel == 1
          subscopes = [server_scope]
        else
          subscopes = []
          total = server_scope.count
          ranges = []
          server_scope.find_ids_in_ranges(:batch_size => (total.to_f / parallel).ceil) do |min, max|
            ranges << [min, max]
          end
          # create a half-open range on the last one (there may be no ranges
          # at all if the server has no matching shards)
          ranges.last[1] = nil unless ranges.empty?
          ranges.each do |min, max|
            subscope = server_scope.where("id>=?", min)
            subscope = subscope.where("id<=?", max) if max
            subscopes << subscope
          end
        end
        [server, subscopes]
      end]
    else
      scopes = scope.group_by(&:database_server)
      if parallel > 1
        # same zero-division guard as above for an empty list of shards
        if max_procs && !scopes.empty?
          parallel = [(max_procs.to_f / scopes.count).ceil, parallel].min
        end
        scopes = Hash[scopes.map do |(server, shards)|
          [server, shards.in_groups(parallel, false).compact]
        end]
      end
    end

    fd_to_name_map = {}
    out_fds = []
    err_fds = []
    pids = []

    # relays one line from every readable child pipe to our stdout, prefixed
    # with the child's name; closes and forgets pipes that hit EOF
    wait_for_output = lambda do |outs, errs, names|
      ready, _ = IO.select(outs + errs)
      ready.each do |fd|
        if fd.eof?
          fd.close
          outs.delete(fd)
          errs.delete(fd)
          next
        end
        line = fd.readline
        puts "#{names[fd]}: #{line}"
      end
    end

    # children report a failure by marshalling the exception into this pipe
    exception_pipe = IO.pipe
    scopes.each do |server, subscopes|
      if !(::ActiveRecord::Relation === subscopes.first) && subscopes.first.class != Array
        subscopes = [subscopes]
      end
      # only one process; don't bother forking
      if scopes.length == 1 && subscopes.length == 1
        exception_pipe.first.close
        exception_pipe.last.close
        return with_each_shard(subscopes.first, categories, options) { yield }
      end

      subscopes.each_with_index do |subscope, idx|
        if subscopes.length > 1
          name = "#{server.id} #{idx + 1}"
        else
          name = server.id
        end

        details = Open4.pfork4(lambda do
          begin
            ::ActiveRecord::Base.clear_all_connections!
            Switchman.config[:on_fork_proc].try(:call)
            $0 = [$0, ARGV, name].flatten.join(' ')
            with_each_shard(subscope, categories, options) { yield }
          rescue Exception => e
            # deliberately broad: anything that kills the child is handed to
            # the parent through the pipe
            exception_pipe.last.write(Marshal.dump(e))
            exception_pipe.last.flush
            exit 1
          end
        end)
        # don't care about writing to stdin
        details[1].close
        out_fds << details[2]
        err_fds << details[3]
        pids << details[0]
        fd_to_name_map[details[2]] = name
        fd_to_name_map[details[3]] = name

        while max_procs && pids.count >= max_procs
          while max_procs && out_fds.count >= max_procs
            # wait for output if we've hit the max_procs limit
            wait_for_output.call(out_fds, err_fds, fd_to_name_map)
          end
          pids.delete(Process.wait) # we've gotten all the output from one fd so wait for its child process to exit
        end
      end
    end

    exception_pipe.last.close

    while out_fds.any? || err_fds.any?
      wait_for_output.call(out_fds, err_fds, fd_to_name_map)
    end
    pids.each { |pid| Process.waitpid2(pid) }

    # I'm not sure why, but we have to do this
    ::ActiveRecord::Base.clear_all_connections!
    # check for an exception; we only re-raise the first one
    # (all the sub-processes shared the same pipe, so we only
    # have to check the one)
    begin
      exception = Marshal.load exception_pipe.first
      raise exception
    rescue EOFError
      # No exceptions
    ensure
      exception_pipe.first.close
    end
    return
  end

  categories ||= []

  previous_shard = nil
  close_connections_if_needed = lambda do |shard|
    # prune the prior connection unless it happened to be the same
    if previous_shard && shard != previous_shard &&
      (shard.database_server != previous_shard.database_server || !previous_shard.database_server.shareable?)
      previous_shard.activate do
        ::Shackles.activated_environments.each do |env|
          ::Shackles.activate(env) do
            if ::ActiveRecord::Base.connected? && ::ActiveRecord::Base.connection.open_transactions == 0
              ::ActiveRecord::Base.connection_pool.current_pool.disconnect!
            end
          end
        end
      end
    end
  end

  result = []
  exception = nil
  scope.each do |shard|
    # shard references a database server that isn't configured in this environment
    next unless shard.database_server
    close_connections_if_needed.call(shard)
    shard.activate(*categories) do
      begin
        result.concat Array.wrap(yield)
      rescue
        case options[:exception]
        when :ignore
        when :defer
          # remember only the first failure; it is raised after the loop
          exception ||= $!
        when Proc
          options[:exception].call
        when :raise
          raise
        else
          raise
        end
      end
    end
    previous_shard = shard
  end
  close_connections_if_needed.call(Shard.current)
  raise exception if exception
  result
end
362
+
363
# Groups the elements of +array+ by the shard they belong to, then yields
# each group with its shard activated. Returns the concatenated block results.
#
# * +array+ - Shards, ActiveRecord objects, or ids (integers, numeric
#   strings, or "shard~local" strings)
# * +partition_proc+ - optional callable mapping an element to the value
#   used for partitioning
#
# When ids are passed without a +partition_proc+, the block receives local
# ids. Elements whose shard cannot be determined go to the current shard.
def partition_by_shard(array, partition_proc = nil)
  buckets = {}
  array.each do |object|
    key = partition_proc ? partition_proc.call(object) : object
    case key
    when Shard
      shard = key
    when ::ActiveRecord::Base
      if key.respond_to?(:associated_shards)
        # an object spanning several shards is placed in every one of them
        key.associated_shards.each { |a_shard| (buckets[a_shard] ||= []) << object }
        next
      end
      shard = key.shard
    when Integer, /^\d+$/, /^(\d+)~(\d+)$/
      local_id, shard = Shard.local_id_for(key)
      local_id ||= key
      object = local_id unless partition_proc
    end
    (buckets[shard || Shard.current] ||= []) << object
  end
  # TODO: use with_each_shard (or vice versa) to get
  # connection management and parallelism benefits
  buckets.flat_map do |shard, objects|
    shard.activate { Array.wrap(yield objects) }
  end
end
395
+
396
# Converts an AR object, integral id, string id, or string short-global-id
# ("shard~local") to an integral id. Returns nil if it can't be interpreted.
#
# For the short-global form the local part must itself be a local id, i.e.
# strictly less than IDS_PER_SHARD; otherwise nil is returned.
def integral_id_for(any_id)
  if ::Rails.version >= '4.2' && any_id.is_a?(::Arel::Nodes::Casted)
    any_id = any_id.val
  end

  # NOTE(review): ^ and $ anchor per line, so multi-line strings can match;
  # left as-is to avoid changing which inputs callers may pass.
  case any_id
  when ::ActiveRecord::Base
    any_id.id
  when /^(\d+)~(\d+)$/
    shard_id = Regexp.last_match(1).to_i
    local_id = Regexp.last_match(2).to_i
    # doesn't make sense to have a double-global id; IDS_PER_SHARD itself is
    # already the first id of the next shard's range, hence >=
    return nil if local_id >= IDS_PER_SHARD
    shard_id * IDS_PER_SHARD + local_id
  when Integer, /^\d+$/
    any_id.to_i
  end
end
417
+
418
# Shared, frozen "could not interpret" result for local_id_for.
NIL_NIL_ID = [nil, nil].freeze

# Takes an id-ish and returns [local_id, shard]:
# * [nil, nil] if it can't be interpreted, or names a shard that lookup
#   cannot find
# * [id, nil] if it's already a local id
# * [local_id, shard] for a global id
def local_id_for(any_id)
  id = integral_id_for(any_id)
  return NIL_NIL_ID unless id
  return [id, nil] if id < IDS_PER_SHARD

  # a global id is shard_id * IDS_PER_SHARD + local_id
  shard_id, local_id = id.divmod(IDS_PER_SHARD)
  shard = lookup(shard_id)
  shard ? [local_id, shard] : NIL_NIL_ID
end
433
+
434
# Takes an id-ish and returns an integral id relative to +target_shard+:
# the local id when the id lives on +target_shard+, a global id otherwise.
# A local id is assumed to live on +source_shard+.
# Returns any_id itself if it can't be interpreted.
def relative_id_for(any_id, source_shard, target_shard)
  local_id, owner = local_id_for(any_id)
  return any_id unless local_id

  owner ||= source_shard
  owner == target_shard ? local_id : owner.global_id_for(local_id)
end
443
+
444
# Takes an id-ish and returns the shortened global string form
# ("shard_id~local_id") if it is global, or the local id if it is local.
# Returns any_id itself if it can't be interpreted.
def short_id_for(any_id)
  local_id, shard = local_id_for(any_id)
  return any_id unless local_id

  shard ? "#{shard.id}~#{local_id}" : local_id
end
453
+
454
# Takes an id-ish and returns an integral global id. A local id is made
# global relative to +source_shard+ (the current shard when omitted).
# Returns any_id itself if it can't be interpreted.
def global_id_for(any_id, source_shard = nil)
  id = integral_id_for(any_id)
  return any_id unless id
  # already global
  return id if id >= IDS_PER_SHARD

  (source_shard || Shard.current).global_id_for(id)
end
466
+
467
# Returns the shard an id-ish belongs to: the shard encoded in a global id,
# otherwise +source_shard+, otherwise the current shard.
def shard_for(any_id, source_shard = nil)
  owner = local_id_for(any_id).last
  owner || source_shard || Shard.current
end
471
+
472
# Given the provided option, determines whether we need to (and whether
# it's possible to) determine a reasonable default process limit.
#
# * +max_procs_input+ - explicit limit; anything responding to to_i. Values
#   that convert to zero or less mean "no limit".
# * +parallel_input+ - requested sub-processes per database server; only
#   used when no explicit limit is given.
#
# Returns a positive integer limit, or nil for "no limit". Without an
# explicit limit the result is 1 when +parallel_input+ is nil or below 1,
# otherwise cpu count * +parallel_input+ (nil if the cpu count is unknown).
def determine_max_procs(max_procs_input, parallel_input=2)
  if max_procs_input
    max_procs = max_procs_input.to_i
    # a negative limit could never be satisfied by the caller's
    # `pids.count >= max_procs` wait loop, so treat it like 0: no limit
    return max_procs > 0 ? max_procs : nil
  end

  return 1 if parallel_input.nil? || parallel_input < 1

  cpus = Environment.cpu_count
  cpus * parallel_input if cpus && cpus > 0
end
489
+
490
+ private
491
# In-process cache of shards by id (see lookup). The hash compares keys
# by identity.
def cached_shards
  @cached_shards ||= {}.compare_by_identity
end
495
+
496
# Stores +shard+ in the in-process cache under its id.
def add_to_cache(shard)
  cached_shards[shard.id] = shard
end
499
+
500
# Drops +shard+ from the in-process cache.
def remove_from_cache(shard)
  cached_shards.delete(shard.id)
end
503
+
504
# Per-thread hash of category => active shard, created on first use.
def active_shards
  Thread.current[:active_shards] ||= {}.compare_by_identity
end
507
+ end
508
+
509
# Returns the shard's name: the stored name attribute, or default_name when
# none is stored. The result is memoized in @name.
def name
  return @name if instance_variable_defined?(:@name)

  # protect against re-entrancy: define @name before computing it
  @name = nil
  @name = read_attribute(:name) || default_name
end
517
+
518
# Sets the name attribute. Assigning nil also clears the memoized name so
# that the next call to #name falls back to default_name.
def name=(name)
  @name = name
  write_attribute(:name, name)
  remove_instance_variable(:@name) if name.nil?
end
522
+
523
# Returns the DatabaseServer found for database_server_id. A truthy result
# is memoized; a falsy one is looked up again on the next call.
def database_server
  return @database_server if @database_server

  @database_server = DatabaseServer.find(database_server_id)
end
526
+
527
# Assigns the database server, storing its id in database_server_id and
# memoizing the object itself.
def database_server=(database_server)
  self.database_server_id = database_server.id
  @database_server = database_server
end
531
+
532
# True when this shard is its database server's primary shard.
def primary?
  database_server.primary_shard == self
end
535
+
536
# Returns "server_id:name", omitting whichever part is nil.
def description
  parts = [database_server.id, name]
  parts.compact.join(':')
end
539
+
540
# Shard records themselves always live on the default shard.
def shard
  Shard.default
end
544
+
545
# Activates this shard for the given categories (just :default when none
# are given) for the duration of the block. Returns the block's value.
def activate(*categories)
  Shard.activate(hashify_categories(categories)) { yield }
end
551
+
552
# For use from console ONLY: activates this shard for the given categories
# (just :default when none are given) without restoring the previous
# activation. Always returns nil.
def activate!(*categories)
  Shard.activate!(hashify_categories(categories))
  nil
end
558
+
559
# Custom Marshal serialization, since shard is self-referential: only the id
# is dumped (see ._load).
def _dump(depth)
  id.to_s
end
563
+
564
# Counterpart of #_dump: resolves the dumped id back to a shard via lookup.
def self._load(str)
  shard_id = str.to_i
  lookup(shard_id)
end
567
+
568
# Drops this shard's database/schema (or deletes its sqlite3 file).
#
# Raises if this is the default shard. Does nothing when the shard has no
# stored name attribute. Custom drop statement(s) are taken from
# Switchman.config, first under the adapter's key and then under
# :drop_statement, with '%{name}' replaced by the shard name. Any error
# during the drop itself is logged at info level and swallowed.
def drop_database
  raise("Cannot drop the database of the default shard") if default?
  return unless read_attribute(:name)

  begin
    adapter = database_server.config[:adapter]
    sharding_config = Switchman.config || {}
    drop_statement = sharding_config[adapter].try(:[], :drop_statement) || sharding_config[:drop_statement]
    if drop_statement
      drop_statement = Array(drop_statement).map { |statement| statement.gsub('%{name}', name) }
    end

    # runs one or many statements on the currently active connection
    run_all = lambda do |statements|
      Array(statements).each { |stmt| ::ActiveRecord::Base.connection.execute(stmt) }
    end

    case adapter
    when 'mysql', 'mysql2'
      activate do
        ::Shackles.activate(:deploy) do
          run_all.call(drop_statement || "DROP DATABASE #{name}")
        end
      end
    when 'postgresql'
      activate do
        ::Shackles.activate(:deploy) do
          # Shut up, Postgres! (silence notices while dropping)
          conn = ::ActiveRecord::Base.connection
          old_proc = conn.raw_connection.set_notice_processor {}
          begin
            run_all.call(drop_statement || "DROP SCHEMA #{name} CASCADE")
          ensure
            conn.raw_connection.set_notice_processor(&old_proc) if old_proc
          end
        end
      end
    when 'sqlite3'
      File.delete(name) unless name == ':memory:'
    end
  rescue => e
    logger.info "Drop failed: #{e}"
  end
end
615
+
616
# Takes an id local to this shard and returns the corresponding global id
# (local_id + id * IDS_PER_SHARD). Returns nil for a nil/false local id.
def global_id_for(local_id)
  local_id ? local_id + id * IDS_PER_SHARD : nil
end
621
+
622
# Hash on the plain id (skips global_id.hash).
def hash
  id.hash
end
626
+
627
# Destroys the shard record; refuses (raises) for the default shard.
def destroy
  raise("Cannot destroy the default shard") if default?

  super
end
631
+
632
+ private
633
+
634
# Deletes this shard's cached attributes ("shard/<id>") from Switchman.cache,
# with the default shard activated.
def clear_cache
  Shard.default.activate { Switchman.cache.delete("shard/#{id}") }
end
639
+
640
# Name used when none is stored: whatever the database server's shard_name
# returns for this shard.
def default_name
  database_server.shard_name(self)
end
643
+
644
# Turns a list of categories into a { category => self } hash suitable for
# Shard.activate; an empty list means just the :default category.
def hashify_categories(categories)
  return { :default => self } if categories.empty?

  categories.each_with_object({}) { |category, mapping| mapping[category] = self }
end
651
+
652
+ end
653
+ end