switchman 1.5.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,653 @@
1
+ require 'switchman/database_server'
2
+ require 'switchman/default_shard'
3
+ require 'switchman/environment'
4
+
5
+ module Switchman
6
+ class Shard < ::ActiveRecord::Base
7
# Each shard gets its own range of ten trillion ids; an id at or above
# this value is a global id that encodes the shard it belongs to.
IDS_PER_SHARD = 10_000_000_000_000

# Sharding categories mapped to the models that belong to them.
CATEGORIES = {
  # special cased to mean all other models
  default: nil,
  # special cased to not allow activating a shard other than the default
  unsharded: [Shard]
}
private_constant :CATEGORIES
@shard_category = :unsharded

# attr_accessible only exists when the protected_attributes gem is loaded
attr_accessible :default, :name, :database_server if defined?(::ProtectedAttributes)

# only allow one default
validates_uniqueness_of :default, if: ->(s) { s.default? }

after_save :clear_cache

scope :primary, lambda {
  where(name: nil).
    order(:database_server_id, :id).
    distinct_on(:database_server_id)
}
30
+
31
+ class << self
32
# Names of every sharding category registered in CATEGORIES.
def categories
  CATEGORIES.each_key.to_a
end
35
+
36
# Returns the default shard, looking it up the first time it is needed or
# whenever a reload is requested.
#
# * +reload_deprecated+ - positional form of +reload+ (a truthy value wins)
# * +reload+ - repeat the lookup even though a default is already memoized
# * +with_fallback+ - during the lookup, fall back to the previously
#   memoized default (when there is one) rather than the DefaultShard dummy
def default(reload_deprecated = false, reload: false, with_fallback: false)
  reload = reload_deprecated if reload_deprecated
  return @default if @default && !reload

  # A dummy object keeps several key methods working (including the lookup
  # of the default shard itself), so the rest of the code can assume
  # sharding is up and running. It is held in a local so that this method
  # stays re-entrant.
  fallback = DefaultShard.instance
  # the caller asked to keep using the default we already have
  fallback = @default if with_fallback && @default

  # first time through: install the fallback so re-entrant calls return
  # instead of looping back into this lookup
  @default ||= fallback
  # forget our current shard activations - they might have "another"
  # default shard serialized there
  active_shards.clear

  # Find the actual record if it exists; keep the fallback when there is
  # no such row or the query raises (e.g. the table doesn't exist)
  @default =
    begin
      Shard.where(default: true).first || fallback
    rescue
      fallback
    end
  activate!(:default => @default)
  @default
end
67
+
68
# The shard currently activated for +category+, or the default shard when
# nothing is activated for it.
def current(category = :default)
  activated = active_shards[category]
  return activated if activated
  Shard.default
end
71
+
72
# Activates the given category => shard pairs for the duration of the
# block, then restores whatever activate! reported as replaced (also when
# the block raises). Returns the block's value.
def activate(shards)
  previous = nil
  begin
    previous = activate!(shards)
    yield
  ensure
    active_shards.merge!(previous) if previous
  end
end
78
+
79
# Activates the given category => shard pairs without restoring anything
# afterwards. The :unsharded category is skipped.
#
# Returns a hash of the activations that were replaced (category =>
# previously active shard), or nil when nothing changed.
def activate!(shards)
  active = active_shards
  replaced = nil
  shards.each do |category, shard|
    next if category == :unsharded
    existing = active[category]
    next if existing == shard
    (replaced ||= {})[category] = existing
    active[category] = shard
  end
  replaced
end
92
+
93
+ def lookup(id)
94
+ id_i = id.to_i
95
+ return current if id_i == current.id || id == 'self'
96
+ return default if id_i == default.id || id.nil? || id == 'default'
97
+ id = id_i
98
+ raise ArgumentError if id == 0
99
+
100
+ unless cached_shards.has_key?(id)
101
+ cached_shards[id] = Shard.default.activate do
102
+ # can't simply cache the AR object since Shard has a custom serializer
103
+ # that calls this method
104
+ attributes = Switchman.cache.fetch(['shard', id].join('/')) do
105
+ shard = find_by_id(id)
106
+ if shard
107
+ attributes = shard.attributes
108
+ if ::Rails.version < '4.2'
109
+ attributes.each_key do |key|
110
+ attributes[key] = attributes[key].unserialize if attributes[key].is_a?(::ActiveRecord::AttributeMethods::Serialization::Attribute)
111
+ end
112
+ end
113
+ attributes
114
+ else
115
+ :nil
116
+ end
117
+ end
118
+ if attributes == :nil
119
+ nil
120
+ else
121
+ shard = Shard.new
122
+ attributes.each do |attr, value|
123
+ shard.send(:"#{attr}=", value)
124
+ end
125
+ shard.instance_variable_set(:@new_record, false)
126
+ # connection info doesn't exist in database.yml;
127
+ # pretend the shard doesn't exist either
128
+ shard = nil unless shard.database_server
129
+ shard
130
+ end
131
+ end
132
+ end
133
+ cached_shards[id]
134
+ end
135
+
136
# Empties the in-process cache of looked-up shards.
def clear_cache
  shards = cached_shards
  shards.clear
end
139
+
140
# Runs the block once per shard, with that shard activated.
#
# ==== Parameters
#
# * +shards+ - an array or relation of Shards to iterate over
#              (Shard.all when omitted)
# * +categories+ - an array of categories to activate. A single array
#                  argument whose first element is a Symbol is taken to be
#                  the categories.
# * +options+ -
#    :parallel - true/false to execute in parallel, or a integer of how many
#                sub-processes per database server. Note that parallel
#                invocation currently uses forking, so should be used sparingly
#                because errors are not raised, and you cannot get results back
#    :max_procs - cap on the number of sub-processes alive at once; when
#                 omitted, determine_max_procs picks a value
#    :exception - :ignore, :raise, :defer (wait until the end and raise the first
#                 error), or a proc (called without arguments)
#
# ==== Returns
#
# When run serially, the block's results, each passed through Array.wrap
# and concatenated. The forking path returns nil, unless only a single
# process would be needed, in which case it recurses serially. If the
# default shard is not a Shard, the block runs once and its wrapped result
# is returned.
#
# Raises ArgumentError when more than three arguments are passed.
def with_each_shard(*args)
  raise ArgumentError, "wrong number of arguments (#{args.length} for 0...3)" if args.length > 3

  unless default.is_a?(Shard)
    return Array.wrap(yield)
  end

  options = args.extract_options!
  if args.length == 1
    if Array === args.first && args.first.first.is_a?(Symbol)
      categories = args.first
    else
      scope = args.first
    end
  else
    scope, categories = args
  end

  parallel = case options[:parallel]
             when true
               1
             when false, nil
               0
             else
               options[:parallel]
             end
  options.delete(:parallel)

  scope ||= Shard.all
  if ::ActiveRecord::Relation === scope && scope.order_values.empty?
    scope = scope.order("database_server_id IS NOT NULL, database_server_id, id")
  end

  if parallel > 0
    max_procs = determine_max_procs(options.delete(:max_procs), parallel)
    if ::ActiveRecord::Relation === scope
      # still need a post-uniq, cause the default database server could be NULL or Rails.env in the db
      database_servers = scope.reorder('database_server_id').select(:database_server_id).uniq.
          map(&:database_server).compact.uniq
      # skip the division when there are no servers: x / 0 is Infinity and
      # Infinity.ceil raises FloatDomainError
      if max_procs && !database_servers.empty?
        parallel = [(max_procs.to_f / database_servers.count).ceil, parallel].min
      end

      scopes = Hash[database_servers.map do |server|
        server_scope = server.shards.merge(scope)
        if parallel == 1
          subscopes = [server_scope]
        else
          subscopes = []
          total = server_scope.count
          ranges = []
          server_scope.find_ids_in_ranges(:batch_size => (total.to_f / parallel).ceil) do |min, max|
            ranges << [min, max]
          end
          # create a half-open range on the last one
          # (nothing to open up when no ranges were yielded at all)
          ranges.last[1] = nil unless ranges.empty?
          ranges.each do |min, max|
            subscope = server_scope.where("id>=?", min)
            subscope = subscope.where("id<=?", max) if max
            subscopes << subscope
          end
        end
        [server, subscopes]
      end]
    else
      scopes = scope.group_by(&:database_server)
      if parallel > 1
        # same division-by-zero guard as above
        if max_procs && !scopes.empty?
          parallel = [(max_procs.to_f / scopes.count).ceil, parallel].min
        end
        scopes = Hash[scopes.map do |(server, shards)|
          [server, shards.in_groups(parallel, false).compact]
        end]
      end
    end

    fd_to_name_map = {}
    out_fds = []
    err_fds = []
    pids = []

    # Blocks until some child pipe is readable, then echoes one line per
    # ready pipe prefixed with the child's name; pipes at EOF are closed
    # and dropped from both lists.
    wait_for_output = lambda do |outs, errs, names|
      ready, _ = IO.select(outs + errs)
      ready.each do |fd|
        if fd.eof?
          fd.close
          outs.delete(fd)
          errs.delete(fd)
          next
        end
        line = fd.readline
        puts "#{names[fd]}: #{line}"
      end
    end

    # children marshal any exception they hit into this shared pipe
    exception_pipe = IO.pipe
    scopes.each do |server, subscopes|
      # a plain list of shards (not a list of sub-lists or relations)
      # counts as a single subscope
      if !(::ActiveRecord::Relation === subscopes.first) && subscopes.first.class != Array
        subscopes = [subscopes]
      end
      # only one process; don't bother forking
      if scopes.length == 1 && subscopes.length == 1
        exception_pipe.first.close
        exception_pipe.last.close
        return with_each_shard(subscopes.first, categories, options) { yield }
      end

      subscopes.each_with_index do |subscope, idx|
        if subscopes.length > 1
          name = "#{server.id} #{idx + 1}"
        else
          name = server.id
        end

        details = Open4.pfork4(lambda do
          begin
            ::ActiveRecord::Base.clear_all_connections!
            Switchman.config[:on_fork_proc].try(:call)
            $0 = [$0, ARGV, name].flatten.join(' ')
            with_each_shard(subscope, categories, options) { yield }
          rescue Exception => e
            # Exception (not just StandardError) so that whatever stopped
            # the child is reported back to the parent
            exception_pipe.last.write(Marshal.dump(e))
            exception_pipe.last.flush
            exit 1
          end
        end)
        # don't care about writing to stdin
        details[1].close
        out_fds << details[2]
        err_fds << details[3]
        pids << details[0]
        fd_to_name_map[details[2]] = name
        fd_to_name_map[details[3]] = name

        while max_procs && pids.count >= max_procs
          while max_procs && out_fds.count >= max_procs
            # wait for output if we've hit the max_procs limit
            wait_for_output.call(out_fds, err_fds, fd_to_name_map)
          end
          pids.delete(Process.wait) # we've gotten all the output from one fd so wait for its child process to exit
        end
      end
    end

    # close the parent's write end so reading the pipe below can hit EOF
    exception_pipe.last.close

    while out_fds.any? || err_fds.any?
      wait_for_output.call(out_fds, err_fds, fd_to_name_map)
    end
    pids.each { |pid| Process.waitpid2(pid) }

    # I'm not sure why, but we have to do this
    ::ActiveRecord::Base.clear_all_connections!
    # check for an exception; we only re-raise the first one
    # (all the sub-processes shared the same pipe, so we only
    # have to check the one)
    begin
      exception = Marshal.load exception_pipe.first
      raise exception
    rescue EOFError
      # No exceptions
    ensure
      exception_pipe.first.close
    end
    return
  end

  categories ||= []

  previous_shard = nil
  close_connections_if_needed = lambda do |shard|
    # prune the prior connection unless it happened to be the same
    if previous_shard && shard != previous_shard &&
      (shard.database_server != previous_shard.database_server || !previous_shard.database_server.shareable?)
      previous_shard.activate do
        ::Shackles.activated_environments.each do |env|
          ::Shackles.activate(env) do
            # leave connections with an open transaction alone
            if ::ActiveRecord::Base.connected? && ::ActiveRecord::Base.connection.open_transactions == 0
              ::ActiveRecord::Base.connection_pool.current_pool.disconnect!
            end
          end
        end
      end
    end
  end

  result = []
  exception = nil
  scope.each do |shard|
    # shard references a database server that isn't configured in this environment
    next unless shard.database_server
    close_connections_if_needed.call(shard)
    shard.activate(*categories) do
      begin
        result.concat Array.wrap(yield)
      rescue
        case options[:exception]
        when :ignore
        when :defer
          # remember only the first failure; it is raised after the loop
          exception ||= $!
        when Proc
          options[:exception].call
        when :raise
          raise
        else
          raise
        end
      end
    end
    previous_shard = shard
  end
  close_connections_if_needed.call(Shard.current)
  raise exception if exception
  result
end
362
+
363
# Groups the elements of +array+ by the shard they belong to, then yields
# each group with its shard activated.
#
# * +array+ - objects to partition: Shards, ActiveRecord objects, or ids
#   (Integers, digit strings, or "shard~local" short global ids)
# * +partition_proc+ - optional callable mapping an element to the object
#   that decides its shard; when given, the elements themselves are yielded
#   unchanged. Without it, ids are replaced by their local id.
#
# Objects responding to +associated_shards+ are placed in every one of
# those shards. Anything whose shard cannot be determined goes to
# Shard.current.
#
# Returns the block's results, each passed through Array.wrap and
# concatenated.
def partition_by_shard(array, partition_proc = nil)
  shard_arrays = {}
  array.each do |object|
    shard = nil
    partition_object = partition_proc ? partition_proc.call(object) : object
    case partition_object
    when Shard
      shard = partition_object
    when ::ActiveRecord::Base
      if partition_object.respond_to?(:associated_shards)
        partition_object.associated_shards.each do |a_shard|
          (shard_arrays[a_shard] ||= []) << object
        end
        next
      else
        shard = partition_object.shard
      end
    # \A and \z anchor the whole string; ^ and $ would also accept a
    # multi-line string that merely contains an id-looking line
    when Integer, /\A\d+\z/, /\A(\d+)~(\d+)\z/
      local_id, shard = Shard.local_id_for(partition_object)
      local_id ||= partition_object
      object = local_id unless partition_proc
    end
    shard ||= Shard.current
    (shard_arrays[shard] ||= []) << object
  end
  # TODO: use with_each_shard (or vice versa) to get
  # connection management and parallelism benefits
  shard_arrays.inject([]) do |results, (shard, objects)|
    results.concat shard.activate { Array.wrap(yield objects) }
  end
end
395
+
396
# converts an AR object, integral id, string id, or string short-global-id to a
# integral id. nil if it can't be interpreted
#
# An Arel::Nodes::Casted is unwrapped to the value it holds first.
def integral_id_for(any_id)
  # Casted only exists in the Arel bundled with Rails >= 4.2; checking for
  # the constant avoids comparing version strings lexically
  if defined?(::Arel::Nodes::Casted) && any_id.is_a?(::Arel::Nodes::Casted)
    any_id = any_id.val
  end

  # \A and \z anchor the whole string; ^ and $ match per line and would
  # accept e.g. "12\nanything"
  case any_id
  when ::ActiveRecord::Base
    any_id.id
  when /\A(\d+)~(\d+)\z/
    local_id = $2.to_i
    # doesn't make sense to have a double-global id
    # (IDS_PER_SHARD itself is already a global id, hence >=)
    return nil if local_id >= IDS_PER_SHARD
    $1.to_i * IDS_PER_SHARD + local_id
  when Integer, /\A\d+\z/
    any_id.to_i
  end
end
417
+
418
# Splits an id-ish value into [local_id, shard].
#
# Returns [nil, nil] when the value can't be interpreted or when the shard
# encoded in a global id can't be looked up, and [id, nil] when the value
# is already a local id.
NIL_NIL_ID = [nil, nil].freeze
def local_id_for(any_id)
  id = integral_id_for(any_id)
  return NIL_NIL_ID unless id
  return [id, nil] if id < IDS_PER_SHARD

  # a global id: the quotient is the shard id, the remainder the local id
  shard_id, local_id = id.divmod(IDS_PER_SHARD)
  owner = lookup(shard_id)
  owner ? [local_id, owner] : NIL_NIL_ID
end
433
+
434
# Expresses an id-ish value as an integral id relative to +target_shard+:
# the local id when it lives on +target_shard+, a global id otherwise.
# Local ids are taken to belong to +source_shard+. Returns +any_id+ itself
# when it can't be interpreted.
def relative_id_for(any_id, source_shard, target_shard)
  local_id, owning_shard = local_id_for(any_id)
  return any_id unless local_id
  owning_shard ||= source_shard
  owning_shard == target_shard ? local_id : owning_shard.global_id_for(local_id)
end
443
+
444
# Renders an id-ish value in short form: "shard_id~local_id" for a global
# id, the integral local id for a local one. Returns +any_id+ itself when
# it can't be interpreted.
def short_id_for(any_id)
  local_id, shard = local_id_for(any_id)
  if !local_id
    any_id
  elsif shard
    "#{shard.id}~#{local_id}"
  else
    local_id
  end
end
453
+
454
# Converts an id-ish value to an integral global id. A local id is made
# global relative to +source_shard+, or to Shard.current when none is
# given. Returns +any_id+ itself when it can't be interpreted.
def global_id_for(any_id, source_shard = nil)
  id = integral_id_for(any_id)
  return any_id unless id
  # already global
  return id if id >= IDS_PER_SHARD

  (source_shard || Shard.current).global_id_for(id)
end
466
+
467
# The shard an id-ish value belongs to: the shard encoded in a global id,
# otherwise +source_shard+, otherwise Shard.current.
def shard_for(any_id, source_shard = nil)
  _local_id, owner = local_id_for(any_id)
  return owner if owner
  source_shard || Shard.current
end
471
+
472
# Works out the cap on concurrent sub-processes.
#
# * +max_procs_input+ - an explicit cap; converted with to_i, where 0
#   means "no cap" (nil)
# * +parallel_input+ - sub-processes per database server, used only when
#   no explicit cap is given
#
# Without an explicit cap: 1 when +parallel_input+ is nil or below 1,
# otherwise the CPU count times +parallel_input+, or nil when
# Environment.cpu_count reports nothing usable.
def determine_max_procs(max_procs_input, parallel_input = 2)
  if max_procs_input
    explicit = max_procs_input.to_i
    return explicit == 0 ? nil : explicit
  end
  return 1 if parallel_input.nil? || parallel_input < 1

  cpus = Environment.cpu_count
  cpus * parallel_input if cpus && cpus > 0
end
489
+
490
+ private
491
# In-process cache of looked-up shards, keyed by shard id. The hash
# compares its keys by identity.
def cached_shards
  return @cached_shards if @cached_shards
  @cached_shards = {}.compare_by_identity
end
495
+
496
# Stores +shard+ in the in-process cache under its id.
def add_to_cache(shard)
  cached_shards.store(shard.id, shard)
end
499
+
500
# Drops the in-process cache entry stored under +shard+'s id and returns
# whatever was cached there.
def remove_from_cache(shard)
  shard_id = shard.id
  cached_shards.delete(shard_id)
end
503
+
504
# Shard activations (category => shard), stored in Thread.current so each
# thread has its own. The hash compares its keys by identity.
def active_shards
  store = Thread.current
  store[:active_shards] ||= {}.compare_by_identity
end
507
+ end
508
+
509
# The shard's name: the stored +name+ attribute, or default_name when none
# is stored. Computed once and memoized in @name.
def name
  return @name if instance_variable_defined?(:@name)

  # protect against re-entrancy: define @name before computing it
  @name = nil
  @name = read_attribute(:name) || default_name
end
517
+
518
# Writes the +name+ attribute and keeps the memoized @name in step.
# Assigning nil removes the memo so that #name recomputes it.
def name=(name)
  @name = name
  write_attribute(:name, name)
  remove_instance_variable(:@name) if name.nil?
end
522
+
523
# The DatabaseServer found for database_server_id. Memoized once the
# lookup returns something truthy.
def database_server
  return @database_server if @database_server
  @database_server = DatabaseServer.find(self.database_server_id)
end
526
+
527
# Points the shard at +database_server+: stores the server's id in
# database_server_id and memoizes the server object.
def database_server=(database_server)
  server_id = database_server.id
  self.database_server_id = server_id
  @database_server = database_server
end
531
+
532
# True when this shard is its database server's primary shard.
def primary?
  primary = database_server.primary_shard
  self == primary
end
535
+
536
# "server_id:name", leaving out whichever part is nil.
def description
  parts = [database_server.id, name]
  parts.compact.join(':')
end
539
+
540
# Shard records themselves always live on the default shard.
def shard
  ::Switchman::Shard.default
end
544
+
545
# Activates this shard for the given categories (just :default when none
# are given) while the block runs. Returns the block's value.
def activate(*categories)
  Shard.activate(hashify_categories(categories)) { yield }
end
551
+
552
# for use from console ONLY
#
# Activates this shard for the given categories (just :default when none
# are given) and leaves it activated. Always returns nil.
def activate!(*categories)
  Shard.activate!(hashify_categories(categories))
  nil
end
558
+
559
# Custom Marshal serialization, since shard is self-referential: only the
# id is dumped, as a string.
def _dump(_depth)
  id.to_s
end
563
+
564
# Counterpart of #_dump: turns the dumped id string back into a shard by
# looking the id up.
def self._load(str)
  shard_id = str.to_i
  lookup(shard_id)
end
567
+
568
# Drops this shard's database (MySQL), schema (PostgreSQL) or file (SQLite).
#
# Raises when called on the default shard. Does nothing when the shard has
# no stored +name+ attribute. A drop statement, or list of statements,
# configured in Switchman.config under the adapter's key or at the top
# level replaces the built-in SQL; '%{name}' in it is replaced with the
# shard's name. Any failure while dropping is logged and not raised.
def drop_database
  raise("Cannot drop the database of the default shard") if self.default?
  return unless read_attribute(:name)

  begin
    adapter = self.database_server.config[:adapter]
    settings = Switchman.config || {}
    # adapter-specific setting first, then the global one
    configured = settings[adapter].try(:[], :drop_statement) || settings[:drop_statement]
    statements = nil
    if configured
      statements = Array(configured).map { |statement| statement.gsub('%{name}', self.name) }
    end

    run_statements = lambda do |fallback_sql|
      Array(statements || fallback_sql).each do |stmt|
        ::ActiveRecord::Base.connection.execute(stmt)
      end
    end

    case adapter
    when 'mysql', 'mysql2'
      self.activate do
        ::Shackles.activate(:deploy) do
          run_statements.call("DROP DATABASE #{self.name}")
        end
      end
    when 'postgresql'
      self.activate do
        ::Shackles.activate(:deploy) do
          # Shut up, Postgres!
          conn = ::ActiveRecord::Base.connection
          old_proc = conn.raw_connection.set_notice_processor {}
          begin
            run_statements.call("DROP SCHEMA #{self.name} CASCADE")
          ensure
            conn.raw_connection.set_notice_processor(&old_proc) if old_proc
          end
        end
      end
    when 'sqlite3'
      File.delete(self.name) unless self.name == ':memory:'
    end
  rescue => e
    logger.info "Drop failed: #{e}"
  end
end
615
+
616
# Turns an id that is local to this shard into a global id by adding this
# shard's id times IDS_PER_SHARD. Returns nil for a nil or false +local_id+.
def global_id_for(local_id)
  return unless local_id
  shard_offset = self.id * IDS_PER_SHARD
  local_id + shard_offset
end
621
+
622
# Hash on the plain id (skip global_id.hash).
def hash
  shard_id = id
  shard_id.hash
end
626
+
627
# Destroys the shard record. Raises when this is the default shard.
def destroy
  if self.default?
    raise("Cannot destroy the default shard")
  end
  super
end
631
+
632
+ private
633
+
634
# Deletes this shard's entry ("shard/<id>") from Switchman.cache, with the
# default shard activated.
def clear_cache
  Shard.default.activate do
    cache_key = "shard/#{id}"
    Switchman.cache.delete(cache_key)
  end
end
639
+
640
# The name the database server assigns to this shard when no explicit name
# is stored.
def default_name
  server = database_server
  server.shard_name(self)
end
643
+
644
# Builds the category => shard hash that Shard.activate expects, mapping
# every given category to this shard. An empty list means just :default.
def hashify_categories(categories)
  return { :default => self } if categories.empty?

  categories.each_with_object({}) do |category, mapping|
    mapping[category] = self
  end
end
651
+
652
+ end
653
+ end