gizzmo 0.11.0 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,192 @@
1
+ module Gizzard
2
# Plain settings holder for a migration run. Every setting is exposed through
# an accessor and can be supplied as an options hash to the constructor.
class MigratorConfig
  attr_accessor :prefix, :table_id, :source_type, :destination_type, :forwarding_space, :forwarding_space_min, :manifest

  # Assigns each option whose key has a matching writer method; keys without
  # a writer are ignored.
  #
  # The writer name must be interpolated ("#{key}="). The previous check used
  # the literal string "{k}=", which never matched, so no option was applied.
  def initialize(opts = {})
    opts.each do |key, value|
      writer = "#{key}="
      send(writer, value) if respond_to?(writer)
    end
  end

  # Builds a shard name from the prefix, the table id and the enumeration
  # (zero padded to four digits), joined with underscores. Nil parts are
  # left out. A negative table id is rendered as "n<absolute value>".
  def shard_name(enum)
    table_id_segment = (table_id && table_id < 0) ? "n#{table_id.abs}" : table_id
    # use the computed segment; the raw table_id would leak a "-" into the name
    [prefix, table_id_segment, "%04d" % enum].compact.join("_")
  end
end
14
+
15
+ class Migrator
16
+ BALANCE_TOLERANCE = 1
17
+
18
+ attr_reader :configured_templates, :existing_map, :existing_templates, :total_shards
19
+
20
+ # populated via derive_changes
21
+ attr_reader :new_templates, :unrecognized_templates, :similar_templates, :unchanged_templates
22
+
23
# Stores the existing and configured layouts and classifies their differences.
#
# existing_map          - hash; its keys become existing_templates and the
#                         lengths of its values are summed into total_shards.
# config_templates      - the configured templates.
# default_total_shards  - used for total_shards when existing_map is empty.
# config                - passed through to each transformation step.
def initialize(existing_map, config_templates, default_total_shards, config)
  @config               = config
  @existing_map         = existing_map
  @configured_templates = config_templates
  @existing_templates   = existing_map.keys

  shard_counts  = @existing_map.values.map { |shards| shards.length }
  @total_shards =
    if shard_counts.empty?
      default_total_shards
    else
      shard_counts.inject { |sum, count| sum + count }
    end

  derive_changes
end
33
+
34
# Calls prepare! on every transformation, passing the nameserver and config.
def prepare!(nameserver)
  transformations.each do |step|
    step.prepare!(nameserver, @config)
  end
end

# Calls copy! on every transformation, passing the nameserver and config.
def copy!(nameserver)
  transformations.each do |step|
    step.copy!(nameserver, @config)
  end
end

# Calls wait_for_copies on every transformation, passing the nameserver and config.
def wait_for_copies(nameserver)
  transformations.each do |step|
    step.wait_for_copies(nameserver, @config)
  end
end

# Calls cleanup! on every transformation, passing the nameserver and config.
def cleanup!(nameserver)
  transformations.each do |step|
    step.cleanup!(nameserver, @config)
  end
end
49
+
50
# Returns the list of transformations needed to move from the existing layout
# to the configured one. The result is memoized in @transformations.
def transformations
  return @transformations if @transformations

  # nothing similar, unrecognized or new: there is nothing to do
  if similar_templates.empty? && unrecognized_templates.empty? && new_templates.empty?
    return @transformations = []
  end

  # every configured template starts out with an empty shard list
  configured_map = {}
  configured_templates.each { |template| configured_map[template] = [] }

  @transformations = []

  if existing_templates.empty?
    # no forwardings exist yet, so the forwarding index has to be populated
    forwardings = generate_new_forwardings(total_shards)

    # park the new shard ids on one configured template; the rebalance step
    # below spreads them out
    configured_map.values.first.concat(forwardings.values)

    table_id    = @config.table_id
    shard_names = {}
    forwardings.each { |base_id, enum| shard_names[base_id] = @config.shard_name(enum) }

    @transformations << ForwardingTransformation.new(table_id, shard_names)
  end

  # unchanged templates keep their shards as they are
  move_unchanged(existing_map, configured_map)

  # similar templates hand their shards to their new versions
  move_similar(existing_map, configured_map)

  # shards of unrecognized templates go to new (or existing) templates
  move_unrecognized_to_new(existing_map, configured_map)

  # even out the shard counts
  rebalance_shards(configured_map)

  # generated transformations go in front of anything collected above
  @transformations = generate_transformations(existing_map, configured_map) + @transformations
end
87
+
88
+ private
89
+
90
# Builds a hash of base id => enumeration index for shard_count shards.
# Base ids start at @config.forwarding_space_min and advance by
# forwarding_space / shard_count (integer division for integer inputs).
def generate_new_forwardings(shard_count)
  step_size   = @config.forwarding_space / shard_count
  forwardings = {}

  (0...shard_count).each do |index|
    base_id = @config.forwarding_space_min + (index * step_size)
    forwardings[base_id] = index
  end

  forwardings
end
101
+
102
# Copies (dup) the shard list of each unchanged template from existing into
# configured under the same key.
def move_unchanged(existing, configured)
  unchanged_templates.each do |template|
    configured[template] = existing[template].dup
  end
end

# For each (old, new) pair of similar templates, copies (dup) the old
# template's shard list from existing into configured under the new template.
def move_similar(existing, configured)
  similar_templates.each do |old_template, new_template|
    configured[new_template] = existing[old_template].dup
  end
end
109
+
110
# Hands the shards of unrecognized templates over to the configured layout.
#
# Each new template takes the shard list (dup) of one unrecognized template,
# popped from the end of the unrecognized list. Shards of any unrecognized
# templates left over afterwards are appended to the last value of
# configured; the later rebalance step redistributes them.
def move_unrecognized_to_new(existing, configured)
  # work on a copy so the reader's array is not mutated
  pending = unrecognized_templates.dup

  new_templates.each do |fresh|
    orphan = pending.pop
    next unless orphan
    configured[fresh] = existing[orphan].dup
  end

  leftover_shards = []
  pending.each { |orphan| leftover_shards.concat(existing[orphan]) }

  configured.values.last.concat(leftover_shards) unless leftover_shards.empty?
end
128
+
129
# Moves one shard at a time from the largest shard list to the smallest
# until shards_balanced? reports the map as balanced.
def rebalance_shards(configured)
  loop do
    break if shards_balanced?(configured)

    recipient = smallest(configured)
    donor     = largest(configured)
    recipient << donor.pop
  end
end
134
+
135
# Compares the shard => template assignment of existing and configured and
# returns one Transformation per distinct (from, to) pair, each collecting
# the shards that moved between that pair.
def generate_transformations(existing, configured)
  before = shards_to_templates(existing)
  after  = shards_to_templates(configured)

  by_route = {}
  # pairs present in the configured layout but not the existing one have moved
  (after.to_a - before.to_a).each do |shard, to|
    from  = before[shard]
    route = [from, to]
    by_route[route] ||= Transformation.new(from, to, [])
    by_route[route].shards << shard
  end

  by_route.values
end
147
+
148
# True when the longest and shortest shard lists in template_map differ by
# at most BALANCE_TOLERANCE.
#
# An empty map is treated as balanced. Previously it raised NoMethodError,
# because both the largest and smallest size were nil.
def shards_balanced?(template_map)
  sizes = template_map.values.map { |shards| shards.length }
  return true if sizes.empty?

  sizes.max - sizes.min <= BALANCE_TOLERANCE
end
152
+
153
# Returns the value of template_map that sorts first by ascending length.
def smallest(template_map)
  ascending = template_map.values.sort { |left, right| left.length <=> right.length }
  ascending.first
end

# Returns the value of template_map that sorts first by descending length.
def largest(template_map)
  descending = template_map.values.sort { |left, right| right.length <=> left.length }
  descending.first
end
160
+
161
# Inverts a template => [shards] hash into a shard => template hash.
# If a shard appears under several templates, the later entry wins.
def shards_to_templates(templates_to_shards)
  owner_by_shard = {}

  templates_to_shards.each do |template, shards|
    shards.each { |shard| owner_by_shard[shard] = template }
  end

  owner_by_shard
end
166
+
167
# Splits existing and configured templates with split_set, pairing them by
# similar?, and fills in:
#   @unrecognized_templates - existing templates with no configured partner
#   @new_templates          - configured templates with no existing partner
#   @similar_templates      - paired templates that are not ==
#   @unchanged_templates    - existing templates whose partner is ==
def derive_changes
  only_existing, only_configured, related_templates =
    split_set(existing_templates, configured_templates) { |existing, configured| existing.similar?(configured) }

  @unrecognized_templates = only_existing
  @new_templates          = only_configured
  @similar_templates      = related_templates.reject { |existing, configured| existing == configured }
  @unchanged_templates    = related_templates.keys - @similar_templates.keys
end
174
+
175
# Pairs items of a with items of b using the given predicate.
#
# Returns [only_in_a, only_in_b, overlap], where overlap maps each matched
# item of a to its partner from b and the two arrays hold the unmatched items.
# The inputs are not modified.
#
# Pairing is one-to-one: an item of b that has been paired is never offered to
# the predicate again, and an item of a stops searching after its first match.
# Previously the consumed slot (nil) was still passed to the predicate for
# later items. A single item of a could also consume several items of b, which
# then disappeared from every part of the result.
def split_set(a, b, &predicate)
  in_a = a.dup
  in_b = b.dup
  overlap = {}

  in_a.each_with_index do |left, left_i|
    in_b.each_with_index do |right, right_i|
      next if right.nil? # slot already paired with an earlier item
      next unless predicate.call(left, right)

      overlap[left] = right
      in_a[left_i] = in_b[right_i] = nil
      break
    end
  end

  [in_a.compact, in_b.compact, overlap]
end
191
+ end
192
+ end
@@ -0,0 +1,206 @@
1
+ module Gizzard
2
+ Shard = Struct.new(:info, :children, :weight)
3
+
4
+ class Shard
5
+ class << self
6
# Joins base_prefix, the table id segment and the enumeration (formatted with
# "%0.4i") with underscores, leaving out nil parts. A negative table id is
# rendered as "n<absolute value>".
def canonical_table_prefix(enum, table_id = nil, base_prefix = "shard")
  enum_segment = "%0.4i" % enum

  table_segment =
    if table_id.nil?
      nil
    elsif table_id < 0
      "n#{table_id.abs}"
    else
      table_id.to_s
    end

  parts = [base_prefix, table_segment, enum_segment]
  parts.compact.join('_')
end
11
+
12
# Returns the first run of three or more digits in table_prefix as an
# integer. Raises RuntimeError when there is no such run.
def parse_enumeration(table_prefix)
  digits = table_prefix.match(/\d{3,}/)
  raise "Cannot derive enumeration!" unless digits

  digits[0].to_i
end
19
+ end
20
+
21
# Shard class names (last dotted segment) treated as virtual, i.e. not concrete.
VIRTUAL_SHARD_TYPES = %w[
  FailingOverShard
  ReplicatingShard
  ReadOnlyShard
  WriteOnlyShard
  BlockedShard
]

# Shard class names treated as replicating.
REPLICATING_SHARD_TYPES = %w[ReplicatingShard FailingOverShard]

# Shard class names that are not valid copy sources.
INVALID_COPY_TYPES = %w[ReadOnlyShard WriteOnlyShard BlockedShard]

# Table-name suffix used for each virtual shard class name.
SHARD_SUFFIXES = {
  "FailingOverShard" => "replicating",
  "ReplicatingShard" => "replicating",
  "ReadOnlyShard"    => "read_only",
  "WriteOnlyShard"   => "write_only",
  "BlockedShard"     => "blocked"
}
40
+
41
# Shard id, read from info.
def id
  info.id
end

# Hostname of the shard id.
def hostname
  id.hostname
end

# Table prefix of the shard id.
def table_prefix
  id.table_prefix
end

# Class name, read from info.
def class_name
  info.class_name
end

# Source type, read from info.
def source_type
  info.source_type
end

# Destination type, read from info.
def destination_type
  info.destination_type
end

# Busy value, read from info.
def busy
  info.busy
end
48
+
49
# Builds a ShardTemplate from this shard's class name, hostname, weight,
# source and destination types, with the templates of its children.
def template
  child_templates = children.map { |child| child.template }

  template_args = [
    info.class_name,
    id.hostname,
    weight,
    info.source_type,
    info.destination_type,
    child_templates
  ]

  ShardTemplate.new(*template_args)
end
59
+
60
# Enumeration number parsed from this shard's table prefix by the
# class-level parse_enumeration.
def enumeration
  prefix = table_prefix
  self.class.parse_enumeration(prefix)
end
63
+
64
# Returns a hash mapping the canonical ShardId of this shard, and recursively
# of all its children, to the actual ids.
#
# The canonical table name is the canonical table prefix plus the suffix
# registered in SHARD_SUFFIXES for the last dotted segment of class_name
# (nothing is appended when there is no suffix). enum defaults to this
# shard's own enumeration and is passed unchanged to the children.
def canonical_shard_id_map(base_prefix = "shard", table_id = nil, enum = nil)
  enum ||= enumeration

  base           = Shard.canonical_table_prefix(enum, table_id, base_prefix)
  short_type     = class_name.split('.').last
  canonical_name = [base, SHARD_SUFFIXES[short_type]].compact.join('_')

  mapping = { ShardId.new(hostname, canonical_name) => id }
  children.each do |child|
    mapping.update(child.canonical_shard_id_map(base_prefix, table_id, enum))
  end
  mapping
end
75
+ end
76
+
77
+ class Nameserver
78
+
79
+ DEFAULT_PORT = 7917
80
+ DEFAULT_RETRIES = 20
81
+ PARALLELISM = 10
82
+
83
+ attr_reader :hosts, :logfile, :dryrun, :framed
84
+ alias dryrun? dryrun
85
+
86
# Accepts any number of hosts (nested arrays are flattened), optionally
# followed by an options hash with the keys :retries, :log, :dry_run and
# :framed. Missing options fall back to DEFAULT_RETRIES, "/tmp/gizzmo.log",
# false and false respectively.
def initialize(*hosts)
  options = {}
  options = hosts.pop if hosts.last.is_a?(Hash)

  @hosts   = hosts.flatten
  @retries = options[:retries] || DEFAULT_RETRIES
  @logfile = options[:log]     || "/tmp/gizzmo.log"
  @dryrun  = options[:dry_run] || false
  @framed  = options[:framed]  || false
end
94
+
95
# Fetches each shard by id through the primary client, retrying each call.
def get_shards(ids)
  ids.map do |shard_id|
    with_retry { client.get_shard(shard_id) }
  end
end

# Calls reload_config on every client, retrying each call.
def reload_config
  all_clients.each do |each_client|
    with_retry { each_client.reload_config }
  end
end

# Starts a shard copy on one randomly chosen client. The client is picked
# once, outside the retry block, so retries go to the same client.
def copy_shard(from_shard_id, to_shard_id)
  chosen = random_client
  with_retry { chosen.copy_shard(from_shard_id, to_shard_id) }
end
107
+
108
# True when the primary client responds to method, otherwise falls back to
# the default lookup.
#
# Takes the optional include_private argument of Object#respond_to?. The
# previous one-argument override raised ArgumentError for callers that pass
# it. The bare super forwards both arguments.
def respond_to?(method, include_private = false)
  client.respond_to?(method) || super
end
111
+
112
# Forwards any call the primary client responds to, wrapped in with_retry.
# Everything else goes to the default method_missing.
def method_missing(method, *args, &block)
  return super unless client.respond_to?(method)

  with_retry { client.send(method, *args, &block) }
end

# Builds a Manifest for the given table ids using this nameserver.
def manifest(*table_ids)
  Manifest.new(self, table_ids)
end
123
+
124
+ private
125
+
126
# Client for the first host, created on first use and cached in @client.
def client
  @client = create_client(hosts.first) unless @client
  @client
end

# One client per host, created on first use and cached in @all_clients.
def all_clients
  @all_clients = hosts.map { |host| create_client(host) } unless @all_clients
  @all_clients
end

# Picks one of all_clients at a random index.
def random_client
  pool = all_clients
  pool[rand(pool.length)]
end
137
+
138
# Builds a Manager for a "host" or "host:port" string. The port falls back
# to DEFAULT_PORT and is converted with to_i.
def create_client(host)
  hostname, port = host.split(":")
  port_number = (port || DEFAULT_PORT).to_i

  Manager.new(hostname, port_number, logfile, framed, dryrun)
end
143
+
144
+ private
145
+
146
# Runs the block, retrying up to @retries more times (sleeping 2 seconds
# between attempts) when it raises ThriftException, NoMethodError or
# GizzardException. A GizzardException whose message does not mention
# "Communications link failure" is re-raised at once. When the retries are
# used up the last exception is re-raised.
def with_retry
  attempts_left = @retries

  begin
    yield
  rescue ThriftClient::Simple::ThriftException, NoMethodError, Gizzard::GizzardException => e
    raise if e.is_a?(Gizzard::GizzardException) && e.message !~ /Communications link failure/

    attempts_left -= 1
    raise if attempts_left < 0

    sleep 2
    retry
  end
end
155
+
156
+ class Manifest
157
+ attr_reader :forwardings, :links, :shard_infos, :trees, :templates
158
+
159
+
160
# Dumps the nameserver state for every table id and derives:
#   @forwardings - all forwardings, flattened into one list
#   @links       - up_id => list of [down_id, weight] pairs
#   @shard_infos - shard id => shard info
#   @trees       - forwarding => shard tree built from its shard_id
#   @templates   - shard tree template => list of forwardings
def initialize(nameserver, table_ids)
  states = table_ids.map { |table_id| nameserver.dump_nameserver(table_id) }

  @forwardings = states.map { |state| state.forwardings }.flatten

  @links = {}
  states.map { |state| state.links }.flatten.each do |link|
    (@links[link.up_id] ||= []) << [link.down_id, link.weight]
  end

  @shard_infos = {}
  states.map { |state| state.shards }.flatten.each do |shard|
    @shard_infos[shard.id] = shard
  end

  # build_tree reads links and shard_infos, so those are filled in first
  @trees = {}
  @forwardings.each do |forwarding|
    @trees[forwarding] = build_tree(forwarding.shard_id)
  end

  @templates = {}
  @trees.each do |forwarding, shard|
    (@templates[shard.template] ||= []) << forwarding
  end
end
181
+
182
+ private
183
+
184
# Returns filter[:forwardings] when it is set. Otherwise asks the nameserver
# for its forwardings and, when filter[:table_id] is set, removes in place
# those belonging to other tables.
def get_filtered_forwardings(nameserver, filter)
  preset = filter[:forwardings]
  return preset if preset

  forwardings  = nameserver.get_forwardings
  wanted_table = filter[:table_id]

  forwardings.delete_if { |forwarding| forwarding.table_id != wanted_table } if wanted_table

  forwardings
end
195
+
196
# Recursively builds the Shard tree rooted at shard_id: children come from
# links (each with its own link weight), the info from shard_infos. Raises
# RuntimeError when no info is known for shard_id.
def build_tree(shard_id, link_weight=ShardTemplate::DEFAULT_WEIGHT)
  child_links = links[shard_id] || []
  children = child_links.map do |child_id, child_weight|
    build_tree(child_id, child_weight)
  end

  info = shard_infos[shard_id]
  raise "shard info not found for: #{shard_id}" unless info

  Shard.new(info, children, link_weight)
end
204
+ end
205
+ end
206
+ end
@@ -0,0 +1,252 @@
1
+ module Gizzard
2
+ class ShardTemplate
3
+ include Comparable
4
+
5
+ ABSTRACT_HOST = "localhost"
6
+ DEFAULT_WEIGHT = 1
7
+
8
+ attr_reader :type, :weight, :source_type, :dest_type
9
+
10
# Stores the template attributes. A nil source_type or dest_type is stored
# as an empty string.
def initialize(type, host, weight, source_type, dest_type, children)
  @type        = type
  @host        = host
  @weight      = weight
  @source_type = source_type || ''
  @dest_type   = dest_type || ''
  @children    = children
end
14
+
15
# True unless the last dotted segment of type is listed in
# Shard::VIRTUAL_SHARD_TYPES.
def self.concrete?(type)
  short_name = type.split('.').last
  !Shard::VIRTUAL_SHARD_TYPES.include?(short_name)
end

# True when this template's type is concrete.
def concrete?
  self.class.concrete?(type)
end

# True when the last dotted segment of type is listed in
# Shard::REPLICATING_SHARD_TYPES.
def replicating?
  short_name = type.split('.').last
  Shard::REPLICATING_SHARD_TYPES.include?(short_name)
end

# True unless the last dotted segment of type is listed in
# Shard::INVALID_COPY_TYPES.
def valid_copy_source?
  short_name = type.split('.').last
  !Shard::INVALID_COPY_TYPES.include?(short_name)
end
30
+
31
# "type/host" for concrete templates, otherwise just the type as a string.
def identifier
  return type.to_s unless concrete?

  "#{type}/#{host}"
end

# Suffix registered in Shard::SHARD_SUFFIXES for the last dotted segment of
# type (nil when there is none).
def table_name_suffix
  short_name = type.split('.').last
  Shard::SHARD_SUFFIXES[short_name]
end

# Own host for concrete templates, ABSTRACT_HOST for replicating ones, and
# otherwise the host of the first child.
def host
  return @host if concrete?
  return ABSTRACT_HOST if replicating?

  children.first.host
end
48
+
49
# Children sorted in descending order (a fresh array on every call).
def children
  @children.sort { |left, right| right <=> left }
end

# This template followed by all descendants of its children.
def descendants
  nested = children.map { |child| child.descendants }
  [self] + nested.flatten
end

alias flatten descendants

# Descendants (including self) that are concrete.
def concrete_descendants
  descendants.select { |template| template.concrete? }
end

# Concrete templates usable as copy sources: none when this template is not
# a valid copy source, itself when concrete, otherwise the copy sources of
# its children.
def copy_sources
  return [] unless valid_copy_source?
  return [self] if concrete?

  sources = []
  children.each { |child| sources.concat(child.copy_sources) }
  sources
end

# Same string as to_config.
def inspect
  to_config
end
alias to_s inspect
72
+
73
+ # Concretization
74
+
75
# ShardId for this template: host plus table_prefix with the table name
# suffix appended (when there is one). If translations has a truthy entry for
# that id, the entry is returned instead.
def to_shard_id(table_prefix, translations = {})
  full_prefix = [table_prefix, table_name_suffix].compact.join('_')
  shard_id    = ShardId.new(host, full_prefix)
  translated  = translations[shard_id]

  translated ? translated : shard_id
end

# ShardInfo for this template with a last argument of 0.
def to_shard_info(table_prefix, translations = {})
  shard_id = to_shard_id(table_prefix, translations)
  ShardInfo.new(shard_id, type, source_type, dest_type, 0)
end

# Shard tree for this template and, recursively, its children.
def to_shard(table_prefix, translations = {})
  info         = to_shard_info(table_prefix, translations)
  child_shards = children.map { |child| child.to_shard(table_prefix, translations) }

  Shard.new(info, child_shards, weight)
end
90
+
91
+ # Similarity/Equality
92
+
93
# Orders templates by host, type, source type, dest type and weight, and by
# children when all of those are equal. Raises ArgumentError when other is
# not a ShardTemplate.
def <=>(other)
  raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

  own    = [host, type, source_type.to_s, dest_type.to_s, weight]
  theirs = [other.host, other.type, other.source_type.to_s, other.dest_type.to_s, other.weight]

  order = own <=> theirs
  return order unless order == 0

  children <=> other.children
end
104
+
105
# True when other is a ShardTemplate that compares as 0 with <=>.
def eql?(other)
  other.is_a?(ShardTemplate) && (self <=> other) == 0
end

# True when host, type, source type and dest type match (weight and children
# are ignored). Raises ArgumentError when other is not a ShardTemplate.
def shard_eql?(other)
  raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

  own    = [host, type, source_type.to_s, dest_type.to_s]
  theirs = [other.host, other.type, other.source_type.to_s, other.dest_type.to_s]
  own == theirs
end

# Like shard_eql? but the weight must match as well.
# Raises ArgumentError when other is not a ShardTemplate.
def link_eql?(other)
  raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

  own    = [host, type, source_type.to_s, dest_type.to_s, weight]
  theirs = [other.host, other.type, other.source_type.to_s, other.dest_type.to_s, other.weight]
  own == theirs
end

# True when both templates have at least one concrete descendant in common.
# Raises ArgumentError when other is not a ShardTemplate.
def shared_host?(other)
  raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

  common = concrete_descendants & other.concrete_descendants
  common.length > 0
end
129
+
130
+ def hash
131
+ weight.hash + host.hash + type.hash + children.hash
132
+ end
133
+
134
+
135
+ # Config
136
+
137
# Renders this template (without children) as "Type(arg,arg,...)". The
# arguments are the host (when the identifier contains one), the weight and,
# if either is non-empty, the source and dest types.
def config_definition
  parts = identifier.split("/") + [weight]

  typed = [@source_type, @dest_type].any? { |name| !name.empty? }
  parts += [@source_type, @dest_type] if typed

  type_name = parts.first
  params    = parts[1..-1]
  suffix    = params.empty? ? "" : "(#{params.join(",")})"

  type_name + suffix
end

private :config_definition
149
+
150
# Nested structure form: the definition string for a leaf, otherwise a
# one-entry hash from the definition to the child structure (a single child
# is not wrapped in an array).
def to_config_struct
  kids = children
  return config_definition if kids.empty?

  child_defs = kids.map { |child| child.to_config_struct }
  nested     = child_defs.length == 1 ? child_defs.first : child_defs

  { config_definition => nested }
end

# String form: the definition for a leaf, otherwise "definition -> child" or
# "definition -> (child, child, ...)".
def to_config
  kids = children
  return config_definition if kids.empty?

  child_defs = kids.map { |child| child.to_config }
  rendered   = child_defs.length == 1 ? child_defs.first : "(#{child_defs.join(", ")})"

  "#{config_definition} -> #{rendered}"
end
168
+
169
+
170
+ # Class Methods
171
+
172
+ class << self
173
# Parses "definition" or "definition -> children" into a ShardTemplate,
# parsing each child recursively. Raises ArgumentError when "->" is given
# but yields no children.
def parse(string)
  definition_s, children_s = string.split(/\s*->\s*/, 2)

  children = []
  unless children_s.nil?
    children = parse_arg_list(children_s).map { |child_s| parse(child_s) }
    raise ArgumentError, "invalid shard config. -> given, no children found" if children.empty?
  end

  template_args = parse_definition(definition_s) << children
  ShardTemplate.new(*template_args)
end
188
+
189
+ private
190
+
191
# Parses "Type" or "Type(args)" into [type, host, weight, source_type,
# dest_type]. Non-concrete types take no host argument, so their argument
# list starts at the weight. The host (concrete types only) and the weight
# are validated. Missing values default to DEFAULT_WEIGHT and empty strings.
def parse_definition(definition_s)
  type, arg_list = definition_s.split("(", 2)

  args = nil
  unless arg_list.nil?
    args = parse_arg_list("(" + arg_list)
    args.unshift nil unless concrete? type
  end
  host, weight, source_type, dest_type = args

  validate_host_arg(host, definition_s) if concrete? type
  validate_weight_arg(weight, definition_s)

  [type, host, (weight || DEFAULT_WEIGHT).to_i, source_type || "", dest_type || ""]
end
212
+
213
# Splits a comma separated list into stripped items, ignoring commas nested
# inside parentheses. One pair of parentheses wrapping the whole (stripped)
# string is removed first.
def parse_arg_list(string)
  body    = string.strip
  wrapped = body.match(/\A\((.*)\)\Z/)
  body    = wrapped[1] if wrapped

  depth  = 0
  pieces = [[]]

  body.each_char do |char|
    if char == "," && depth == 0
      # top-level comma: start the next item
      pieces << []
    else
      depth += 1 if char == "("
      depth -= 1 if char == ")"
      pieces.last << char
    end
  end

  pieces.map { |chars| chars.join.strip }
end
238
+
239
# Raises ArgumentError when a weight is given and YAML parses it as a String.
# NOTE(review): YAML.load runs on text taken from the shard definition;
# confirm the input is trusted, or switch to a safe loader.
def validate_weight_arg(arg, definition)
  return unless arg
  return unless YAML.load(arg.to_s).is_a?(String)

  raise ArgumentError, "Invalid weight #{arg} for shard in: #{definition}"
end

# Raises ArgumentError when the host is missing or YAML parses it as a Numeric.
# NOTE(review): YAML.load runs on text taken from the shard definition;
# confirm the input is trusted, or switch to a safe loader.
def validate_host_arg(arg, definition)
  acceptable = !arg.nil? && !YAML.load(arg.to_s).is_a?(Numeric)
  return if acceptable

  raise ArgumentError, "Invalid host #{arg} for shard in: #{definition}"
end
250
+ end
251
+ end
252
+ end