gizzmo 0.11.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
+ module Gizzard
2
# Bundle of settings passed to Migrator and forwarded to every
# transformation step (prepare!/copy!/wait_for_copies/cleanup!).
class MigratorConfig
  attr_accessor :prefix, :table_id, :source_type, :destination_type, :forwarding_space, :forwarding_space_min, :manifest

  # Assigns each option through its attribute writer.
  #
  # opts - Hash of attribute name (String or Symbol) => value. Keys without
  #        a matching writer are ignored.
  def initialize(opts = {})
    opts.each do |key, value|
      writer = "#{key}="
      send(writer, value) if respond_to?(writer)
    end
  end

  # Builds the table prefix for the shard with the given enumeration.
  #
  # enum - Integer enumeration; rendered zero-padded to four digits.
  #
  # Returns a String of the form "<prefix>_<table id>_<enum>". A negative
  # table id is rendered as "n<abs>" (the same convention as
  # Shard.canonical_table_prefix); nil segments are left out.
  def shard_name(enum)
    table_id_segment = (table_id && table_id < 0) ? "n#{table_id.abs}" : table_id
    [prefix, table_id_segment, "%04d" % enum].compact.join("_")
  end
end
14
+
15
+ class Migrator
16
+ BALANCE_TOLERANCE = 1
17
+
18
+ attr_reader :configured_templates, :existing_map, :existing_templates, :total_shards
19
+
20
+ # populated via derive_changes
21
+ attr_reader :new_templates, :unrecognized_templates, :similar_templates, :unchanged_templates
22
+
23
+ def initialize(existing_map, config_templates, default_total_shards, config)
24
+ @configured_templates = config_templates
25
+ @existing_map = existing_map
26
+
27
+ @existing_templates = existing_map.keys
28
+ @total_shards = @existing_map.values.map { |a| a.length }.inject { |a, b| a + b } || default_total_shards
29
+ @config = config
30
+
31
+ derive_changes
32
+ end
33
+
34
+ def prepare!(nameserver)
35
+ transformations.each {|t| t.prepare! nameserver, @config }
36
+ end
37
+
38
+ def copy!(nameserver)
39
+ transformations.each {|t| t.copy! nameserver, @config }
40
+ end
41
+
42
+ def wait_for_copies(nameserver)
43
+ transformations.each {|t| t.wait_for_copies nameserver, @config }
44
+ end
45
+
46
+ def cleanup!(nameserver)
47
+ transformations.each {|t| t.cleanup! nameserver, @config }
48
+ end
49
+
50
+ def transformations
51
+ return @transformations if @transformations
52
+
53
+ # no changes
54
+ return @transformations = [] if similar_templates.empty? and unrecognized_templates.empty? and new_templates.empty?
55
+
56
+ configured_map = configured_templates.inject({}) {|h, t| h.update t => [] }
57
+
58
+ @transformations = []
59
+
60
+ if existing_templates.empty?
61
+ # no forwardings exist, we must populate the forwarding index.
62
+ forwardings = generate_new_forwardings(total_shards)
63
+
64
+ # add the new shard ids to a member of the configured map. will
65
+ # be rebalanced later.
66
+ configured_map.values.first.concat forwardings.values
67
+
68
+ @transformations << ForwardingTransformation.new(@config.table_id, forwardings.inject({}) {|f, (b, e)| f.update b => @config.shard_name(e) })
69
+ end
70
+
71
+ # map the unchanged templates straight over
72
+ move_unchanged(existing_map, configured_map)
73
+
74
+ # map similar templates over to their new versions
75
+ move_similar(existing_map, configured_map)
76
+
77
+ # move shards from unrecognized templates to new templates (or
78
+ # existing ones)
79
+ move_unrecognized_to_new(existing_map, configured_map)
80
+
81
+ # rebalance
82
+ rebalance_shards(configured_map)
83
+
84
+ # transformation generation
85
+ @transformations = generate_transformations(existing_map, configured_map) + @transformations
86
+ end
87
+
88
+ private
89
+
90
+ def generate_new_forwardings(shard_count)
91
+ forwardings = {}
92
+ step_size = @config.forwarding_space / shard_count
93
+ bases = (0...shard_count).map { |i| @config.forwarding_space_min + (i * step_size) }
94
+
95
+ bases.each_with_index do |base_id, i|
96
+ forwardings[base_id] = i
97
+ end
98
+
99
+ forwardings
100
+ end
101
+
102
+ def move_unchanged(existing, configured)
103
+ unchanged_templates.each {|u| configured[u] = existing[u].dup }
104
+ end
105
+
106
+ def move_similar(existing, configured)
107
+ similar_templates.each {|from, to| configured[to] = existing[from].dup }
108
+ end
109
+
110
+ def move_unrecognized_to_new(existing, configured)
111
+ # duplicate so we can mutate our copy
112
+ unrecognized = unrecognized_templates.dup
113
+
114
+ # for each new template, grab an unrecognized one's shards
115
+ # and pop it off
116
+ new_templates.each do |n|
117
+ if u = unrecognized.pop
118
+ configured[n] = existing[u].dup
119
+ end
120
+ end
121
+
122
+ # if there are any unrecognized templates for which we haven't
123
+ # moved shards over, add their shards to the first template. they will get rebalanced later
124
+ leftover_shards = unrecognized.inject([]) {|a, u| a.concat existing[u] }
125
+
126
+ configured.values.last.concat leftover_shards unless leftover_shards.empty?
127
+ end
128
+
129
+ def rebalance_shards(configured)
130
+ until shards_balanced? configured
131
+ smallest(configured) << largest(configured).pop
132
+ end
133
+ end
134
+
135
+ def generate_transformations(existing, configured)
136
+ existing_shards = shards_to_templates(existing)
137
+ configured_shards = shards_to_templates(configured)
138
+
139
+ # find the list of shards which have moved, and emit a
140
+ # transformation for each one.
141
+ (configured_shards.to_a - existing_shards.to_a).inject({}) do |transformations, (shard, to)|
142
+ from = existing_shards[shard]
143
+ (transformations[[from, to]] ||= Transformation.new(from, to, [])).shards << shard
144
+ transformations
145
+ end.values
146
+ end
147
+
148
+ def shards_balanced?(template_map)
149
+ sorted_sizes = template_map.values.map {|s| s.length }.uniq.sort.reverse
150
+ sorted_sizes.first - sorted_sizes.last <= BALANCE_TOLERANCE
151
+ end
152
+
153
+ def smallest(template_map)
154
+ template_map.values.sort {|a,b| a.length <=> b.length }.first
155
+ end
156
+
157
+ def largest(template_map)
158
+ template_map.values.sort {|a,b| b.length <=> a.length }.first
159
+ end
160
+
161
+ def shards_to_templates(templates_to_shards)
162
+ templates_to_shards.inject({}) do |h, (template, shards)|
163
+ shards.each {|shard| h[shard] = template }; h
164
+ end
165
+ end
166
+
167
+ def derive_changes
168
+ @unrecognized_templates, @new_templates, related_templates =
169
+ split_set(existing_templates, configured_templates) {|a, b| a.similar? b }
170
+
171
+ @similar_templates = related_templates.reject {|(a,b)| a == b }
172
+ @unchanged_templates = related_templates.keys - @similar_templates.keys
173
+ end
174
+
175
+ def split_set(a, b, &predicate)
176
+ in_a = a.dup
177
+ in_b = b.dup
178
+ overlap = {}
179
+
180
+ in_a.each_with_index do |a, a_i|
181
+ in_b.each_with_index do |b, b_i|
182
+ if predicate.call(a, b)
183
+ overlap[a] = b
184
+ in_a[a_i] = in_b[b_i] = nil
185
+ end
186
+ end
187
+ end
188
+
189
+ [in_a.compact, in_b.compact, overlap]
190
+ end
191
+ end
192
+ end
@@ -0,0 +1,206 @@
1
+ module Gizzard
2
# A node in a shard tree: its shard info, its child Shards and the weight
# of the link from its parent.
Shard = Struct.new(:info, :children, :weight)

class Shard
  class << self
    # Builds "<base_prefix>_<table id>_<enum>".
    #
    # enum        - Integer enumeration, zero-padded to at least 4 digits.
    # table_id    - optional Integer; negative ids render as "n<abs>",
    #               nil leaves the segment out.
    # base_prefix - leading segment, "shard" by default.
    def canonical_table_prefix(enum, table_id = nil, base_prefix = "shard")
      enum_s = "%0.4i" % enum
      table_id_s =
        if table_id.nil?
          nil
        elsif table_id < 0
          "n#{table_id.abs}"
        else
          table_id.to_s
        end

      [base_prefix, table_id_s, enum_s].compact.join('_')
    end

    # Extracts the enumeration from a table prefix.
    #
    # The last run of three or more digits is used: in the canonical form
    # the enumeration follows the table id, so a table id with three or
    # more digits must not be mistaken for it.
    #
    # Raises RuntimeError when the prefix holds no such run.
    def parse_enumeration(table_prefix)
      runs = table_prefix.scan(/\d{3,}/)
      raise "Cannot derive enumeration!" if runs.empty?

      runs.last.to_i
    end
  end

  # Wrapper shard classes (matched on the last dotted segment of a class
  # name by the code that consults these lists).
  VIRTUAL_SHARD_TYPES = [
    "FailingOverShard",
    "ReplicatingShard",
    "ReadOnlyShard",
    "WriteOnlyShard",
    "BlockedShard",
  ]

  REPLICATING_SHARD_TYPES = ["ReplicatingShard", "FailingOverShard"]

  INVALID_COPY_TYPES = ["ReadOnlyShard", "WriteOnlyShard", "BlockedShard"]

  # Table-name suffix appended for each virtual shard type.
  SHARD_SUFFIXES = {
    "FailingOverShard" => 'replicating',
    "ReplicatingShard" => 'replicating',
    "ReadOnlyShard" => 'read_only',
    "WriteOnlyShard" => 'write_only',
    "BlockedShard" => 'blocked'
  }

  # Delegators to the wrapped shard info / shard id.
  def id; info.id end
  def hostname; id.hostname end
  def table_prefix; id.table_prefix end
  def class_name; info.class_name end
  def source_type; info.source_type end
  def destination_type; info.destination_type end
  def busy; info.busy end

  # Builds the ShardTemplate describing this shard and, recursively, its
  # children.
  def template
    child_templates = children.map { |c| c.template }

    ShardTemplate.new(info.class_name,
                      id.hostname,
                      weight,
                      info.source_type,
                      info.destination_type,
                      child_templates)
  end

  # Enumeration parsed from this shard's table prefix.
  def enumeration
    self.class.parse_enumeration(table_prefix)
  end

  # Maps the canonical ShardId of this shard and of every descendant to
  # the id each one currently has.
  #
  # enum - enumeration to use; defaults to this shard's own and is passed
  #        unchanged to all descendants.
  def canonical_shard_id_map(base_prefix = "shard", table_id = nil, enum = nil)
    enum ||= self.enumeration
    base = Shard.canonical_table_prefix(enum, table_id, base_prefix)
    suffix = SHARD_SUFFIXES[class_name.split('.').last]
    canonical_name = [base, suffix].compact.join('_')
    canonical_id = ShardId.new(self.hostname, canonical_name)

    children.inject({ canonical_id => self.id }) do |map, child|
      map.update child.canonical_shard_id_map(base_prefix, table_id, enum)
    end
  end
end
76
+
77
# Front end to one or more nameserver hosts. Calls not defined here are
# forwarded (with retries) to a Manager client for the first host.
class Nameserver

  DEFAULT_PORT = 7917
  DEFAULT_RETRIES = 20
  PARALLELISM = 10

  attr_reader :hosts, :logfile, :dryrun, :framed
  alias dryrun? dryrun

  # hosts - any number of "host" or "host:port" Strings (or Arrays of
  #         them), optionally followed by an options Hash:
  #         :retries - retry budget for with_retry (default DEFAULT_RETRIES)
  #         :log     - log file path (default "/tmp/gizzmo.log")
  #         :dry_run - passed to each Manager (default false)
  #         :framed  - passed to each Manager (default false)
  def initialize(*hosts)
    options = hosts.last.is_a?(Hash) ? hosts.pop : {}
    @retries = options[:retries] || DEFAULT_RETRIES
    @logfile = options[:log] || "/tmp/gizzmo.log"
    @dryrun = options[:dry_run] || false
    @framed = options[:framed] || false
    @hosts = hosts.flatten
  end

  # Fetches each shard by id from the first host's client.
  def get_shards(ids)
    ids.map { |id| with_retry { client.get_shard(id) } }
  end

  # Asks every host to reload its configuration.
  def reload_config
    all_clients.each { |c| with_retry { c.reload_config } }
  end

  # Starts a shard copy through a randomly chosen host.
  def copy_shard(from_shard_id, to_shard_id)
    c = random_client
    with_retry { c.copy_shard(from_shard_id, to_shard_id) }
  end

  # True when the client or this object responds to the method.
  #
  # Accepts the optional second argument of Object#respond_to? so that
  # two-argument calls do not raise ArgumentError; it only affects the
  # check made on this object itself.
  def respond_to?(method, include_private = false)
    client.respond_to?(method) || super
  end

  # Forwards any method the client understands to it, inside with_retry.
  def method_missing(method, *args, &block)
    if client.respond_to?(method)
      with_retry { client.send(method, *args, &block) }
    else
      super
    end
  end

  # Builds a Manifest from the dumped state of the given table ids.
  def manifest(*table_ids)
    Manifest.new(self, table_ids)
  end

  private

  # Client for the first host, created on first use.
  def client
    @client ||= create_client(hosts.first)
  end

  # One client per host, created on first use.
  def all_clients
    @all_clients ||= hosts.map { |host| create_client(host) }
  end

  def random_client
    all_clients[rand(all_clients.length)]
  end

  # Builds a Manager for "host" or "host:port" (DEFAULT_PORT when the
  # port is omitted).
  def create_client(host)
    host, port = host.split(":")
    port ||= DEFAULT_PORT
    Manager.new(host, port.to_i, logfile, framed, dryrun)
  end

  # Runs the block, retrying after a 2 second pause on Thrift errors,
  # NoMethodError and "Communications link failure" Gizzard errors, up to
  # @retries times. Any other GizzardException is re-raised at once.
  def with_retry
    # `retry` re-enters the method body with locals intact, so ||= only
    # seeds the counter on the first pass.
    times ||= @retries
    yield
  rescue ThriftClient::Simple::ThriftException, NoMethodError, Gizzard::GizzardException => e
    raise if e.is_a?(Gizzard::GizzardException) && e.message !~ /Communications link failure/

    times -= 1
    (times < 0) ? raise : (sleep 2; retry)
  end

  # Snapshot of forwardings, links and shard infos for a set of tables,
  # together with the shard trees and templates derived from them.
  class Manifest
    attr_reader :forwardings, :links, :shard_infos, :trees, :templates

    # nameserver - object answering dump_nameserver(table_id) with a state
    #              that exposes forwardings, links and shards.
    # table_ids  - Array of table ids to dump.
    #
    # Raises RuntimeError if a linked or forwarded shard id has no info.
    def initialize(nameserver, table_ids)
      states = table_ids.map { |id| nameserver.dump_nameserver(id) }

      @forwardings = states.map { |s| s.forwardings }.flatten

      # up_id => [[down_id, weight], ...]
      @links = states.map { |s| s.links }.flatten.inject({}) do |h, link|
        (h[link.up_id] ||= []) << [link.down_id, link.weight]
        h
      end

      # shard id => shard info
      @shard_infos = states.map { |s| s.shards }.flatten.inject({}) do |h, shard|
        h.update shard.id => shard
      end

      # forwarding => root Shard of its tree
      @trees = @forwardings.inject({}) do |h, forwarding|
        h.update forwarding => build_tree(forwarding.shard_id)
      end

      # template => forwardings whose tree has that template
      @templates = @trees.inject({}) do |h, (forwarding, shard)|
        (h[shard.template] ||= []) << forwarding
        h
      end
    end

    private

    # Returns filter[:forwardings] when given; otherwise the nameserver's
    # forwardings, restricted to filter[:table_id] when that is set.
    def get_filtered_forwardings(nameserver, filter)
      return filter[:forwardings] if filter[:forwardings]

      forwardings = nameserver.get_forwardings

      if table_id = filter[:table_id]
        forwardings.reject! { |f| f.table_id != table_id }
      end

      forwardings
    end

    # Recursively builds the Shard tree rooted at shard_id by following
    # links; link_weight is the weight of the link leading to this node.
    def build_tree(shard_id, link_weight=ShardTemplate::DEFAULT_WEIGHT)
      children = (links[shard_id] || []).map do |(child_id, child_weight)|
        build_tree(child_id, child_weight)
      end

      info = shard_infos[shard_id] or raise "shard info not found for: #{shard_id}"
      Shard.new(info, children, link_weight)
    end
  end
end
206
+ end
@@ -0,0 +1,252 @@
1
+ module Gizzard
2
# Describes the shape of a shard tree independently of any table prefix:
# shard type, host, link weight, source/destination types and child
# templates. Can be parsed from and rendered to a config string such as
#   "ReplicatingShard(1) -> (SqlShard(host1,1), SqlShard(host2,1))"
class ShardTemplate
  include Comparable

  ABSTRACT_HOST = "localhost"
  DEFAULT_WEIGHT = 1

  attr_reader :type, :weight, :source_type, :dest_type

  # source_type and dest_type default to '' when nil.
  def initialize(type, host, weight, source_type, dest_type, children)
    @type, @host, @weight, @source_type, @dest_type, @children =
      type, host, weight, source_type || '', dest_type || '', children
  end

  # True when the last dotted segment of type is not a virtual shard type.
  def self.concrete?(type)
    !Shard::VIRTUAL_SHARD_TYPES.include? type.split('.').last
  end

  def concrete?
    self.class.concrete? type
  end

  def replicating?
    Shard::REPLICATING_SHARD_TYPES.include? type.split('.').last
  end

  def valid_copy_source?
    !Shard::INVALID_COPY_TYPES.include? type.split('.').last
  end

  # "type/host" for concrete templates, just the type otherwise.
  def identifier
    concrete? ? "#{type}/#{host}" : type.to_s
  end

  # Table-name suffix for this type, or nil when it has none.
  def table_name_suffix
    Shard::SHARD_SUFFIXES[type.split('.').last]
  end

  # Own host for concrete templates, ABSTRACT_HOST for replicating ones,
  # otherwise the host of the first (sorted) child.
  def host
    if concrete?
      @host
    elsif replicating?
      ABSTRACT_HOST
    else
      children.first.host
    end
  end

  # Children in descending <=> order, so that equal trees compare and
  # render the same regardless of the order they were given in.
  def children
    @children.sort { |a, b| b <=> a }
  end

  # This template followed by all of its descendants, depth first.
  def descendants
    [self].concat children.map { |c| c.descendants }.flatten
  end

  alias flatten descendants

  def concrete_descendants
    descendants.select { |t| t.concrete? }
  end

  # Concrete templates under this one that may be copied from; empty as
  # soon as this template itself is not a valid copy source.
  def copy_sources
    return [] unless self.valid_copy_source?
    self.concrete? ? [self] : children.inject([]) { |a, c| a.concat c.copy_sources }
  end

  def inspect
    to_config
  end
  alias to_s inspect

  # Concretization

  # ShardId for this template under table_prefix (plus the type's suffix,
  # if any); translations may map that id to a replacement.
  def to_shard_id(table_prefix, translations = {})
    table_prefix = [table_prefix, table_name_suffix].compact.join('_')
    shard_id = ShardId.new(host, table_prefix)
    translations[shard_id] || shard_id
  end

  def to_shard_info(table_prefix, translations = {})
    ShardInfo.new(to_shard_id(table_prefix, translations), type, source_type, dest_type, 0)
  end

  # Builds the Shard tree for this template and its children.
  def to_shard(table_prefix, translations = {})
    child_shards = children.map { |c| c.to_shard(table_prefix, translations) }
    Shard.new(to_shard_info(table_prefix, translations), child_shards, weight)
  end

  # Similarity/Equality

  # Orders by host, type, source type, dest type, weight, then children.
  #
  # Returns nil for anything that is not a ShardTemplate, as the <=>
  # protocol expects. Comparable#== then answers false for foreign objects
  # instead of raising, while <, > and friends still raise ArgumentError.
  def <=>(other)
    return nil unless other.is_a? ShardTemplate

    to_a = lambda { |t| [t.host, t.type, t.source_type.to_s, t.dest_type.to_s, t.weight] }

    cmp = to_a.call(self) <=> to_a.call(other)
    cmp == 0 ? children <=> other.children : cmp
  end

  def eql?(other)
    return false unless other.is_a? ShardTemplate
    (self <=> other) == 0
  end

  # Same host, type and source/dest types; weight and children ignored.
  def shard_eql?(other)
    raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

    to_a = lambda { |t| [t.host, t.type, t.source_type.to_s, t.dest_type.to_s] }
    to_a.call(self) == to_a.call(other)
  end

  # Like shard_eql? but the weight must match as well.
  def link_eql?(other)
    raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

    to_a = lambda { |t| [t.host, t.type, t.source_type.to_s, t.dest_type.to_s, t.weight] }
    to_a.call(self) == to_a.call(other)
  end

  # True when both trees contain at least one equal concrete descendant.
  def shared_host?(other)
    raise ArgumentError, "other is not a ShardTemplate" unless other.is_a? ShardTemplate

    (self.concrete_descendants & other.concrete_descendants).length > 0
  end

  def hash
    weight.hash + host.hash + type.hash + children.hash
  end


  # Config

  # "Type(host,weight[,source_type,dest_type])"; the host is omitted for
  # non-concrete templates.
  def config_definition
    args = identifier.split("/")
    args << weight
    args.concat [@source_type, @dest_type] unless [@source_type, @dest_type].reject { |s| s.empty? }.empty?

    type = args.shift
    args_s = args.empty? ? "" : "(#{args.join(",")})"

    type + args_s
  end

  private :config_definition

  # Nested String/Hash/Array rendering of the tree.
  def to_config_struct
    if children.empty?
      config_definition
    else
      child_defs = children.map { |c| c.to_config_struct }
      { config_definition => (child_defs.length == 1 ? child_defs.first : child_defs) }
    end
  end

  # Single-line "parent -> (child, child)" rendering of the tree.
  def to_config
    if children.empty?
      config_definition
    else
      child_defs = children.map { |c| c.to_config }
      child_defs_s = child_defs.length == 1 ? child_defs.first : "(#{child_defs.join(", ")})"
      "#{config_definition} -> #{child_defs_s}"
    end
  end


  # Class Methods

  class << self
    # Parses a config string into a ShardTemplate tree.
    #
    # Raises ArgumentError for a missing or numeric host on a concrete
    # type, a non-numeric weight, or "->" with nothing after it.
    def parse(string)
      definition_s, children_s = string.split(/\s*->\s*/, 2)

      children =
        if children_s.nil?
          []
        else
          # parse_arg_list yields "" for an empty list; drop blanks so a
          # dangling "->" is reported below instead of crashing further in.
          list = parse_arg_list(children_s).reject { |c| c.empty? }.map { |c| parse c }
          raise ArgumentError, "invalid shard config. -> given, no children found" if list.empty?
          list
        end

      template_args = parse_definition(definition_s) << children
      ShardTemplate.new(*template_args)
    end

    private

    # Splits "Type(args)" into [type, host, weight, source_type, dest_type].
    def parse_definition(definition_s)
      type, arg_list = definition_s.split("(", 2)

      host, weight, source_type, dest_type =
        if arg_list.nil?
          nil
        else
          args = parse_arg_list("(" + arg_list)
          # non-concrete types take no host argument; pad its slot
          args.unshift nil unless concrete? type
          args
        end

      validate_host_arg(host, definition_s) if concrete? type
      validate_weight_arg(weight, definition_s)

      weight = (weight || DEFAULT_WEIGHT).to_i
      source_type ||= ""
      dest_type ||= ""

      [type, host, weight, source_type, dest_type]
    end

    # Splits on top-level commas (commas inside parentheses are kept),
    # after removing one pair of enclosing parentheses if present.
    # Always returns at least one (possibly empty) String.
    def parse_arg_list(string)
      string = string.strip
      if m = string.match(/\A\((.*)\)\Z/)
        string = m[1]
      end

      depth = 0
      results = [[]]

      string.each_char do |c|
        case c
        when ","
          if depth == 0
            results << []
            next
          end
        when "(" then depth += 1
        when ")" then depth -= 1
        end

        results.last << c
      end

      results.map { |r| r.join.strip }
    end

    # NOTE(review): the two validators run YAML.load on text taken from
    # the config string purely to tell numbers from strings. If config
    # strings can come from an untrusted source, switch to a safe loader
    # or an explicit numeric check.
    def validate_weight_arg(arg, definition)
      if arg && YAML.load(arg.to_s).is_a?(String)
        raise ArgumentError, "Invalid weight #{arg} for shard in: #{definition}"
      end
    end

    def validate_host_arg(arg, definition)
      if arg.nil? || YAML.load(arg.to_s).is_a?(Numeric)
        raise ArgumentError, "Invalid host #{arg} for shard in: #{definition}"
      end
    end
  end
end
252
+ end