jetpants 0.8.0 → 0.8.2

Files changed (41)
  1. checksums.yaml +7 -0
  2. data/README.rdoc +4 -9
  3. data/bin/jetpants +7 -6
  4. data/doc/capacity_plan.rdoc +77 -0
  5. data/doc/commands.rdoc +1 -1
  6. data/doc/jetpants_collins.rdoc +2 -1
  7. data/doc/online_schema_change.rdoc +45 -0
  8. data/doc/plugins.rdoc +7 -1
  9. data/doc/requirements.rdoc +1 -1
  10. data/doc/upgrade_helper.rdoc +68 -0
  11. data/lib/jetpants/db/client.rb +2 -1
  12. data/lib/jetpants/db/import_export.rb +12 -3
  13. data/lib/jetpants/db/replication.rb +6 -2
  14. data/lib/jetpants/db/schema.rb +40 -0
  15. data/lib/jetpants/db/server.rb +2 -2
  16. data/lib/jetpants/host.rb +12 -1
  17. data/lib/jetpants/pool.rb +41 -0
  18. data/lib/jetpants/shard.rb +201 -124
  19. data/lib/jetpants/table.rb +80 -10
  20. data/plugins/capacity_plan/capacity_plan.rb +353 -0
  21. data/plugins/capacity_plan/commandsuite.rb +19 -0
  22. data/plugins/capacity_plan/monkeypatch.rb +20 -0
  23. data/plugins/jetpants_collins/db.rb +45 -6
  24. data/plugins/jetpants_collins/jetpants_collins.rb +32 -21
  25. data/plugins/jetpants_collins/pool.rb +22 -1
  26. data/plugins/jetpants_collins/shard.rb +9 -2
  27. data/plugins/jetpants_collins/topology.rb +8 -9
  28. data/plugins/online_schema_change/commandsuite.rb +56 -0
  29. data/plugins/online_schema_change/db.rb +33 -0
  30. data/plugins/online_schema_change/online_schema_change.rb +5 -0
  31. data/plugins/online_schema_change/pool.rb +105 -0
  32. data/plugins/online_schema_change/topology.rb +56 -0
  33. data/plugins/simple_tracker/shard.rb +1 -1
  34. data/plugins/upgrade_helper/commandsuite.rb +212 -0
  35. data/plugins/upgrade_helper/db.rb +78 -0
  36. data/plugins/upgrade_helper/host.rb +22 -0
  37. data/plugins/upgrade_helper/pool.rb +259 -0
  38. data/plugins/upgrade_helper/shard.rb +61 -0
  39. data/plugins/upgrade_helper/upgrade_helper.rb +21 -0
  40. data/scripts/global_rowcount.rb +75 -0
  41. metadata +28 -15
data/lib/jetpants/table.rb
@@ -41,24 +41,86 @@ module Jetpants
     # the default of 1 (meaning no chunking). For tables with hundreds of millions
     # of rows, you may want to do exports/imports in a few hundred chunks to speed
     # things up and keep the transactions smaller.
-    attr_reader :chunks
+    attr_accessor :chunks
 
-    # Create a Table. Params should have string keys, not symbols. Possible keys include
-    # 'sharding_key' (or equivalently 'primary_key'), 'chunks', and 'order_by'.
+    # The SQL statement read from the DB via SHOW CREATE TABLE
+    attr_reader :create_table_sql
+
+    # The primary key of the table; an array for a multi-column PK
+    attr_reader :primary_key
+
+    # A list of indexes mapped to the columns in them
+    attr_reader :indexes
+
+    # A list of the table column names
+    attr_reader :columns
+
+    # Pool object this Table is related to
+    attr_reader :pool
+
+    # Create a Table. Possible keys include 'sharding_key', 'chunks', 'order_by',
+    # 'create_table', 'pool', 'indexes', and anything else handled by plugins
     def initialize(name, params={})
       @name = name
       parse_params(params)
     end
 
     def parse_params(params = {})
-      params['sharding_key'] ||= params['primary_keys'] || params['primary_key'] || 'user_id'
-      @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']])
+      # Convert symbols to strings
+      params.keys.select {|k| k.is_a? Symbol}.each do |symbol_key|
+        params[symbol_key.to_s] = params[symbol_key]
+        params.delete symbol_key
+      end
+
+      # accept singular or plural for some params
+      params['sharding_key'] ||= params['sharding_keys']
+      params['primary_key'] ||= params['primary_keys']
+
+      @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']]) if params['sharding_key']
+      @sharding_keys ||= []
+
+      @primary_key = params['primary_key']
       @chunks = params['chunks'] || 1
       @order_by = params['order_by']
+      @create_table_sql = params['create_table'] || params['create_table_sql']
+      @pool = params['pool']
+      @indexes = params['indexes']
+      @columns = params['columns']
     end
 
+    # Returns SQL for fetching the current maximum primary key value; on a
+    # multi-column PK, the query returns the values of the record when
+    # ordered by the key fields in order, descending
+    def max_pk_val_query
+      if @primary_key.is_a?(Array)
+        pk_str = @primary_key.join(",")
+        pk_ordering = @primary_key.map{|key| "#{key} DESC"}.join(',')
+        sql = "SELECT #{pk_str} FROM #{@name} ORDER BY #{pk_ordering} LIMIT 1"
+      else
+        sql = "SELECT MAX(#{@primary_key}) FROM #{@name}"
+      end
+      return sql
+    end
+
+    # Returns the first column of the primary key, or nil if there isn't one
+    def first_pk_col
+      if @primary_key.is_a? Array
+        @primary_key.first
+      else
+        @primary_key
+      end
+    end
+
+    # Returns true if the table is associated with the supplied pool
+    def belongs_to?(pool)
+      return @pool == pool
+    end
+
     # Return an array of Table objects based on the contents of Jetpants' config file entry
     # of the given label.
+    # TODO: integrate better with table schema detection code. Consider auto-detecting chunk
+    # count based on file size and row count estimate.
     def Table.from_config(label)
       result = []
       Jetpants.send(label).map {|name, attributes| Table.new name, attributes}
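
The reworked constructor accepts symbol or string keys and tracks the primary key separately from the sharding key. A rough usage sketch (the table and column names here are hypothetical, not taken from the gem):

    # Symbol keys are converted to strings, so these params are equivalent to
    # 'primary_key' => ['user_id', 'post_id'], 'chunks' => 50
    t = Jetpants::Table.new('posts', primary_key: ['user_id', 'post_id'], chunks: 50)

    t.max_pk_val_query
    # => "SELECT user_id,post_id FROM posts ORDER BY user_id DESC,post_id DESC LIMIT 1"
    t.first_pk_col
    # => "user_id"

    # Single-column PK: max_pk_val_query falls back to a plain MAX()
    Jetpants::Table.new('users', 'primary_key' => 'user_id').max_pk_val_query
    # => "SELECT MAX(user_id) FROM users"
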
@@ -127,13 +189,21 @@ module Jetpants
       return sql
     end
 
-    # Counts number of rows between the given ID ranges. Warning: will give
-    # potentially misleading counts on multi-sharding-key tables.
+    # Returns SQL to count the number of rows between the given ID ranges.
+    # Warning: will give potentially misleading counts on multi-sharding-key tables.
     def sql_count_rows(min_id, max_id)
-      sql = "SELECT COUNT(*) FROM #{@name} WHERE "
+      sql = "SELECT COUNT(*) FROM #{@name}"
+      return sql unless min_id && max_id
+
       wheres = []
-      @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
-      sql << wheres.join(" OR ")
+
+      if @sharding_keys.size > 0
+        @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
+        sql << ' WHERE ' + wheres.join(" OR ")
+      elsif first_pk_col
+        sql << " WHERE #{first_pk_col} >= #{min_id} AND #{first_pk_col} <= #{max_id}"
+      end
+      sql
     end
 
     # Returns a file path (as a String) for the export dumpfile of the given ID range.
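
sql_count_rows now degrades gracefully: it uses the sharding keys when present, falls back to the first primary-key column otherwise, and returns an unbounded count when no bounds are given. A sketch with hypothetical tables:

    t = Jetpants::Table.new('messages', 'sharding_key' => 'user_id')
    t.sql_count_rows(1, 1000)
    # => "SELECT COUNT(*) FROM messages WHERE (user_id >= 1 AND user_id <= 1000)"

    # No sharding key: falls back to the first primary key column
    t2 = Jetpants::Table.new('events', 'primary_key' => ['day_id', 'event_id'])
    t2.sql_count_rows(5, 10)
    # => "SELECT COUNT(*) FROM events WHERE day_id >= 5 AND day_id <= 10"

    # Nil bounds: returns an unbounded count
    t2.sql_count_rows(nil, nil)
    # => "SELECT COUNT(*) FROM events"
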
data/plugins/capacity_plan/capacity_plan.rb
@@ -0,0 +1,353 @@
+require 'capacity_plan/commandsuite'
+require 'json'
+require 'pony'
+require 'capacity_plan/monkeypatch'
+
+module Jetpants
+  module Plugin
+    class Capacity
+      @@db = nil
+
+      # set the db and connect
+      def initialize
+        @@db = Jetpants.topology.pool(Jetpants.plugins['capacity_plan']['pool_name']).master
+        @@db.connect(user: Jetpants.plugins['capacity_plan']['user'], schema: Jetpants.plugins['capacity_plan']['schema'], pass: Jetpants.plugins['capacity_plan']['pass'])
+      end
+
+      ## grab a snapshot of the data and store it in mysql
+      def snapshot
+        storage_sizes = {}
+        timestamp = Time.now.to_i
+
+        current_sizes_storage = current_sizes
+
+        all_mounts.each do |key, value|
+          storage_sizes[key] = value
+          storage_sizes[key]['db_sizes'] = current_sizes_storage[key]
+        end
+
+        store_data(storage_sizes, timestamp)
+      end
+
+      ## generate the capacity plan; if email is set, also send it to that address
+      def plan(email=false)
+        history = get_history
+        mount_stats_storage = all_mounts
+        now = Time.now.to_i
+        output = ''
+
+        if Jetpants.topology.respond_to? :capacity_plan_notices
+          output += "\n\n________________________________________________________________________________________________________\n"
+          output += "Notices\n\n"
+          output += Jetpants.topology.capacity_plan_notices
+        end
+
+        criticals = []
+        warnings = []
+        ## check to see if any mounts are currently over the usage thresholds
+        mount_stats_storage.each do |key, value|
+          if value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['critical_mount']
+            criticals << key
+          elsif value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['warning_mount']
+            warnings << key
+          end
+        end
+
+        if criticals.count > 0
+          output += "\n\n________________________________________________________________________________________________________\n"
+          output += "Critical Mounts\n\n"
+          criticals.each do |mount|
+            output += mount + "\n"
+          end
+        end
+
+        if warnings.count > 0
+          output += "\n\n________________________________________________________________________________________________________\n"
+          output += "Warning Mounts\n\n"
+          warnings.each do |mount|
+            output += mount + "\n"
+          end
+        end
+
+        output += "\n\n________________________________________________________________________________________________________\n"
+        output += "Usage and Time Left\n"
+        output += " --------- The 'GB per day' and 'Days left' fields use a growth rate that is calculated by taking \n --------- an exponentially decaying avg\n\n"
+
+        ## get segments for 24 hour blocks
+        segments = segmentify(history, 60 * 60 * 24)
+
+        output += "%30s %20s %10s %10s %16s\n" % ["pool name","Current Data Size","GB per day","Days left","(until critical)"]
+        output += "%30s %20s %10s %10s\n" % ["---------","-----------------","----------","---------"]
+
+        mount_stats_storage.each do |name, temp|
+          growth_rate = false
+          segments[name].each do |range, value|
+            growth_rate = calc_avg(growth_rate || value, value)
+          end
+          critical = mount_stats_storage[name]['total'].to_f * Jetpants.plugins['capacity_plan']['critical_mount']
+          if (per_day(bytes_to_gb(growth_rate))) <= 0 || ((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate)) > 999
+            output += "%30s %20.2f %10.2f %10s\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))), 'N/A']
+          else
+            output += "%30s %20.2f %10.2f %10.2f\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))), ((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate))]
+          end
+        end
+
+        output += "\n\n________________________________________________________________________________________________________\nDay Over Day\n\n"
+
+        output += "%30s %10s %10s %10s %10s %11s\n" % ["pool name", "today", "1 day ago", "2 days ago", "7 days ago", "14 days ago"]
+        output += "%30s %10s %10s %10s %10s %11s\n" % ["---------", "-----", "---------", "----------", "----------", "-----------"]
+
+        mount_stats_storage.each do |name, temp|
+          out_array = []
+          segments[name].each do |range, value|
+            out_array << per_day(bytes_to_gb(value))+0
+          end
+          output += "%30s %10s %10s %10s %10s %11s\n" % [name, (out_array.reverse[0] ? "%.2f" % out_array.reverse[0] : 'N/A'), (out_array.reverse[1] ? "%.2f" % out_array.reverse[1] : 'N/A'), (out_array.reverse[2] ? "%.2f" % out_array.reverse[2] : 'N/A'), (out_array.reverse[7] ? "%.2f" % out_array.reverse[7] : 'N/A'), (out_array.reverse[14] ? "%.2f" % out_array.reverse[14] : 'N/A')]
+        end
+
+        output += outliers
+
+        collins_results = get_hardware_stats
+
+        output += collins_results
+
+        puts output
+
+        html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><pre style="font-size:20px;">' + output + '</pre></body></html>'
+
+        if email
+          Pony.mail(:to => email, :from => 'jetpants', :subject => 'Jetpants Capacity Plan - '+Time.now.strftime("%m/%d/%Y %H:%M:%S"), :html_body => html)
+        end
+      end
+
+      def bytes_to_gb(size)
+        size.to_f / 1024.0 / 1049000.0
+      end
+
+      def bytes_to_mb(size)
+        size.to_f / 1024.0 / 1024.0
+      end
+
+      def per_day(size)
+        size * 60 * 60 * 24
+      end
+
+      def per_week(size)
+        size * 60 * 60 * 24 * 7
+      end
+
+      def per_month(size)
+        size * 60 * 60 * 24 * 30
+      end
+
+      # use an exponentially decaying avg unless there is a count, then use a cumulative moving avg
+      def calc_avg(avg, new_value, count=false)
+        unless count
+          (new_value * 0.5) + (avg * (1.0 - 0.5))
+        else
+          avg + ((new_value - avg) / count)
+        end
+      end
+
+      ## grab the current sizes of the actual data set, including logs (in bytes)
+      def current_sizes
+        pool_sizes = {}
+        Jetpants.pools.each do |p|
+          pool_sizes[p.name] = p.data_set_size
+        end
+        pool_sizes
+      end
+
+      ## get all mounts' data in kilobytes
+      def all_mounts
+        mount_stats = {}
+        Jetpants.pools.each do |p|
+          mount_stats[p.name] ||= p.mount_stats
+        end
+        mount_stats
+      end
+
+      ## loop through the data and insert it into mysql
+      def store_data(mount_data, timestamp)
+        mount_data.each do |key, value|
+          @@db.query('INSERT INTO storage (`timestamp`, `pool`, `total`, `used`, `available`, `db_sizes`) VALUES ( ? , ? , ? , ? , ? , ? )', timestamp.to_s, key, value['total'].to_s, value['used'].to_s, value['available'].to_s, value['db_sizes'].to_s)
+        end
+      end
+
+      ## get the full history from mysql
+      def get_history
+        history = {}
+        @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage order by id').each do |row|
+          history[row[:pool]] ||= {}
+          history[row[:pool]][row[:timestamp]] ||= {}
+          history[row[:pool]][row[:timestamp]]['total'] = row[:total]
+          history[row[:pool]][row[:timestamp]]['used'] = row[:used]
+          history[row[:pool]][row[:timestamp]]['available'] = row[:available]
+          history[row[:pool]][row[:timestamp]]['db_sizes'] = row[:db_sizes]
+        end
+        history
+      end
+
+      ## segment the history into blocks of the given time period
+      def segmentify(hash, timeperiod)
+        new_hash = {}
+        hash.each do |name, temp|
+          before_timestamp = false
+          keeper = []
+          last_timestamp = nil
+          last_value = nil
+          hash[name].sort.each do |timestamp, value|
+            new_hash[name] ||= {}
+            last_timestamp = timestamp
+            last_value = value
+            unless before_timestamp && timestamp > (timeperiod - 60) + before_timestamp
+              unless before_timestamp
+                before_timestamp = timestamp
+              end
+              keeper << value
+            else
+              new_hash[name][before_timestamp.to_s+"-"+timestamp.to_s] = (keeper[0]['used'].to_f - value['used'].to_f)/(before_timestamp.to_f - timestamp.to_f)
+              before_timestamp = timestamp
+              keeper = []
+              keeper << value
+            end
+          end
+          if keeper.length > 1
+            new_hash[name][before_timestamp.to_s+"-"+last_timestamp.to_s] = (keeper[0]['used'].to_f - last_value['used'].to_f)/(before_timestamp.to_f - last_timestamp.to_f)
+          end
+        end
+
+        new_hash
+      end
+
+      # get a hash of machines to display at the end of the email;
+      # Jetpants.topology needs a machine_status_counts method that returns
+      # your machine types and states
+      def get_hardware_stats
+
+        # bail out unless the method exists
+        return '' unless Jetpants.topology.respond_to? :machine_status_counts
+
+        data = Jetpants.topology.machine_status_counts
+
+        output = ''
+        output += "\n________________________________________________________________________________________________________\n"
+        output += "Hardware status\n\n"
+
+        headers = ['status'].concat(data.first[1].keys).concat(['total'])
+        output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers
+        output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers.map { |i| '------------------'}
+
+        data.each do |key, status|
+          unless key == 'unallocated'
+            total = 0
+            status.each do |nodeclass, value|
+              total += value.to_i
+            end
+            output += (headers.map { |i| "%20s"}.join(" ")+"\n") % [key].concat(status.values).concat([total])
+          end
+        end
+
+        output += "\nTotal Unallocated nodes - " + data['unallocated'].to_s + "\n\n"
+
+        output
+      end
+
+      # figure out the outliers for the last 3 days
+      def outliers
+        output = ''
+
+        output += "\n________________________________________________________________________________________________________\n"
+        output += "New Outliers\n"
+        output += "--Compare the last 3 days in 2 hour blocks to the same 2 hour block 7, 14, 21, 28 days ago\n\n"
+
+        output += "%30s %25s %25s %10s %11s\n" % ['Pool Name', 'Start Time', 'End Time', 'Usage', 'Prev Weeks']
+        output += "%30s %25s %25s %10s %11s\n" % ['---------', '----------', '--------', '-----', '----------']
+
+        block_sizes = 60 * 60 * 2 + 120
+        days_from = [7, 14, 21, 28]
+        Jetpants.pools.each do |p|
+          start_time = Time.now.to_i - 3 * 24 * 60 * 60
+          counter = 0
+          counter_time = 0
+          output_buffer = ''
+          last_per = nil
+
+          name = p.name
+          while start_time + (60 * 62) < Time.now.to_i
+            temp_array = []
+            from_blocks = {}
+            from_per = {}
+
+            now_block = get_history_block(name, start_time, start_time + block_sizes)
+            unless now_block.count == 0
+              now_per = (now_block.first[1]['used'].to_f - now_block.values.last['used'].to_f)/(now_block.first[0].to_f - now_block.keys.last.to_f)
+
+              days_from.each do |days|
+                temp = get_history_block(name, start_time - (days * 24 * 60 * 60), start_time - (days * 24 * 60 * 60) + block_sizes)
+                if temp.count >= 2
+                  from_blocks[days] = temp
+                  from_per[days] = (from_blocks[days].first[1]['used'].to_f - from_blocks[days].values.last['used'].to_f)/(from_blocks[days].first[0].to_f - from_blocks[days].keys.last.to_f)
+                end
+              end
+
+              # remove outliers from the compare array because we only care about current outliers, not old ones
+              from_per.each do |day, value|
+                if(value > from_per.values.mean * 5.0 || value < from_per.values.mean * -5.0)
+                  from_per.delete(day)
+                end
+              end
+
+              if from_per.count > 0
+                if((now_per > (from_per.values.mean * 2.2) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
+                  if counter == 0
+                    counter_time = start_time
+                  end
+                  counter += 1
+                  if counter > 3
+                    output_buffer = "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(counter_time.to_i).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
+                  end
+                else
+                  counter = 0
+                  unless output_buffer == ''
+                    output += output_buffer
+                    output_buffer = ''
+                  end
+                end
+
+                if((now_per > (from_per.values.mean * 5.0) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
+                  output += "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(start_time).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
+                end
+              end # end if hash has values
+
+            end
+
+            start_time += block_sizes - 120
+          end # end while loop for last 3 days
+          output_buffer = ''
+          counter = 0
+          counter_time = 0
+        end
+
+        output
+
+      end
+
+      ## get a block of history from mysql for one pool within a time range
+      def get_history_block(pool, time_start, time_stop)
+        history = {}
+        @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage where pool = ? and timestamp >= ? and timestamp <= ? order by id', pool, time_start, time_stop).each do |row|
+          history[row[:timestamp]] ||= {}
+          history[row[:timestamp]]['total'] = row[:total]
+          history[row[:timestamp]]['used'] = row[:used]
+          history[row[:timestamp]]['available'] = row[:available]
+          history[row[:timestamp]]['db_sizes'] = row[:db_sizes]
+        end
+        history
+      end
+
+    end
+  end
+end
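
The capacity math above is compact, so here is a small standalone Ruby sketch that mirrors the arithmetic of calc_avg, bytes_to_gb, and per_day; all sample values are invented:

    # No count given: exponentially decaying average with a fixed 0.5 weight
    avg, new_value = 10.0, 20.0
    decayed = (new_value * 0.5) + (avg * (1.0 - 0.5))   # => 15.0

    # Count given: cumulative moving average over that many samples
    count = 4
    cma = avg + ((new_value - avg) / count)             # => 12.5

    # segmentify yields growth rates in bytes/second, so a "GB per day"
    # figure is per_day(bytes_to_gb(rate)):
    rate = 5_000.0                                      # bytes/second (made up)
    gb_per_day = (rate / 1024.0 / 1049000.0) * 60 * 60 * 24
    # => roughly 0.40 GB per day
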