jetpants 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +4 -9
  3. data/bin/jetpants +7 -6
  4. data/doc/capacity_plan.rdoc +77 -0
  5. data/doc/commands.rdoc +1 -1
  6. data/doc/jetpants_collins.rdoc +2 -1
  7. data/doc/online_schema_change.rdoc +45 -0
  8. data/doc/plugins.rdoc +7 -1
  9. data/doc/requirements.rdoc +1 -1
  10. data/doc/upgrade_helper.rdoc +68 -0
  11. data/lib/jetpants/db/client.rb +2 -1
  12. data/lib/jetpants/db/import_export.rb +12 -3
  13. data/lib/jetpants/db/replication.rb +6 -2
  14. data/lib/jetpants/db/schema.rb +40 -0
  15. data/lib/jetpants/db/server.rb +2 -2
  16. data/lib/jetpants/host.rb +12 -1
  17. data/lib/jetpants/pool.rb +41 -0
  18. data/lib/jetpants/shard.rb +201 -124
  19. data/lib/jetpants/table.rb +80 -10
  20. data/plugins/capacity_plan/capacity_plan.rb +353 -0
  21. data/plugins/capacity_plan/commandsuite.rb +19 -0
  22. data/plugins/capacity_plan/monkeypatch.rb +20 -0
  23. data/plugins/jetpants_collins/db.rb +45 -6
  24. data/plugins/jetpants_collins/jetpants_collins.rb +32 -21
  25. data/plugins/jetpants_collins/pool.rb +22 -1
  26. data/plugins/jetpants_collins/shard.rb +9 -2
  27. data/plugins/jetpants_collins/topology.rb +8 -9
  28. data/plugins/online_schema_change/commandsuite.rb +56 -0
  29. data/plugins/online_schema_change/db.rb +33 -0
  30. data/plugins/online_schema_change/online_schema_change.rb +5 -0
  31. data/plugins/online_schema_change/pool.rb +105 -0
  32. data/plugins/online_schema_change/topology.rb +56 -0
  33. data/plugins/simple_tracker/shard.rb +1 -1
  34. data/plugins/upgrade_helper/commandsuite.rb +212 -0
  35. data/plugins/upgrade_helper/db.rb +78 -0
  36. data/plugins/upgrade_helper/host.rb +22 -0
  37. data/plugins/upgrade_helper/pool.rb +259 -0
  38. data/plugins/upgrade_helper/shard.rb +61 -0
  39. data/plugins/upgrade_helper/upgrade_helper.rb +21 -0
  40. data/scripts/global_rowcount.rb +75 -0
  41. metadata +28 -15
@@ -41,24 +41,86 @@ module Jetpants
41
41
  # the default of 1 (meaning no chunking). For tables with hundreds of millions
42
42
  # of rows, you may want to do exports/imports in a few hundred chunks to speed
43
43
  # things up and keep the transactions smaller.
44
- attr_reader :chunks
44
+ attr_accessor :chunks
45
45
 
46
- # Create a Table. Params should have string keys, not symbols. Possible keys include
47
- # 'sharding_key' (or equivalently 'primary_key'), 'chunks', and 'order_by'.
46
+ # The SQL statement read from the DB via SHOW CREATE TABLE
47
+ attr_reader :create_table_sql
48
+
49
+ # The primary key of the table, returns an array on a multi-
50
+ # column PK
51
+ attr_reader :primary_key
52
+
53
+ # A list of indexes mapped to the columns in them
54
+ attr_reader :indexes
55
+
56
+ # A list of the table column names
57
+ attr_reader :columns
58
+
59
+ # Pool object this Table is related to
60
+ attr_reader :pool
61
+
62
+ # Create a Table. Possible keys include 'sharding_key', 'chunks', 'order_by',
63
+ # 'create_table', 'pool', 'indexes', and anything else handled by plugins
48
64
  def initialize(name, params={})
49
65
  @name = name
50
66
  parse_params(params)
51
67
  end
52
68
 
53
69
  def parse_params(params = {})
54
- params['sharding_key'] ||= params['primary_keys'] || params['primary_key'] || 'user_id'
55
- @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']])
70
+ # Convert symbols to strings
71
+ params.keys.select {|k| k.is_a? Symbol}.each do |symbol_key|
72
+ params[symbol_key.to_s] = params[symbol_key]
73
+ params.delete symbol_key
74
+ end
75
+
76
+ # accept singular or plural for some params
77
+ params['sharding_key'] ||= params['sharding_keys']
78
+ params['primary_key'] ||= params['primary_keys']
79
+
80
+ @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']]) if params['sharding_key']
81
+ @sharding_keys ||= []
82
+
83
+ @primary_key = params['primary_key']
56
84
  @chunks = params['chunks'] || 1
57
85
  @order_by = params['order_by']
86
+ @create_table_sql = params['create_table'] || params['create_table_sql']
87
+ @pool = params['pool']
88
+ @indexes = params['indexes']
89
+ @columns = params['columns']
58
90
  end
59
91
 
92
+ # Returns the current maximum primary key value, returns
93
+ # the values of the record when ordered by the key fields
94
+ # in order, descending on a multi-value PK
95
+ def max_pk_val_query
96
+ if @primary_key.is_a?(Array)
97
+ pk_str = @primary_key.join(",")
98
+ pk_ordering = @primary_key.map{|key| "#{key} DESC"}.join(',')
99
+ sql = "SELECT #{pk_str} FROM #{@name} ORDER BY #{pk_ordering} LIMIT 1"
100
+ else
101
+ sql = "SELECT MAX(#{@primary_key}) FROM #{@name}"
102
+ end
103
+ return sql
104
+ end
105
+
106
+ # Returns the first column of the primary key, or nil if there isn't one
107
+ def first_pk_col
108
+ if @primary_key.is_a? Array
109
+ @primary_key.first
110
+ else
111
+ @primary_key
112
+ end
113
+ end
114
+
115
+ # Returns true if the table is associated with the supplied pool
116
+ def belongs_to?(pool)
117
+ return @pool == pool
118
+ end
119
+
60
120
  # Return an array of Table objects based on the contents of Jetpants' config file entry
61
121
  # of the given label.
122
+ # TODO: integrate better with table schema detection code. Consider auto-detecting chunk
123
+ # count based on file size and row count estimate.
62
124
  def Table.from_config(label)
63
125
  result = []
64
126
  Jetpants.send(label).map {|name, attributes| Table.new name, attributes}
@@ -127,13 +189,21 @@ module Jetpants
127
189
  return sql
128
190
  end
129
191
 
130
- # Counts number of rows between the given ID ranges. Warning: will give
131
- # potentially misleading counts on multi-sharding-key tables.
192
  # Returns SQL to count the number of rows between the given ID ranges.
193
+ # Warning: will give potentially misleading counts on multi-sharding-key tables.
132
194
  def sql_count_rows(min_id, max_id)
133
- sql = "SELECT COUNT(*) FROM #{@name} WHERE "
195
+ sql = "SELECT COUNT(*) FROM #{@name}"
196
+ return sql unless min_id && max_id
197
+
134
198
  wheres = []
135
- @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
136
- sql << wheres.join(" OR ")
199
+
200
+ if @sharding_keys.size > 0
201
+ @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
202
+ sql << ' WHERE ' + wheres.join(" OR ")
203
+ elsif first_pk_col
204
+ sql << " WHERE #{first_pk_col} >= #{min_id} AND #{first_pk_col} <= #{max_id}"
205
+ end
206
+ sql
137
207
  end
138
208
 
139
209
  # Returns a file path (as a String) for the export dumpfile of the given ID range.
@@ -0,0 +1,353 @@
1
+ require 'capacity_plan/commandsuite'
2
+ require 'json'
3
+ require 'pony'
4
+ require 'capacity_plan/monkeypatch'
5
+
6
+ module Jetpants
7
+ module Plugin
8
+ class Capacity
9
+ @@db
10
+
11
+ # set the db and connect
12
+ def initialize
13
+ @@db = Jetpants.topology.pool(Jetpants.plugins['capacity_plan']['pool_name']).master
14
+ @@db.connect(user: Jetpants.plugins['capacity_plan']['user'], schema: Jetpants.plugins['capacity_plan']['schema'], pass: Jetpants.plugins['capacity_plan']['pass'])
15
+ end
16
+
17
+ ## grab snapshot of data and store it in mysql
18
+ def snapshot
19
+ storage_sizes = {}
20
+ timestamp = Time.now.to_i
21
+
22
+ current_sizes_storage = current_sizes
23
+
24
+ all_mounts.each do |key, value|
25
+ storage_sizes[key] = value
26
+ storage_sizes[key]['db_sizes'] = current_sizes_storage[key]
27
+ end
28
+
29
+ store_data(storage_sizes, timestamp)
30
+ end
31
+
32
+ ## generate the capacity plan and if email is true also send it to the email address listed
33
+ def plan(email=false)
34
+ history = get_history
35
+ mount_stats_storage = all_mounts
36
+ now = Time.now.to_i
37
+ output = ''
38
+
39
+ if Jetpants.topology.respond_to? :capacity_plan_notices
40
+ output += "\n\n________________________________________________________________________________________________________\n"
41
+ output += "Notices\n\n"
42
+ output += Jetpants.topology.capacity_plan_notices
43
+ end
44
+
45
+ criticals = []
46
+ warnings = []
47
+ ## check to see if any mounts are currently over the usage points
48
+ mount_stats_storage.each do |key, value|
49
+ if value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['critical_mount']
50
+ criticals << key
51
+ elsif value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['warning_mount']
52
+ warnings << key
53
+ end
54
+ end
55
+
56
+ if criticals.count > 0
57
+ output += "\n\n________________________________________________________________________________________________________\n"
58
+ output += "Critical Mounts\n\n"
59
+ criticals.each do |mount|
60
+ output += mount + "\n"
61
+ end
62
+ end
63
+
64
+ if warnings.count > 0
65
+ output += "\n\n________________________________________________________________________________________________________\n"
66
+ output += "Warning Mounts\n\n"
67
+ warnings.each do |mount|
68
+ output += mount + "\n"
69
+ end
70
+ end
71
+
72
+ output += "\n\n________________________________________________________________________________________________________\n"
73
+ output += "Usage and Time Left\n"
74
+ output += " --------- The 'GB per day' and 'Days left' fields are using a growth rate that is calulated by taking \n --------- a exponically decaying avg\n\n"
75
+
76
+ ##get segments for 24 hour blocks
77
+ segments = segmentify(history, 60 * 60 * 24)
78
+
79
+ output += "%30s %20s %10s %10s %16s\n" % ["pool name","Current Data Size","GB per day","Days left","(until critical)"]
80
+ output += "%30s %20s %10s %10s\n" % ["---------","-----------------","----------","---------"]
81
+
82
+ mount_stats_storage.each do |name, temp|
83
+ growth_rate = false
84
+ segments[name].each do |range, value|
85
+ growth_rate = calc_avg(growth_rate || value, value)
86
+ end
87
+ critical = mount_stats_storage[name]['total'].to_f * Jetpants.plugins['capacity_plan']['critical_mount']
88
+ if (per_day(bytes_to_gb(growth_rate))) <= 0 || ((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate)) > 999
89
+ output += "%30s %20.2f %10.2f %10s\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))), 'N/A']
90
+ else
91
+ output += "%30s %20.2f %10.2f %10.2f\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))),((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate))]
92
+ end
93
+ end
94
+
95
+ output += "\n\n________________________________________________________________________________________________________\nDay Over Day\n\n"
96
+
97
+ output += "%30s %10s %10s %10s %10s %11s\n" % ["pool name", "today", "1 day ago", "2 days ago", "7 days ago", "14 days ago"]
98
+ output += "%30s %10s %10s %10s %10s %11s\n" % ["---------", "-----", "---------", "----------", "----------", "-----------"]
99
+
100
+ mount_stats_storage.each do |name, temp|
101
+ out_array = []
102
+ segments[name].each do |range, value|
103
+ out_array << per_day(bytes_to_gb(value))+0
104
+ end
105
+ output += "%30s %10s %10s %10s %10s %11s\n" % [name, (out_array.reverse[0] ? "%.2f" % out_array.reverse[0] : 'N/A'), (out_array.reverse[1] ? "%.2f" % out_array.reverse[1] : 'N/A'), (out_array.reverse[2] ? "%.2f" % out_array.reverse[2] : 'N/A'), (out_array.reverse[7] ? "%.2f" % out_array.reverse[7] : 'N/A'), (out_array.reverse[14] ? "%.2f" % out_array.reverse[14] : 'N/A')]
106
+ end
107
+
108
+ output += outliers
109
+
110
+ collins_results = get_hardware_stats
111
+
112
+ output += collins_results
113
+
114
+ puts output
115
+
116
+ html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><pre style="font-size=20px;">' + output + '</pre></body></html>'
117
+
118
+ if email
119
+ Pony.mail(:to => email, :from => 'jetpants', :subject => 'Jetpants Capacity Plan - '+Time.now.strftime("%m/%d/%Y %H:%M:%S"), :html_body => html)
120
+ end
121
+ end
122
+
123
+ def bytes_to_gb(size)
124
+ size.to_f / 1024.0 / 1049000.0
125
+ end
126
+
127
+ def bytes_to_mb(size)
128
+ size.to_f / 1024.0 / 1024.0
129
+ end
130
+
131
+ def per_day(size)
132
+ size * 60 * 60 * 24
133
+ end
134
+
135
+ def per_week(size)
136
+ size * 60 * 60 * 24 * 7
137
+ end
138
+
139
+ def per_month(size)
140
+ size * 60 * 60 * 24 * 30
141
+ end
142
+
143
  + # use an exponentially decaying avg unless there is a count, then use a cumulative moving avg
144
+ def calc_avg(avg, new_value, count=false)
145
+ unless count
146
+ (new_value * 0.5) + (avg * (1.0 - 0.5))
147
+ else
148
+ avg + ((new_value - avg) / count)
149
+ end
150
+ end
151
+
152
  + ## grab the current sizes from actual data set size including logs (in bytes)
153
+ def current_sizes
154
+ pool_sizes = {}
155
+ Jetpants.pools.each do |p|
156
+ pool_sizes[p.name] = p.data_set_size
157
+ end
158
+ pool_sizes
159
+
160
+ end
161
+
162
+ ## get all mount's data in kilobytes
163
+ def all_mounts
164
+ mount_stats = {}
165
+ Jetpants.pools.each do |p|
166
+ mount_stats[p.name] ||= p.mount_stats
167
+ end
168
+ mount_stats
169
+ end
170
+
171
+ ## loop through data and enter it in mysql
172
+ def store_data(mount_data,timestamp)
173
+ mount_data.each do |key, value|
174
+ @@db.query('INSERT INTO storage (`timestamp`, `pool`, `total`, `used`, `available`, `db_sizes`) VALUES ( ? , ? , ? , ? , ? , ? )', timestamp.to_s, key, value['total'].to_s, value['used'].to_s, value['available'].to_s, value['db_sizes'].to_s)
175
+ end
176
+ end
177
+
178
+ ## get history from mysql of all data right now
179
+ def get_history
180
+ history = {}
181
+ @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage order by id').each do |row|
182
+ history[row[:pool]] ||= {}
183
+ history[row[:pool]][row[:timestamp]] ||= {}
184
+ history[row[:pool]][row[:timestamp]]['total'] = row[:total]
185
+ history[row[:pool]][row[:timestamp]]['used'] = row[:used]
186
+ history[row[:pool]][row[:timestamp]]['available'] = row[:available]
187
+ history[row[:pool]][row[:timestamp]]['db_sizes'] = row[:db_sizes]
188
+ end
189
+ history
190
+ end
191
+
192
+ ## segment out groups to a given time period
193
+ def segmentify(hash, timeperiod)
194
+ new_hash = {}
195
+ hash.each do |name, temp|
196
+ before_timestamp = false
197
+ keeper = []
198
+ last_timestamp = nil
199
+ last_value = nil
200
+ hash[name].sort.each do |timestamp, value|
201
+ new_hash[name] ||= {}
202
+ last_timestamp = timestamp
203
+ last_value = value
204
+ unless before_timestamp && timestamp > (timeperiod - 60 ) + before_timestamp
205
+ unless before_timestamp
206
+ before_timestamp = timestamp
207
+ end
208
+ keeper << value
209
+ else
210
+ new_hash[name][before_timestamp.to_s+"-"+timestamp.to_s] = (keeper[0]['used'].to_f - value['used'].to_f )/(before_timestamp.to_f - timestamp.to_f)
211
+ before_timestamp = timestamp
212
+ keeper = []
213
+ keeper << value
214
+ end
215
+ end
216
+ if keeper.length > 1
217
+ new_hash[name][before_timestamp.to_s+"-"+last_timestamp.to_s] = (keeper[0]['used'].to_f - last_value['used'].to_f )/(before_timestamp.to_f - last_timestamp.to_f)
218
+ end
219
+ end
220
+
221
+ new_hash
222
+ end
223
+
224
  + # get a hash of machines to display at the end of the email
225
+ # you need to have a method in Jetpants.topology.machine_status_counts to get
226
+ # your machine types and states
227
+ def get_hardware_stats
228
+
229
+ #see if function exists
230
+ return '' unless Jetpants.topology.respond_to? :machine_status_counts
231
+
232
+ data = Jetpants.topology.machine_status_counts
233
+
234
+ output = ''
235
+ output += "\n________________________________________________________________________________________________________\n"
236
+ output += "Hardware status\n\n"
237
+
238
+ headers = ['status'].concat(data.first[1].keys).concat(['total'])
239
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers
240
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers.map { |i| '------------------'}
241
+
242
+ data.each do |key, status|
243
+ unless key == 'unallocated'
244
+ total = 0
245
+ status.each do |nodeclass, value|
246
+ total += value.to_i
247
+ end
248
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % [key].concat(status.values).concat([total])
249
+ end
250
+ end
251
+
252
+ output += "\nTotal Unallocated nodes - " + data['unallocated'] + "\n\n"
253
+
254
+ output
255
+ end
256
+
257
+ # figure out the outliers for the last 3 days
258
+ def outliers
259
+ output = ''
260
+
261
+ output += "\n________________________________________________________________________________________________________\n"
262
+ output += "New Outliers\n"
263
+ output += "--Compare the last 3 days in 2 hour blocks to the same 2 hour block 7, 14, 21, 28 days ago\n\n"
264
+
265
+ output += "%30s %25s %25s %10s %11s\n" % ['Pool Name', 'Start Time', 'End Time', 'Usage', 'Prev Weeks']
266
+ output += "%30s %25s %25s %10s %11s\n" % ['---------', '----------', '--------', '-----', '----------']
267
+
268
+ block_sizes = 60 * 60 * 2 + 120
269
+ days_from = [7,14,21,28]
270
+ Jetpants.pools.each do |p|
271
+ start_time = Time.now.to_i - 3 * 24 * 60 * 60
272
+ counter = 0
273
+ counter_time = 0
274
+ output_buffer = ''
275
+ last_per = nil
276
+
277
+ name = p.name
278
+ while start_time + (60 * 62) < Time.now.to_i
279
+ temp_array = []
280
+ from_blocks = {}
281
+ from_per = {}
282
+
283
+ now_block = get_history_block(name, start_time, start_time + block_sizes)
284
+ unless now_block.count == 0
285
+ now_per = (now_block.first[1]['used'].to_f - now_block.values.last['used'].to_f)/(now_block.first[0].to_f - now_block.keys.last.to_f)
286
+
287
+
288
+ days_from.each do |days|
289
+ temp = get_history_block(name, start_time - (days * 24 * 60 * 60), start_time - (days * 24 * 60 * 60) + block_sizes)
290
+ if temp.count >= 2
291
+ from_blocks[days] = temp
292
+ from_per[days] = (from_blocks[days].first[1]['used'].to_f - from_blocks[days].values.last['used'].to_f)/(from_blocks[days].first[0].to_f - from_blocks[days].keys.last.to_f)
293
+ end
294
+ end
295
+
296
+ # remove outliers from compare array because we only care about current outliers not old outliers
297
+ from_per.each do |day, value|
298
+ if(value > from_per.values.mean * 5.0 || value < from_per.values.mean * -5.0)
299
+ from_per.delete(day)
300
+ end
301
+ end
302
+
303
+ if from_per.count > 0
304
+ if((now_per > (from_per.values.mean * 2.2) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
305
+ if counter == 0
306
+ counter_time = start_time
307
+ end
308
+ counter += 1
309
+ if counter > 3
310
+ output_buffer = "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(counter_time.to_i).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
311
+ end
312
+ else
313
+ counter = 0
314
+ unless output_buffer == ''
315
+ output += output_buffer
316
+ output_buffer = ''
317
+ end
318
+ end
319
+
320
+ if((now_per > (from_per.values.mean * 5.0) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
321
+ output += "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(start_time).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
322
+ end
323
+ end # end if hash has values
324
+
325
+ end
326
+
327
+ start_time += block_sizes - 120
328
+ end # end while loop for last 3 days
329
+ output_buffer = ''
330
+ counter = 0
331
+ counter_time = 0
332
+ end
333
+
334
+ output
335
+
336
+ end
337
+
338
  + ## get history from mysql for one pool within the given time window
339
+ def get_history_block(pool,time_start,time_stop)
340
+ history = {}
341
+ @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage where pool = ? and timestamp >= ? and timestamp <= ? order by id', pool, time_start, time_stop).each do |row|
342
+ history[row[:timestamp]] ||= {}
343
+ history[row[:timestamp]]['total'] = row[:total]
344
+ history[row[:timestamp]]['used'] = row[:used]
345
+ history[row[:timestamp]]['available'] = row[:available]
346
+ history[row[:timestamp]]['db_sizes'] = row[:db_sizes]
347
+ end
348
+ history
349
+ end
350
+
351
+ end
352
+ end
353
+ end