sbader-lhm 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +10 -0
  3. data/CHANGELOG.md +99 -0
  4. data/LICENSE +27 -0
  5. data/README.md +146 -0
  6. data/Rakefile +20 -0
  7. data/bin/lhm-kill-queue +172 -0
  8. data/bin/lhm-spec-clobber.sh +36 -0
  9. data/bin/lhm-spec-grants.sh +25 -0
  10. data/bin/lhm-spec-setup-cluster.sh +67 -0
  11. data/bin/lhm-test-all.sh +10 -0
  12. data/gemfiles/ar-2.3_mysql.gemfile +5 -0
  13. data/gemfiles/ar-3.2_mysql.gemfile +5 -0
  14. data/gemfiles/ar-3.2_mysql2.gemfile +5 -0
  15. data/lhm.gemspec +27 -0
  16. data/lib/lhm.rb +45 -0
  17. data/lib/lhm/atomic_switcher.rb +49 -0
  18. data/lib/lhm/chunker.rb +114 -0
  19. data/lib/lhm/command.rb +46 -0
  20. data/lib/lhm/entangler.rb +98 -0
  21. data/lib/lhm/intersection.rb +63 -0
  22. data/lib/lhm/invoker.rb +49 -0
  23. data/lib/lhm/locked_switcher.rb +71 -0
  24. data/lib/lhm/migration.rb +30 -0
  25. data/lib/lhm/migrator.rb +219 -0
  26. data/lib/lhm/sql_helper.rb +85 -0
  27. data/lib/lhm/table.rb +97 -0
  28. data/lib/lhm/version.rb +6 -0
  29. data/spec/.lhm.example +4 -0
  30. data/spec/README.md +51 -0
  31. data/spec/bootstrap.rb +13 -0
  32. data/spec/fixtures/destination.ddl +6 -0
  33. data/spec/fixtures/origin.ddl +6 -0
  34. data/spec/fixtures/small_table.ddl +4 -0
  35. data/spec/fixtures/users.ddl +12 -0
  36. data/spec/integration/atomic_switcher_spec.rb +42 -0
  37. data/spec/integration/chunker_spec.rb +32 -0
  38. data/spec/integration/entangler_spec.rb +66 -0
  39. data/spec/integration/integration_helper.rb +140 -0
  40. data/spec/integration/lhm_spec.rb +204 -0
  41. data/spec/integration/locked_switcher_spec.rb +42 -0
  42. data/spec/integration/table_spec.rb +48 -0
  43. data/spec/unit/atomic_switcher_spec.rb +31 -0
  44. data/spec/unit/chunker_spec.rb +111 -0
  45. data/spec/unit/entangler_spec.rb +76 -0
  46. data/spec/unit/intersection_spec.rb +39 -0
  47. data/spec/unit/locked_switcher_spec.rb +51 -0
  48. data/spec/unit/migration_spec.rb +23 -0
  49. data/spec/unit/migrator_spec.rb +134 -0
  50. data/spec/unit/sql_helper_spec.rb +32 -0
  51. data/spec/unit/table_spec.rb +34 -0
  52. data/spec/unit/unit_helper.rb +14 -0
  53. metadata +173 -0
@@ -0,0 +1,6 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ gemfiles/*.lock
5
+ pkg/*
6
+ .rvmrc
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ before_script:
3
+ - "mysql -e 'create database lhm;'"
4
+ rvm:
5
+ - 1.8.7
6
+ - 1.9.3
7
+ gemfile:
8
+ - gemfiles/ar-2.3_mysql.gemfile
9
+ - gemfiles/ar-3.2_mysql.gemfile
10
+ - gemfiles/ar-3.2_mysql2.gemfile
@@ -0,0 +1,99 @@
1
+ # 1.1.0 (April 29, 2012)
2
+
3
+ * Add option to specify custom index name
4
+ * Add mysql2 compatibility
5
+ * Add AtomicSwitcher
6
+
7
+ # 1.0.3 (February 23, 2012)
8
+
9
+ * Improve change_column
10
+
11
+ # 1.0.2 (February 17, 2012)
12
+
13
+ * closes https://github.com/soundcloud/large-hadron-migrator/issues/11
14
+ this critical bug could cause data loss. table parser was replaced with
15
+ an implementation that reads directly from information_schema.
16
+
17
+ # 1.0.1 (February 09, 2012)
18
+
19
+ * released to rubygems
20
+
21
+ # 1.0.0 (February 09, 2012)
22
+
23
+ * added change_column
24
+ * final 1.0 release
25
+
26
+ # 1.0.0.rc8 (February 09, 2012)
27
+
28
+ * removed spec binaries from gem bins
29
+
30
+ # 1.0.0.rc7 (January 31, 2012)
31
+
32
+ * added SqlHelper.annotation into the middle of trigger statements. this
33
+ is for the benefit of the killer script which should not kill trigger
34
+ statements.
35
+
36
+ # 1.0.0.rc6 (January 30, 2012)
37
+
38
+ * added --confirm to kill script; fixes to kill script
39
+
40
+ # 1.0.0.rc5 (January 30, 2012)
41
+
42
+ * moved scripts into bin, renamed, added to gem binaries
43
+
44
+ # 1.0.0.rc4 (January 29, 2012)
45
+
46
+ * added '-- lhm' to the end of statements for more visibility
47
+
48
+ # 1.0.0.rc3 (January 19, 2012)
49
+
50
+ * Speedup migrations for tables with large minimum id
51
+ * Add a bit of yard documentation
52
+ * Fix issues with index creation on reserved column names
53
+ * Improve error handling
54
+ * Add tests for replication
55
+ * Rename public API method from `hadron_change_table` to `change_table`
56
+ * Add tests for ActiveRecord 2.3 and 3.1 compatibility
57
+
58
+ # 1.0.0.rc2 (January 18, 2012)
59
+
60
+ * Speedup migrations for tables with large ids
61
+ * Fix conversion of milliseconds to seconds
62
+ * Fix handling of sql errors
63
+ * Add helper to create unique index
64
+ * Allow index creation on prefix of column
65
+ * Quote column names on index creation
66
+ * Remove ambiguous method signature
67
+ * Documentation fix
68
+ * 1.8.7 compatibility
69
+
70
+ # 1.0.0.rc1 (January 15, 2012)
71
+
72
+ * rewrite.
73
+
74
+ # 0.2.1 (November 26, 2011)
75
+
76
+ * Include changelog in gem
77
+
78
+ # 0.2.0 (November 26, 2011)
79
+
80
+ * Add Ruby 1.8 compatibility
81
+ * Setup travis continuous integration
82
+ * Fix record loss issue
83
+ * Fix and speed up specs
84
+
85
+ # 0.1.4
86
+
87
+ * Merged [Pullrequest #9](https://github.com/soundcloud/large-hadron-migrator/pull/9)
88
+
89
+ # 0.1.3
90
+
91
+ * code cleanup
92
+ * Merged [Pullrequest #8](https://github.com/soundcloud/large-hadron-migrator/pull/8)
93
+ * Merged [Pullrequest #7](https://github.com/soundcloud/large-hadron-migrator/pull/7)
94
+ * Merged [Pullrequest #4](https://github.com/soundcloud/large-hadron-migrator/pull/4)
95
+ * Merged [Pullrequest #1](https://github.com/soundcloud/large-hadron-migrator/pull/1)
96
+
97
+ # 0.1.2
98
+
99
+ * Initial Release
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2011, SoundCloud, Rany Keddo, Tobias Bielohlawek, Tobias Schmidt
2
+
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ - Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+ - Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+ - Neither the name of the SoundCloud nor the names of its contributors may be
14
+ used to endorse or promote products derived from this software without
15
+ specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
@@ -0,0 +1,146 @@
1
+ # Large Hadron Migrator [![Build Status](https://secure.travis-ci.org/soundcloud/large-hadron-migrator.png)][4]
2
+
3
+ Rails style database migrations are a useful way to evolve your data schema in
4
+ an agile manner. Most Rails projects start like this, and at first, making
5
+ changes is fast and easy.
6
+
7
+ That is until your tables grow to millions of records. At this point, the
8
+ locking nature of `ALTER TABLE` may take your site down for an hour or more
9
+ while critical tables are migrated. In order to avoid this, developers begin
10
+ to design around the problem by introducing join tables or moving the data
11
+ into another layer. Development gets less and less agile as tables grow and
12
+ grow. To make the problem worse, adding or changing indices to optimize data
13
+ access becomes just as difficult.
14
+
15
+ > Side effects may include black holes and universe implosion.
16
+
17
+ There are few things that can be done at the server or engine level. It is
18
+ possible to change default values in an `ALTER TABLE` without locking the
19
+ table. The InnoDB Plugin provides facilities for online index creation, which
20
+ is great if you are using this engine, but only solves half the problem.
21
+
22
+ At SoundCloud we started having migration pains quite a while ago, and after
23
+ looking around for third party solutions, we decided to create our
24
+ own. We called it Large Hadron Migrator, and it is a gem for online
25
+ ActiveRecord migrations.
26
+
27
+ ![LHC](http://farm4.static.flickr.com/3093/2844971993_17f2ddf2a8_z.jpg)
28
+
29
+ [The Large Hadron collider at CERN](http://en.wikipedia.org/wiki/Large_Hadron_Collider)
30
+
31
+ ## The idea
32
+
33
+ The basic idea is to perform the migration online while the system is live,
34
+ without locking the table. In contrast to [OAK][0] and the
35
+ [facebook tool][1], we only use a copy table and triggers.
36
+
37
+ The Large Hadron is a test driven Ruby solution which can easily be dropped
38
+ into an ActiveRecord migration. It presumes a single auto incremented
39
+ numerical primary key called id as per the Rails convention. Unlike the
40
+ [twitter solution][2], it does not require the presence of an indexed
41
+ `updated_at` column.
42
+
43
+ ## Requirements
44
+
45
+ Lhm currently only works with MySQL databases and requires an established
46
+ ActiveRecord connection.
47
+
48
+ It is compatible and [continuously tested][4] with Ruby 1.8.7 and Ruby 1.9.x,
49
+ ActiveRecord 2.3.x and 3.x as well as mysql and mysql2 adapters.
50
+
51
+ ## Installation
52
+
53
+ Install it via `gem install lhm` or add `gem "lhm"` to your Gemfile.
54
+
55
+ ## Usage
56
+
57
+ You can invoke Lhm directly from a plain ruby file after connecting ActiveRecord
58
+ to your mysql instance:
59
+
60
+ ```ruby
61
+ require 'lhm'
62
+
63
+ ActiveRecord::Base.establish_connection(
64
+ :adapter => 'mysql',
65
+ :host => '127.0.0.1',
66
+ :database => 'lhm'
67
+ )
68
+
69
+ Lhm.change_table :users do |m|
70
+ m.add_column :arbitrary, "INT(12)"
71
+ m.add_index [:arbitrary_id, :created_at]
72
+ m.ddl("alter table %s add column flag tinyint(1)" % m.name)
73
+ end
74
+ ```
75
+
76
+ To use Lhm from an ActiveRecord::Migration in a Rails project, add it to your
77
+ Gemfile, then invoke as follows:
78
+
79
+ ```ruby
80
+ require 'lhm'
81
+
82
+ class MigrateUsers < ActiveRecord::Migration
83
+ def self.up
84
+ Lhm.change_table :users do |m|
85
+ m.add_column :arbitrary, "INT(12)"
86
+ m.add_index [:arbitrary_id, :created_at]
87
+ m.ddl("alter table %s add column flag tinyint(1)" % m.name)
88
+ end
89
+ end
90
+
91
+ def self.down
92
+ Lhm.change_table :users do |m|
93
+ m.remove_index [:arbitrary_id, :created_at]
94
+ m.remove_column :arbitrary
95
+ end
96
+ end
97
+ end
98
+ ```
99
+
100
+ ## Table rename strategies
101
+
102
+ There are two different table rename strategies available: LockedSwitcher and
103
+ AtomicSwitcher.
104
+
105
+ For all setups which use replication and a MySQL version
106
+ affected by the [binlog bug #39675](http://bugs.mysql.com/bug.php?id=39675),
107
+ we recommend the LockedSwitcher strategy to avoid replication issues. This
108
+ strategy locks the table being migrated and issues two ALTER TABLE statements.
109
+ The AtomicSwitcher uses a single atomic RENAME TABLE query and should be favored
110
+ in setups which do not suffer from the mentioned replication bug.
111
+
112
+ Lhm chooses the strategy automatically based on the used MySQL server version,
113
+ but you can override the behavior with an option:
114
+
115
+ ```ruby
116
+ Lhm.change_table :users, :atomic_switch => true do |m|
117
+ # ...
118
+ end
119
+ ```
120
+
121
+ ## Contributing
122
+
123
+ We'll check out your contribution if you:
124
+
125
+ * Provide a comprehensive suite of tests for your fork.
126
+ * Have a clear and documented rationale for your changes.
127
+ * Package these up in a pull request.
128
+
129
+ We'll do our best to help you out with any contribution issues you may have.
130
+
131
+ ## License
132
+
133
+ The license is included as LICENSE in this directory.
134
+
135
+ ## Similar solutions
136
+
137
+ * [OAK: online alter table][0]
138
+ * [Facebook][1]
139
+ * [Twitter][2]
140
+ * [pt-online-schema-change][3]
141
+
142
+ [0]: http://openarkkit.googlecode.com
143
+ [1]: http://www.facebook.com/note.php?note\_id=430801045932
144
+ [2]: https://github.com/freels/table_migrator
145
+ [3]: http://www.percona.com/doc/percona-toolkit/2.1/pt-online-schema-change.html
146
+ [4]: http://travis-ci.org/soundcloud/large-hadron-migrator
@@ -0,0 +1,20 @@
1
require 'rake/testtask'
require 'bundler'

# Pull in the rake tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# One Rake::TestTask per spec suite. Both suites share the same settings and
# differ only in the task name and the glob selecting their spec files.
[
  ["unit",        'spec/unit/*_spec.rb'],
  ["integration", 'spec/integration/*_spec.rb']
].each do |suite, pattern|
  Rake::TestTask.new(suite) do |suite_task|
    suite_task.libs.push "lib"
    suite_task.test_files = FileList[pattern]
    suite_task.verbose = true
  end
end

# `rake specs` runs the unit suite followed by the integration suite;
# a bare `rake` does the same.
task :specs => [:unit, :integration]
task :default => :specs
20
+
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'active_record'
4
+ require 'lhm/sql_helper'
5
+ require 'optparse'
6
+
7
module Lhm
  # Command line helper that waits until a statement matching
  # "lock table ... <lhm annotation>" shows up in the MySQL process list and
  # then kills processes, depending on the selected mode:
  #
  #   kill   - kill the detected lock statement itself
  #   master - after the grace period, kill every long running non-lhm query
  #   slave  - like master, but only kill statements recognised as SELECTs
  #
  # Only processes of the given database and user are considered.
  class KillQueue

    # Parses the command line (ARGV), aborts with a usage message when a
    # mandatory option is missing or the mode is unknown, and connects to the
    # database.
    def initialize
      @port   = 3306
      @grace  = 10    # seconds to wait before killing / minimum query age
      @tiny   = 0.1   # polling interval and pause between two kills
      @marker = "%#{ SqlHelper.annotation }%" # LIKE pattern for lhm statements

      OptionParser.new do |opts|
        opts.on("-h", "--hostname HOSTNAME") { |v| @hostname = v }
        opts.on("-u", "--username USERNAME") { |v| @username = v }
        opts.on("-p", "--password PASSWORD") { |v| @password = v }
        opts.on("-d", "--database DATABASE") { |v| @database = v }
        opts.on("-m", "--mode MODE")         { |v| @mode = v.to_sym }
        opts.on("-y", "--confirm")           { |v| @confirm = true }
      end.parse!

      abort usage unless @hostname && @username && @password && @database

      unless [:kill, :master, :slave].include?(@mode)
        abort "specify -m kill OR -m master OR -m slave"
      end

      connect
    end

    # Returns the help text printed when mandatory options are missing.
    def usage
      <<-desc.gsub(/^ {8}/, '')
        kills queries on the given server after detecting 'lock table#{ @marker }'.
        usage:
          lhm-kill-queue -h hostname -u username -p password -d database \\
                         (-m kill | -m master | -m slave) [--confirm]

      desc
    end

    # Dispatches to the method named after the selected mode.
    def run
      case @mode
      when :kill   then kill
      when :master then master
      when :slave  then slave
      end
    end

    # Waits for the lhm lock statement and kills it.
    def kill
      kill_process(trip)
    end

    # Waits for the lhm lock statement, sleeps for the grace period and then
    # kills every non-lhm query that has been running longer than that.
    def master
      trip
      puts "starting to kill non lhm processes in #{ @grace } seconds"
      sleep(@grace + @tiny)

      Array(list_non_lhm).each do |process|
        kill_process(process)
        sleep(@tiny)
      end
    end

    # Same as #master, but statements that are not recognised as plain SELECTs
    # are left alone.
    def slave
      trip
      puts "starting to kill non lhm SELECT processes in #{ @grace } seconds"
      sleep(@grace + @tiny)

      Array(list_non_lhm).each do |process|
        next unless select?(process)

        kill_process(process)
        sleep(@tiny)
      end
    end

  private

    def connect
      ActiveRecord::Base.establish_connection({
        :adapter  => 'mysql',
        :host     => @hostname,
        :port     => @port,
        :username => @username,
        :password => @password,
        :database => @database
      })
    end

    def connection
      ActiveRecord::Base.connection
    end

    # Ids of running queries that do not carry the lhm annotation and are older
    # than the grace period.
    def list_non_lhm
      select_processes %Q(
        info not like '#{ @marker }' and time > #{ @grace } and command = 'Query'
      )
    end

    # Blocks until a "lock table" statement carrying the lhm annotation is
    # visible in the process list, printing a dot per poll. Returns its id.
    def trip
      loop do
        lock = select_processes("info like 'lock table#{ @marker }'").first
        return lock if lock

        sleep @tiny
        print '.'
      end
    end

    # Kills the process with the given id. With --confirm the user is asked
    # first; anything but 'y' (including end of input) skips the kill.
    def kill_process(process_id)
      puts "killing #{ select_statement(process_id) }"

      if @confirm
        print "confirm ('y' to confirm): "
        $stdout.flush

        # Read from $stdin explicitly: Kernel#gets would try to read from files
        # named by leftover ARGV entries, and gets returns nil at end of input.
        answer = $stdin.gets

        if answer.nil? || answer.strip != 'y'
          puts "skipped."
          return
        end
      end

      connection.execute("kill #{ process_id }")
      puts "killed #{ process_id }"
    end

    # True when the statement run by the process contains the SELECT keyword
    # and none of DELETE, UPDATE or INSERT. Keywords are matched as whole
    # words, so identifiers such as `updated_at` do not disqualify a SELECT.
    # Returns false when the process has no statement (e.g. already finished).
    def select?(process)
      statement = select_statement(process)
      return false unless statement
      return false if statement =~ /\b(?:delete|update|insert)\b/i

      !!(statement =~ /\bselect\b/i)
    end

    # The statement text of the given process, or nil.
    def select_statement(process)
      return unless process

      value %Q(
        select info from information_schema.processlist where id = #{ process }
      )
    end

    # Ids of processes belonging to the configured database and user which
    # also satisfy the given SQL predicate.
    def select_processes(predicate)
      # database and user name come from the command line; let the adapter
      # quote them so that names containing quotes cannot break the statement.
      values %Q(
        select id from information_schema.processlist
         where db = #{ connection.quote(@database) }
           and user = #{ connection.quote(@username) }
           and #{ predicate }
      )
    end

    def value(statement)
      connection.select_value(statement)
    end

    def values(statement)
      connection.select_values(statement)
    end
  end
end
169
+
170
# Script entry point: parse the command line, connect, and run the selected mode.
Lhm::KillQueue.new.run
172
+