schema_transformer 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -62,6 +62,25 @@ Thank you. Have a very nice day.
62
62
  tung@walle $
63
63
  </pre>
64
64
 
65
+ It is strongly recommended that the tables that you are altering have updated_at timestamp columns
66
+ with indexes on them. If the tables do not have updated_at columns at all, then only the last 100,000 rows
67
+ get updated in the final sync in the "schema_transformer switch ..." command. If the updated_at column
68
+ is available then the final sync will use it to update all the data. However, because it uses the
69
+ updated_at column, it is extremely important that the updated_at column is indexed or the final
70
+ "schema_transformer switch ..." command possibly could be slow. Because of this, you should analyze your
71
+ database schema for missing updated_at columns and indexes with the command "schema_transformer analyze".
72
+
73
+ Example:
74
+ <pre>
75
+ tung@walle $ schema_transformer analyze
76
+ Analyzing your database schema...
77
+ There are no tables without the updated_at timestamp. GOOD
78
+ These tables do have an updated_at timestamp, but no index:
79
+ users
80
+ tung@walle $
81
+ </pre>
82
+
83
+
65
84
  FAQ
66
85
  -------
67
86
 
data/TODO CHANGED
@@ -8,4 +8,3 @@
8
8
  * TODO:
9
9
  * add logging again: schema_transformer.log
10
10
  * updated_at if its available and use a real time vs some guess
11
- * clean up spec: use real mocks, get rid of $testing_books
@@ -7,4 +7,6 @@ require 'fileutils'
7
7
  require File.expand_path('../schema_transformer/version', __FILE__)
8
8
  require File.expand_path('../schema_transformer/help', __FILE__)
9
9
  require File.expand_path('../schema_transformer/base', __FILE__)
10
- require File.expand_path('../schema_transformer/cli', __FILE__)
10
+ require File.expand_path('../schema_transformer/transform', __FILE__)
11
+ require File.expand_path('../schema_transformer/cli', __FILE__)
12
+ require File.expand_path('../schema_transformer/analyze', __FILE__)
@@ -0,0 +1,52 @@
1
+ module SchemaTransformer
2
+ class Analyze < Base
3
+ def self.run(options)
4
+ @analyze = Analyze.new(options[:base] || Dir.pwd, options)
5
+ puts "Analyzing your database schema..."
6
+ if @analyze.no_timestamps.empty?
7
+ puts "There are no tables without the updated_at timestamp. GOOD"
8
+ else
9
+ puts "These tables do not have updated_at timestamps: "
10
+ puts " #{@analyze.no_timestamps.join("\n ")}"
11
+ end
12
+ if @analyze.no_indexes.empty?
13
+ puts "There are no tables with updated_at timestamp but no indexes. GOOD"
14
+ else
15
+ puts "These tables do have an updated_at timestamp, but no index: "
16
+ puts " #{@analyze.no_indexes.join("\n ")}"
17
+ end
18
+ if @analyze.no_timestamps.empty? and @analyze.no_indexes.empty?
19
+ puts "Everything looks GOOD!"
20
+ else
21
+ puts "You should add the missing columns or indexes."
22
+ end
23
+ end
24
+
25
+ # tells which tables are missing updated_at and index on updated_at
26
+ def no_timestamps
27
+ @conn.tables - timestamps
28
+ end
29
+
30
+ def timestamps
31
+ tables = []
32
+ @conn.tables.each do |table|
33
+ has_updated_at = @conn.columns(table).detect {|col| col.name == "updated_at" }
34
+ tables << table if has_updated_at
35
+ end
36
+ tables
37
+ end
38
+
39
+ def indexes
40
+ tables = []
41
+ timestamps.each do |table|
42
+ has_index = @conn.indexes(table).detect {|col| col.columns == ["updated_at"] }
43
+ tables << table if has_index
44
+ end
45
+ tables
46
+ end
47
+
48
+ def no_indexes
49
+ timestamps - indexes
50
+ end
51
+ end
52
+ end
@@ -1,16 +1,6 @@
1
1
  module SchemaTransformer
2
- class UsageError < RuntimeError; end
3
-
4
2
  class Base
5
- include Help
6
- @@stagger = 0
7
- def self.run(options)
8
- @@stagger = options[:stagger] || 0
9
- @transformer = SchemaTransformer::Base.new(options[:base] || Dir.pwd)
10
- @transformer.run(options)
11
- end
12
-
13
- attr_reader :options, :temp_table, :table
3
+ attr_reader :options
14
4
  def initialize(base = File.expand_path("..", __FILE__), options = {})
15
5
  @base = base
16
6
  @db, @log, @mail = ActiveWrapper.setup(
@@ -20,241 +10,6 @@ module SchemaTransformer
20
10
  )
21
11
  @db.establish_connection
22
12
  @conn = ActiveRecord::Base.connection
23
-
24
- @batch_size = options[:batch_size] || 10_000
25
- end
26
-
27
- def run(options)
28
- @action = options[:action].first
29
- case @action
30
- when "generate"
31
- self.generate
32
- help(:generate)
33
- when "sync"
34
- help(:sync_progress)
35
- table = options[:action][1]
36
- self.gather_info(table)
37
- self.create
38
- self.sync
39
- help(:sync)
40
- when "switch"
41
- table = options[:action][1]
42
- self.gather_info(table)
43
- self.switch
44
- self.cleanup
45
- help(:switch)
46
- else
47
- raise UsageError, "Invalid action #{@action}"
48
- end
49
- end
50
-
51
- def generate
52
- data = {}
53
- ask "What is the name of the table you want to alter?"
54
- data[:table] = gets(:table)
55
- ask <<-TXT
56
- What is the modification to the table?
57
- Examples 1:
58
- ADD COLUMN smart tinyint(1) DEFAULT '0'
59
- Examples 2:
60
- ADD INDEX idx_name (name)
61
- Examples 3:
62
- ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
63
- TXT
64
- data[:mod] = gets(:mod)
65
- path = transform_file(data[:table])
66
- FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
67
- File.open(path,"w") { |f| f << data.to_json }
68
- @table = data[:table]
69
- data
70
- end
71
-
72
- def gather_info(table)
73
- if table.nil?
74
- raise UsageError, "You need to specific the table name: schema_transformer #{@action} <table_name>"
75
- end
76
- data = JSON.parse(IO.read(transform_file(table)))
77
- @table = data["table"]
78
- @mod = data["mod"]
79
- # variables need for rest of the program
80
- @temp_table = "#{@table}_st_temp"
81
- @trash_table = "#{@table}_st_trash"
82
- @model = define_model(@table)
83
- end
84
-
85
- def create
86
- if self.temp_table_exists?
87
- @temp_model = define_model(@temp_table)
88
- else
89
- sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
90
- sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
91
- @conn.execute(sql_create)
92
- @conn.execute(sql_mod)
93
- @temp_model = define_model(@temp_table)
94
- end
95
- reset_column_info
96
- end
97
-
98
- def sync
99
- res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
100
- start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
101
- find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
102
- # puts "batch #{batch.inspect}"
103
- lower = batch.first
104
- upper = batch.last
105
-
106
- columns = insert_columns_sql
107
- sql = %Q{
108
- INSERT INTO #{@temp_table} (
109
- SELECT #{columns}
110
- FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
111
- )
112
- }
113
- # puts sql
114
- @conn.execute(sql)
115
-
116
- if @@stagger > 0
117
- log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
118
- sleep(@@stagger)
119
- end
120
- end
121
- end
122
-
123
- def final_sync
124
- @temp_model = define_model(@temp_table)
125
- reset_column_info
126
-
127
- sync
128
- columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
129
- # need to limit the final sync, if we do the entire table it takes a long time
130
- limit_cond = get_limit_cond
131
- sql = %{
132
- UPDATE #{@temp_table} INNER JOIN #{@table}
133
- ON #{@temp_table}.id = #{@table}.id
134
- SET #{columns}
135
- WHERE #{limit_cond}
136
- }
137
- # puts sql
138
- @conn.execute(sql)
139
- end
140
-
141
- def switch
142
- final_sync
143
- to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
144
- from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
145
- @conn.execute(to_trash)
146
- @conn.execute(from_temp)
147
- end
148
-
149
- def cleanup
150
- sql = %Q{DROP TABLE #{@trash_table}}
151
- @conn.execute(sql)
152
- end
153
-
154
- def get_limit_cond
155
- if @model.column_names.include?("updated_at")
156
- "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
157
- else
158
- sql = "select id from #{@table} order by id desc limit 100000"
159
- resp = @conn.execute(sql)
160
- bound = 0
161
- while row = resp.fetch_row do
162
- bound = row[0].to_i
163
- end
164
- "#{@table}.id >= #{bound}"
165
- end
166
- end
167
-
168
- # the parameter is only for testing
169
- def gets(name = nil)
170
- STDIN.gets.strip
171
- end
172
-
173
- def subset_columns
174
- removed = @model.column_names - @temp_model.column_names
175
- subset = @model.column_names - removed
176
- end
177
-
178
- def insert_columns_sql
179
- # existing subset
180
- subset = subset_columns
181
-
182
- # added
183
- added_s = @temp_model.column_names - @model.column_names
184
- added = @temp_model.columns.
185
- select{|c| added_s.include?(c.name) }.
186
- collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
187
-
188
- # combine both
189
- columns = subset.collect{|x| "`#{x}`"} + added
190
- sql = columns.join(", ")
191
- end
192
-
193
- # returns Array of record ids
194
- def find(table, cond)
195
- sql = "SELECT id FROM #{table} WHERE #{cond}"
196
- response = @conn.execute(sql)
197
- results = []
198
- while row = response.fetch_row do
199
- results << row[0].to_i
200
- end
201
- results
202
- end
203
-
204
- # lower memory heavy version of ActiveRecord's find in batches
205
- def find_in_batches(table, options = {})
206
- raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order]
207
- raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
208
-
209
- start = options.delete(:start).to_i
210
- batch_size = options.delete(:batch_size) || 1000
211
- order_limit = "ORDER BY id LIMIT #{batch_size}"
212
-
213
- records = find(table, "id >= #{start} #{order_limit}")
214
- while records.any?
215
- yield records
216
-
217
- break if records.size < batch_size
218
- records = find(table, "id > #{records.last} #{order_limit}")
219
- end
220
- end
221
-
222
- def define_model(table)
223
- # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
224
- Object.class_eval(<<-code)
225
- class #{table.classify} < ActiveRecord::Base
226
- set_table_name "#{table}"
227
- end
228
- code
229
- table.classify.constantize # returns the constant
230
- end
231
-
232
- def transform_file(table)
233
- @base+"/config/schema_transformations/#{table}.json"
234
- end
235
-
236
- def temp_table_exists?
237
- @conn.table_exists?(@temp_table)
238
- end
239
-
240
- def reset_column_info
241
- @model.reset_column_information
242
- @temp_model.reset_column_information
243
- end
244
-
245
- def log(msg)
246
- @log.info(msg)
247
- end
248
-
249
- private
250
- def ask(msg)
251
- puts msg
252
- print "> "
253
- end
254
-
255
- def extract_default(col)
256
- @conn.quote(col.default)
257
13
  end
258
-
259
14
  end
260
- end
15
+ end
@@ -1,8 +1,3 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'rubygems'
4
- require 'active_wrapper'
5
-
6
1
  module SchemaTransformer
7
2
  class CLI
8
3
 
@@ -80,18 +75,23 @@ module SchemaTransformer
80
75
  end
81
76
 
82
77
  def run
83
- begin
84
- SchemaTransformer::Base.run(options)
85
- rescue UsageError => e
86
- puts "Usage Error: #{e.message}"
87
- puts help_message
88
- puts option_parser
78
+ @action = options[:action].first
79
+ if @action == "analyze"
80
+ SchemaTransformer::Analyze.run(options)
81
+ else
82
+ begin
83
+ SchemaTransformer::Transform.run(options)
84
+ rescue UsageError => e
85
+ puts "Usage Error: #{e.message}"
86
+ puts help_message
87
+ puts option_parser
88
+ end
89
89
  end
90
90
  end
91
91
 
92
92
  private
93
93
  def help_message
94
- "Available actions: generate, sync, switch"
94
+ "Available actions: analyze, generate, sync, switch"
95
95
  end
96
96
  end
97
97
 
@@ -0,0 +1,252 @@
1
+ module SchemaTransformer
2
+ class UsageError < RuntimeError; end
3
+
4
+ class Transform < Base
5
+ include Help
6
+ @@stagger = 0
7
+ def self.run(options)
8
+ @@stagger = options[:stagger] || 0
9
+ @transformer = SchemaTransformer::Transform.new(options[:base] || Dir.pwd)
10
+ @transformer.run(options)
11
+ end
12
+
13
+ attr_reader :temp_table, :table
14
+ def initialize(base = File.expand_path("..", __FILE__), options = {})
15
+ super
16
+ @batch_size = options[:batch_size] || 10_000
17
+ end
18
+
19
+ def run(options)
20
+ @action = options[:action].first
21
+ case @action
22
+ when "generate"
23
+ self.generate
24
+ help(:generate)
25
+ when "sync"
26
+ help(:sync_progress)
27
+ table = options[:action][1]
28
+ self.gather_info(table)
29
+ self.create
30
+ self.sync
31
+ help(:sync)
32
+ when "switch"
33
+ table = options[:action][1]
34
+ self.gather_info(table)
35
+ self.switch
36
+ self.cleanup
37
+ help(:switch)
38
+ else
39
+ raise UsageError, "Invalid action #{@action}"
40
+ end
41
+ end
42
+
43
+ def generate
44
+ data = {}
45
+ ask "What is the name of the table you want to alter?"
46
+ data[:table] = gets(:table)
47
+ ask <<-TXT
48
+ What is the modification to the table?
49
+ Examples 1:
50
+ ADD COLUMN smart tinyint(1) DEFAULT '0'
51
+ Examples 2:
52
+ ADD INDEX idx_name (name)
53
+ Examples 3:
54
+ ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
55
+ TXT
56
+ data[:mod] = gets(:mod)
57
+ path = transform_file(data[:table])
58
+ FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
59
+ File.open(path,"w") { |f| f << data.to_json }
60
+ @table = data[:table]
61
+ data
62
+ end
63
+
64
+ def gather_info(table)
65
+ if table.nil?
66
+ raise UsageError, "You need to specify the table name: schema_transformer #{@action} <table_name>"
67
+ end
68
+ data = JSON.parse(IO.read(transform_file(table)))
69
+ @table = data["table"]
70
+ @mod = data["mod"]
71
+ # variables needed for rest of the program
72
+ @temp_table = "#{@table}_st_temp"
73
+ @trash_table = "#{@table}_st_trash"
74
+ @model = define_model(@table)
75
+ end
76
+
77
+ def create
78
+ if self.temp_table_exists?
79
+ @temp_model = define_model(@temp_table)
80
+ else
81
+ sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
82
+ sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
83
+ @conn.execute(sql_create)
84
+ @conn.execute(sql_mod)
85
+ @temp_model = define_model(@temp_table)
86
+ end
87
+ reset_column_info
88
+ end
89
+
90
+ def sync
91
+ res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
92
+ start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
93
+ find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
94
+ # puts "batch #{batch.inspect}"
95
+ lower = batch.first
96
+ upper = batch.last
97
+
98
+ columns = insert_columns_sql
99
+ sql = %Q{
100
+ INSERT INTO #{@temp_table} (
101
+ SELECT #{columns}
102
+ FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
103
+ )
104
+ }
105
+ # puts sql
106
+ @conn.execute(sql)
107
+
108
+ if @@stagger > 0
109
+ log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
110
+ sleep(@@stagger)
111
+ end
112
+ end
113
+ end
114
+
115
+ def final_sync
116
+ @temp_model = define_model(@temp_table)
117
+ reset_column_info
118
+
119
+ sync
120
+ columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
121
+ # need to limit the final sync, if we do the entire table it takes a long time
122
+ limit_cond = get_limit_cond
123
+ sql = %{
124
+ UPDATE #{@temp_table} INNER JOIN #{@table}
125
+ ON #{@temp_table}.id = #{@table}.id
126
+ SET #{columns}
127
+ WHERE #{limit_cond}
128
+ }
129
+ # puts sql
130
+ @conn.execute(sql)
131
+ end
132
+
133
+ def switch
134
+ final_sync
135
+ to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
136
+ from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
137
+ @conn.execute(to_trash)
138
+ @conn.execute(from_temp)
139
+ end
140
+
141
+ def cleanup
142
+ sql = %Q{DROP TABLE #{@trash_table}}
143
+ @conn.execute(sql)
144
+ end
145
+
146
+ def get_limit_cond
147
+ if @model.column_names.include?("updated_at")
148
+ "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
149
+ else
150
+ sql = "select id from #{@table} order by id desc limit 100000"
151
+ resp = @conn.execute(sql)
152
+ bound = 0
153
+ while row = resp.fetch_row do
154
+ bound = row[0].to_i
155
+ end
156
+ "#{@table}.id >= #{bound}"
157
+ end
158
+ end
159
+
160
+ # the parameter is only for testing
161
+ def gets(name = nil)
162
+ STDIN.gets.strip
163
+ end
164
+
165
+ def subset_columns
166
+ removed = @model.column_names - @temp_model.column_names
167
+ subset = @model.column_names - removed
168
+ end
169
+
170
+ def insert_columns_sql
171
+ # existing subset
172
+ subset = subset_columns
173
+
174
+ # added
175
+ added_s = @temp_model.column_names - @model.column_names
176
+ added = @temp_model.columns.
177
+ select{|c| added_s.include?(c.name) }.
178
+ collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
179
+
180
+ # combine both
181
+ columns = subset.collect{|x| "`#{x}`"} + added
182
+ sql = columns.join(", ")
183
+ end
184
+
185
+ # returns Array of record ids
186
+ def find(table, cond)
187
+ sql = "SELECT id FROM #{table} WHERE #{cond}"
188
+ response = @conn.execute(sql)
189
+ results = []
190
+ while row = response.fetch_row do
191
+ results << row[0].to_i
192
+ end
193
+ results
194
+ end
195
+
196
+ # lower memory heavy version of ActiveRecord's find in batches
197
+ def find_in_batches(table, options = {})
198
+ raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order]
199
+ raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
200
+
201
+ start = options.delete(:start).to_i
202
+ batch_size = options.delete(:batch_size) || 1000
203
+ order_limit = "ORDER BY id LIMIT #{batch_size}"
204
+
205
+ records = find(table, "id >= #{start} #{order_limit}")
206
+ while records.any?
207
+ yield records
208
+
209
+ break if records.size < batch_size
210
+ records = find(table, "id > #{records.last} #{order_limit}")
211
+ end
212
+ end
213
+
214
+ def define_model(table)
215
+ # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
216
+ Object.class_eval(<<-code)
217
+ class #{table.classify} < ActiveRecord::Base
218
+ set_table_name "#{table}"
219
+ end
220
+ code
221
+ table.classify.constantize # returns the constant
222
+ end
223
+
224
+ def transform_file(table)
225
+ @base+"/config/schema_transformations/#{table}.json"
226
+ end
227
+
228
+ def temp_table_exists?
229
+ @conn.table_exists?(@temp_table)
230
+ end
231
+
232
+ def reset_column_info
233
+ @model.reset_column_information
234
+ @temp_model.reset_column_information
235
+ end
236
+
237
+ def log(msg)
238
+ @log.info(msg)
239
+ end
240
+
241
+ private
242
+ def ask(msg)
243
+ puts msg
244
+ print "> "
245
+ end
246
+
247
+ def extract_default(col)
248
+ @conn.quote(col.default)
249
+ end
250
+
251
+ end
252
+ end