schema_transformer 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,6 +62,25 @@ Thank you. Have a very nice day.
62
62
  tung@walle $
63
63
  </pre>
64
64
 
65
+ It is strongly recommended that the tables that you are altering have updated_at timestamp columns
66
+ with indexes on them. If the tables do not have updated_at columns at all, then only the last 100,000 rows
67
+ get updated in the final sync in the "schema_transformer switch ..." command. If the updated_at column
68
+ is available then the final sync will use it to update all the data. However, because it uses the
69
+ updated_at column, it is extremely important that the updated_at column is indexed or the final
70
+ "schema_transformer switch ..." command possibly could be slow. Because of this, you should analyze your
71
+ database schema for missing updated_at columns and indexes with the command "schema_transformer analyze".
72
+
73
+ Example:
74
+ <pre>
75
+ tung@walle $ schema_transformer analyze
76
+ Analyzing your database schema...
77
+ There are no tables without the updated_at timestamp. GOOD
78
+ These tables do have an updated_at timestamp, but no index:
79
+ users
80
+ tung@walle $
81
+ </pre>
82
+
83
+
65
84
  FAQ
66
85
  -------
67
86
 
data/TODO CHANGED
@@ -8,4 +8,3 @@
8
8
  * TODO:
9
9
  * add logging again: schema_transformer.log
10
10
  * updated_at if its available and use a real time vs some guess
11
- * clean up spec: use real mocks, get rid of $testing_books
@@ -7,4 +7,6 @@ require 'fileutils'
7
7
  require File.expand_path('../schema_transformer/version', __FILE__)
8
8
  require File.expand_path('../schema_transformer/help', __FILE__)
9
9
  require File.expand_path('../schema_transformer/base', __FILE__)
10
- require File.expand_path('../schema_transformer/cli', __FILE__)
10
+ require File.expand_path('../schema_transformer/transform', __FILE__)
11
+ require File.expand_path('../schema_transformer/cli', __FILE__)
12
+ require File.expand_path('../schema_transformer/analyze', __FILE__)
@@ -0,0 +1,52 @@
1
+ module SchemaTransformer
2
+ class Analyze < Base
3
+ def self.run(options)
4
+ @analyze = Analyze.new(options[:base] || Dir.pwd, options)
5
+ puts "Analyzing your database schema..."
6
+ if @analyze.no_timestamps.empty?
7
+ puts "There are no tables without the updated_at timestamp. GOOD"
8
+ else
9
+ puts "These tables do not have updated_at timestamps: "
10
+ puts " #{@analyze.no_timestamps.join("\n ")}"
11
+ end
12
+ if @analyze.no_indexes.empty?
13
+ puts "There are no tables with updated_at timestamp but no indexes. GOOD"
14
+ else
15
+ puts "These tables do have an updated_at timestamp, but no index: "
16
+ puts " #{@analyze.no_indexes.join("\n ")}"
17
+ end
18
+ if @analyze.no_timestamps.empty? or @analyze.no_timestamps.empty?
19
+ "Everything looks GOOD!"
20
+ else
21
+ puts "You should add the missing columns or indexes."
22
+ end
23
+ end
24
+
25
+ # tells which tables are missing updated_at and index on updated_at
26
+ def no_timestamps
27
+ @conn.tables - timestamps
28
+ end
29
+
30
+ def timestamps
31
+ tables = []
32
+ @conn.tables.each do |table|
33
+ has_updated_at = @conn.columns(table).detect {|col| col.name == "updated_at" }
34
+ tables << table if has_updated_at
35
+ end
36
+ tables
37
+ end
38
+
39
+ def indexes
40
+ tables = []
41
+ timestamps.each do |table|
42
+ has_index = @conn.indexes(table).detect {|col| col.columns == ["updated_at"] }
43
+ tables << table if has_index
44
+ end
45
+ tables
46
+ end
47
+
48
+ def no_indexes
49
+ timestamps - indexes
50
+ end
51
+ end
52
+ end
@@ -1,16 +1,6 @@
1
1
  module SchemaTransformer
2
- class UsageError < RuntimeError; end
3
-
4
2
  class Base
5
- include Help
6
- @@stagger = 0
7
- def self.run(options)
8
- @@stagger = options[:stagger] || 0
9
- @transformer = SchemaTransformer::Base.new(options[:base] || Dir.pwd)
10
- @transformer.run(options)
11
- end
12
-
13
- attr_reader :options, :temp_table, :table
3
+ attr_reader :options
14
4
  def initialize(base = File.expand_path("..", __FILE__), options = {})
15
5
  @base = base
16
6
  @db, @log, @mail = ActiveWrapper.setup(
@@ -20,241 +10,6 @@ module SchemaTransformer
20
10
  )
21
11
  @db.establish_connection
22
12
  @conn = ActiveRecord::Base.connection
23
-
24
- @batch_size = options[:batch_size] || 10_000
25
- end
26
-
27
- def run(options)
28
- @action = options[:action].first
29
- case @action
30
- when "generate"
31
- self.generate
32
- help(:generate)
33
- when "sync"
34
- help(:sync_progress)
35
- table = options[:action][1]
36
- self.gather_info(table)
37
- self.create
38
- self.sync
39
- help(:sync)
40
- when "switch"
41
- table = options[:action][1]
42
- self.gather_info(table)
43
- self.switch
44
- self.cleanup
45
- help(:switch)
46
- else
47
- raise UsageError, "Invalid action #{@action}"
48
- end
49
- end
50
-
51
- def generate
52
- data = {}
53
- ask "What is the name of the table you want to alter?"
54
- data[:table] = gets(:table)
55
- ask <<-TXT
56
- What is the modification to the table?
57
- Examples 1:
58
- ADD COLUMN smart tinyint(1) DEFAULT '0'
59
- Examples 2:
60
- ADD INDEX idx_name (name)
61
- Examples 3:
62
- ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
63
- TXT
64
- data[:mod] = gets(:mod)
65
- path = transform_file(data[:table])
66
- FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
67
- File.open(path,"w") { |f| f << data.to_json }
68
- @table = data[:table]
69
- data
70
- end
71
-
72
- def gather_info(table)
73
- if table.nil?
74
- raise UsageError, "You need to specific the table name: schema_transformer #{@action} <table_name>"
75
- end
76
- data = JSON.parse(IO.read(transform_file(table)))
77
- @table = data["table"]
78
- @mod = data["mod"]
79
- # variables need for rest of the program
80
- @temp_table = "#{@table}_st_temp"
81
- @trash_table = "#{@table}_st_trash"
82
- @model = define_model(@table)
83
- end
84
-
85
- def create
86
- if self.temp_table_exists?
87
- @temp_model = define_model(@temp_table)
88
- else
89
- sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
90
- sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
91
- @conn.execute(sql_create)
92
- @conn.execute(sql_mod)
93
- @temp_model = define_model(@temp_table)
94
- end
95
- reset_column_info
96
- end
97
-
98
- def sync
99
- res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
100
- start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
101
- find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
102
- # puts "batch #{batch.inspect}"
103
- lower = batch.first
104
- upper = batch.last
105
-
106
- columns = insert_columns_sql
107
- sql = %Q{
108
- INSERT INTO #{@temp_table} (
109
- SELECT #{columns}
110
- FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
111
- )
112
- }
113
- # puts sql
114
- @conn.execute(sql)
115
-
116
- if @@stagger > 0
117
- log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
118
- sleep(@@stagger)
119
- end
120
- end
121
- end
122
-
123
- def final_sync
124
- @temp_model = define_model(@temp_table)
125
- reset_column_info
126
-
127
- sync
128
- columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
129
- # need to limit the final sync, if we do the entire table it takes a long time
130
- limit_cond = get_limit_cond
131
- sql = %{
132
- UPDATE #{@temp_table} INNER JOIN #{@table}
133
- ON #{@temp_table}.id = #{@table}.id
134
- SET #{columns}
135
- WHERE #{limit_cond}
136
- }
137
- # puts sql
138
- @conn.execute(sql)
139
- end
140
-
141
- def switch
142
- final_sync
143
- to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
144
- from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
145
- @conn.execute(to_trash)
146
- @conn.execute(from_temp)
147
- end
148
-
149
- def cleanup
150
- sql = %Q{DROP TABLE #{@trash_table}}
151
- @conn.execute(sql)
152
- end
153
-
154
- def get_limit_cond
155
- if @model.column_names.include?("updated_at")
156
- "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
157
- else
158
- sql = "select id from #{@table} order by id desc limit 100000"
159
- resp = @conn.execute(sql)
160
- bound = 0
161
- while row = resp.fetch_row do
162
- bound = row[0].to_i
163
- end
164
- "#{@table}.id >= #{bound}"
165
- end
166
- end
167
-
168
- # the parameter is only for testing
169
- def gets(name = nil)
170
- STDIN.gets.strip
171
- end
172
-
173
- def subset_columns
174
- removed = @model.column_names - @temp_model.column_names
175
- subset = @model.column_names - removed
176
- end
177
-
178
- def insert_columns_sql
179
- # existing subset
180
- subset = subset_columns
181
-
182
- # added
183
- added_s = @temp_model.column_names - @model.column_names
184
- added = @temp_model.columns.
185
- select{|c| added_s.include?(c.name) }.
186
- collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
187
-
188
- # combine both
189
- columns = subset.collect{|x| "`#{x}`"} + added
190
- sql = columns.join(", ")
191
- end
192
-
193
- # returns Array of record ids
194
- def find(table, cond)
195
- sql = "SELECT id FROM #{table} WHERE #{cond}"
196
- response = @conn.execute(sql)
197
- results = []
198
- while row = response.fetch_row do
199
- results << row[0].to_i
200
- end
201
- results
202
- end
203
-
204
- # lower memory heavy version of ActiveRecord's find in batches
205
- def find_in_batches(table, options = {})
206
- raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order]
207
- raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
208
-
209
- start = options.delete(:start).to_i
210
- batch_size = options.delete(:batch_size) || 1000
211
- order_limit = "ORDER BY id LIMIT #{batch_size}"
212
-
213
- records = find(table, "id >= #{start} #{order_limit}")
214
- while records.any?
215
- yield records
216
-
217
- break if records.size < batch_size
218
- records = find(table, "id > #{records.last} #{order_limit}")
219
- end
220
- end
221
-
222
- def define_model(table)
223
- # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
224
- Object.class_eval(<<-code)
225
- class #{table.classify} < ActiveRecord::Base
226
- set_table_name "#{table}"
227
- end
228
- code
229
- table.classify.constantize # returns the constant
230
- end
231
-
232
- def transform_file(table)
233
- @base+"/config/schema_transformations/#{table}.json"
234
- end
235
-
236
- def temp_table_exists?
237
- @conn.table_exists?(@temp_table)
238
- end
239
-
240
- def reset_column_info
241
- @model.reset_column_information
242
- @temp_model.reset_column_information
243
- end
244
-
245
- def log(msg)
246
- @log.info(msg)
247
- end
248
-
249
- private
250
- def ask(msg)
251
- puts msg
252
- print "> "
253
- end
254
-
255
- def extract_default(col)
256
- @conn.quote(col.default)
257
13
  end
258
-
259
14
  end
260
- end
15
+ end
@@ -1,8 +1,3 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'rubygems'
4
- require 'active_wrapper'
5
-
6
1
  module SchemaTransformer
7
2
  class CLI
8
3
 
@@ -80,18 +75,23 @@ module SchemaTransformer
80
75
  end
81
76
 
82
77
  def run
83
- begin
84
- SchemaTransformer::Base.run(options)
85
- rescue UsageError => e
86
- puts "Usage Error: #{e.message}"
87
- puts help_message
88
- puts option_parser
78
+ @action = options[:action].first
79
+ if @action == "analyze"
80
+ SchemaTransformer::Analyze.run(options)
81
+ else
82
+ begin
83
+ SchemaTransformer::Transform.run(options)
84
+ rescue UsageError => e
85
+ puts "Usage Error: #{e.message}"
86
+ puts help_message
87
+ puts option_parser
88
+ end
89
89
  end
90
90
  end
91
91
 
92
92
  private
93
93
  def help_message
94
- "Available actions: generate, sync, switch"
94
+ "Available actions: analyze, generate, sync, switch"
95
95
  end
96
96
  end
97
97
 
@@ -0,0 +1,252 @@
1
+ module SchemaTransformer
2
+ class UsageError < RuntimeError; end
3
+
4
+ class Transform < Base
5
+ include Help
6
+ @@stagger = 0
7
+ def self.run(options)
8
+ @@stagger = options[:stagger] || 0
9
+ @transformer = SchemaTransformer::Transform.new(options[:base] || Dir.pwd)
10
+ @transformer.run(options)
11
+ end
12
+
13
+ attr_reader :temp_table, :table
14
+ def initialize(base = File.expand_path("..", __FILE__), options = {})
15
+ super
16
+ @batch_size = options[:batch_size] || 10_000
17
+ end
18
+
19
+ def run(options)
20
+ @action = options[:action].first
21
+ case @action
22
+ when "generate"
23
+ self.generate
24
+ help(:generate)
25
+ when "sync"
26
+ help(:sync_progress)
27
+ table = options[:action][1]
28
+ self.gather_info(table)
29
+ self.create
30
+ self.sync
31
+ help(:sync)
32
+ when "switch"
33
+ table = options[:action][1]
34
+ self.gather_info(table)
35
+ self.switch
36
+ self.cleanup
37
+ help(:switch)
38
+ else
39
+ raise UsageError, "Invalid action #{@action}"
40
+ end
41
+ end
42
+
43
+ def generate
44
+ data = {}
45
+ ask "What is the name of the table you want to alter?"
46
+ data[:table] = gets(:table)
47
+ ask <<-TXT
48
+ What is the modification to the table?
49
+ Examples 1:
50
+ ADD COLUMN smart tinyint(1) DEFAULT '0'
51
+ Examples 2:
52
+ ADD INDEX idx_name (name)
53
+ Examples 3:
54
+ ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
55
+ TXT
56
+ data[:mod] = gets(:mod)
57
+ path = transform_file(data[:table])
58
+ FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
59
+ File.open(path,"w") { |f| f << data.to_json }
60
+ @table = data[:table]
61
+ data
62
+ end
63
+
64
+ def gather_info(table)
65
+ if table.nil?
66
+ raise UsageError, "You need to specific the table name: schema_transformer #{@action} <table_name>"
67
+ end
68
+ data = JSON.parse(IO.read(transform_file(table)))
69
+ @table = data["table"]
70
+ @mod = data["mod"]
71
+ # variables need for rest of the program
72
+ @temp_table = "#{@table}_st_temp"
73
+ @trash_table = "#{@table}_st_trash"
74
+ @model = define_model(@table)
75
+ end
76
+
77
+ def create
78
+ if self.temp_table_exists?
79
+ @temp_model = define_model(@temp_table)
80
+ else
81
+ sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
82
+ sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
83
+ @conn.execute(sql_create)
84
+ @conn.execute(sql_mod)
85
+ @temp_model = define_model(@temp_table)
86
+ end
87
+ reset_column_info
88
+ end
89
+
90
+ def sync
91
+ res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
92
+ start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
93
+ find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
94
+ # puts "batch #{batch.inspect}"
95
+ lower = batch.first
96
+ upper = batch.last
97
+
98
+ columns = insert_columns_sql
99
+ sql = %Q{
100
+ INSERT INTO #{@temp_table} (
101
+ SELECT #{columns}
102
+ FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
103
+ )
104
+ }
105
+ # puts sql
106
+ @conn.execute(sql)
107
+
108
+ if @@stagger > 0
109
+ log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
110
+ sleep(@@stagger)
111
+ end
112
+ end
113
+ end
114
+
115
+ def final_sync
116
+ @temp_model = define_model(@temp_table)
117
+ reset_column_info
118
+
119
+ sync
120
+ columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
121
+ # need to limit the final sync, if we do the entire table it takes a long time
122
+ limit_cond = get_limit_cond
123
+ sql = %{
124
+ UPDATE #{@temp_table} INNER JOIN #{@table}
125
+ ON #{@temp_table}.id = #{@table}.id
126
+ SET #{columns}
127
+ WHERE #{limit_cond}
128
+ }
129
+ # puts sql
130
+ @conn.execute(sql)
131
+ end
132
+
133
+ def switch
134
+ final_sync
135
+ to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
136
+ from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
137
+ @conn.execute(to_trash)
138
+ @conn.execute(from_temp)
139
+ end
140
+
141
+ def cleanup
142
+ sql = %Q{DROP TABLE #{@trash_table}}
143
+ @conn.execute(sql)
144
+ end
145
+
146
+ def get_limit_cond
147
+ if @model.column_names.include?("updated_at")
148
+ "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
149
+ else
150
+ sql = "select id from #{@table} order by id desc limit 100000"
151
+ resp = @conn.execute(sql)
152
+ bound = 0
153
+ while row = resp.fetch_row do
154
+ bound = row[0].to_i
155
+ end
156
+ "#{@table}.id >= #{bound}"
157
+ end
158
+ end
159
+
160
+ # the parameter is only for testing
161
+ def gets(name = nil)
162
+ STDIN.gets.strip
163
+ end
164
+
165
+ def subset_columns
166
+ removed = @model.column_names - @temp_model.column_names
167
+ subset = @model.column_names - removed
168
+ end
169
+
170
+ def insert_columns_sql
171
+ # existing subset
172
+ subset = subset_columns
173
+
174
+ # added
175
+ added_s = @temp_model.column_names - @model.column_names
176
+ added = @temp_model.columns.
177
+ select{|c| added_s.include?(c.name) }.
178
+ collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
179
+
180
+ # combine both
181
+ columns = subset.collect{|x| "`#{x}`"} + added
182
+ sql = columns.join(", ")
183
+ end
184
+
185
+ # returns Array of record ids
186
+ def find(table, cond)
187
+ sql = "SELECT id FROM #{table} WHERE #{cond}"
188
+ response = @conn.execute(sql)
189
+ results = []
190
+ while row = response.fetch_row do
191
+ results << row[0].to_i
192
+ end
193
+ results
194
+ end
195
+
196
+ # lower memory heavy version of ActiveRecord's find in batches
197
+ def find_in_batches(table, options = {})
198
+ raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order]
199
+ raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
200
+
201
+ start = options.delete(:start).to_i
202
+ batch_size = options.delete(:batch_size) || 1000
203
+ order_limit = "ORDER BY id LIMIT #{batch_size}"
204
+
205
+ records = find(table, "id >= #{start} #{order_limit}")
206
+ while records.any?
207
+ yield records
208
+
209
+ break if records.size < batch_size
210
+ records = find(table, "id > #{records.last} #{order_limit}")
211
+ end
212
+ end
213
+
214
+ def define_model(table)
215
+ # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
216
+ Object.class_eval(<<-code)
217
+ class #{table.classify} < ActiveRecord::Base
218
+ set_table_name "#{table}"
219
+ end
220
+ code
221
+ table.classify.constantize # returns the constant
222
+ end
223
+
224
+ def transform_file(table)
225
+ @base+"/config/schema_transformations/#{table}.json"
226
+ end
227
+
228
+ def temp_table_exists?
229
+ @conn.table_exists?(@temp_table)
230
+ end
231
+
232
+ def reset_column_info
233
+ @model.reset_column_information
234
+ @temp_model.reset_column_information
235
+ end
236
+
237
+ def log(msg)
238
+ @log.info(msg)
239
+ end
240
+
241
+ private
242
+ def ask(msg)
243
+ puts msg
244
+ print "> "
245
+ end
246
+
247
+ def extract_default(col)
248
+ @conn.quote(col.default)
249
+ end
250
+
251
+ end
252
+ end