schema_transformer 0.2.0 → 0.3.0
- data/README.markdown +19 -0
- data/TODO +0 -1
- data/lib/schema_transformer.rb +3 -1
- data/lib/schema_transformer/analyze.rb +52 -0
- data/lib/schema_transformer/base.rb +2 -247
- data/lib/schema_transformer/cli.rb +12 -12
- data/lib/schema_transformer/transform.rb +252 -0
- data/lib/schema_transformer/version.rb +1 -1
- data/test/fake_app/config/schema_transformations/books.json +1 -1
- data/test/fake_app/log/schema_transformer.log +2143 -57778
- data/test/schema_transformer_test.rb +141 -193
- data/test/test_helper.rb +75 -0
- metadata +7 -5
- data/test/fake_app/config/schema_transformations/users.json +0 -1
data/README.markdown
CHANGED
@@ -62,6 +62,25 @@ Thank you. Have a very nice day.
 tung@walle $
 </pre>
 
+It is strongly recommended that the tables you are altering have updated_at timestamp columns
+with indexes on them. If the tables do not have updated_at columns at all, then only the last 100,000 rows
+get updated in the final sync in the "schema_transformer switch ..." command. If the updated_at column
+is available, then the final sync will use it to update all the data. However, because it uses the
+updated_at column, it is extremely important that the updated_at column is indexed, or the final
+"schema_transformer switch ..." command could be slow. Because of this, you should analyze your
+database schema for missing updated_at columns and indexes with the command "schema_transformer analyze".
+
+Example:
+<pre>
+tung@walle $ schema_transformer analyze
+Analyzing your database schema...
+There are no tables without the updated_at timestamp. GOOD
+These tables do have an updated_at timestamp, but no index:
+  users
+tung@walle $
+</pre>
+
+
 FAQ
 -------
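The README's new recommendation is straightforward to act on. Below is a minimal sketch of adding the column and index with a Rails migration of that era; the class and table names are hypothetical, not from the gem:

<pre>
class AddUpdatedAtToUsers < ActiveRecord::Migration
  def self.up
    add_column :users, :updated_at, :datetime
    add_index  :users, :updated_at
  end

  def self.down
    remove_index  :users, :updated_at
    remove_column :users, :updated_at
  end
end
</pre>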
data/TODO
CHANGED
data/lib/schema_transformer.rb
CHANGED
@@ -7,4 +7,6 @@ require 'fileutils'
 require File.expand_path('../schema_transformer/version', __FILE__)
 require File.expand_path('../schema_transformer/help', __FILE__)
 require File.expand_path('../schema_transformer/base', __FILE__)
-require File.expand_path('../schema_transformer/
+require File.expand_path('../schema_transformer/transform', __FILE__)
+require File.expand_path('../schema_transformer/cli', __FILE__)
+require File.expand_path('../schema_transformer/analyze', __FILE__)
data/lib/schema_transformer/analyze.rb
ADDED
@@ -0,0 +1,52 @@
+module SchemaTransformer
+  class Analyze < Base
+    def self.run(options)
+      @analyze = Analyze.new(options[:base] || Dir.pwd, options)
+      puts "Analyzing your database schema..."
+      if @analyze.no_timestamps.empty?
+        puts "There are no tables without the updated_at timestamp. GOOD"
+      else
+        puts "These tables do not have updated_at timestamps:"
+        puts "  #{@analyze.no_timestamps.join("\n  ")}"
+      end
+      if @analyze.no_indexes.empty?
+        puts "There are no tables with updated_at timestamp but no indexes. GOOD"
+      else
+        puts "These tables do have an updated_at timestamp, but no index:"
+        puts "  #{@analyze.no_indexes.join("\n  ")}"
+      end
+      if @analyze.no_timestamps.empty? and @analyze.no_indexes.empty?
+        puts "Everything looks GOOD!"
+      else
+        puts "You should add the missing columns or indexes."
+      end
+    end
+
+    # tells which tables are missing updated_at and an index on updated_at
+    def no_timestamps
+      @conn.tables - timestamps
+    end
+
+    def timestamps
+      tables = []
+      @conn.tables.each do |table|
+        has_updated_at = @conn.columns(table).detect {|col| col.name == "updated_at" }
+        tables << table if has_updated_at
+      end
+      tables
+    end
+
+    def indexes
+      tables = []
+      timestamps.each do |table|
+        has_index = @conn.indexes(table).detect {|col| col.columns == ["updated_at"] }
+        tables << table if has_index
+      end
+      tables
+    end
+
+    def no_indexes
+      timestamps - indexes
+    end
+  end
+end
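The analyzer can also be driven from Ruby rather than the CLI. A minimal sketch using only names from the diff above; the path is a placeholder, and :base defaults to Dir.pwd:

<pre>
require 'schema_transformer'

# Prints the same report as "schema_transformer analyze".
SchemaTransformer::Analyze.run(:base => "/path/to/app")
</pre>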
data/lib/schema_transformer/base.rb
CHANGED
@@ -1,16 +1,6 @@
 module SchemaTransformer
-  class UsageError < RuntimeError; end
-
   class Base
-
-    @@stagger = 0
-    def self.run(options)
-      @@stagger = options[:stagger] || 0
-      @transformer = SchemaTransformer::Base.new(options[:base] || Dir.pwd)
-      @transformer.run(options)
-    end
-
-    attr_reader :options, :temp_table, :table
+    attr_reader :options
     def initialize(base = File.expand_path("..", __FILE__), options = {})
       @base = base
       @db, @log, @mail = ActiveWrapper.setup(
@@ -20,241 +10,6 @@ module SchemaTransformer
       )
       @db.establish_connection
       @conn = ActiveRecord::Base.connection
-
-      @batch_size = options[:batch_size] || 10_000
-    end
-
-    def run(options)
-      @action = options[:action].first
-      case @action
-      when "generate"
-        self.generate
-        help(:generate)
-      when "sync"
-        help(:sync_progress)
-        table = options[:action][1]
-        self.gather_info(table)
-        self.create
-        self.sync
-        help(:sync)
-      when "switch"
-        table = options[:action][1]
-        self.gather_info(table)
-        self.switch
-        self.cleanup
-        help(:switch)
-      else
-        raise UsageError, "Invalid action #{@action}"
-      end
-    end
-
-    def generate
-      data = {}
-      ask "What is the name of the table you want to alter?"
-      data[:table] = gets(:table)
-      ask <<-TXT
-What is the modification to the table?
-Examples 1:
-  ADD COLUMN smart tinyint(1) DEFAULT '0'
-Examples 2:
-  ADD INDEX idx_name (name)
-Examples 3:
-  ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
-TXT
-      data[:mod] = gets(:mod)
-      path = transform_file(data[:table])
-      FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
-      File.open(path,"w") { |f| f << data.to_json }
-      @table = data[:table]
-      data
-    end
-
-    def gather_info(table)
-      if table.nil?
-        raise UsageError, "You need to specific the table name: schema_transformer #{@action} <table_name>"
-      end
-      data = JSON.parse(IO.read(transform_file(table)))
-      @table = data["table"]
-      @mod = data["mod"]
-      # variables need for rest of the program
-      @temp_table = "#{@table}_st_temp"
-      @trash_table = "#{@table}_st_trash"
-      @model = define_model(@table)
-    end
-
-    def create
-      if self.temp_table_exists?
-        @temp_model = define_model(@temp_table)
-      else
-        sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
-        sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
-        @conn.execute(sql_create)
-        @conn.execute(sql_mod)
-        @temp_model = define_model(@temp_table)
-      end
-      reset_column_info
-    end
-
-    def sync
-      res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
-      start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
-      find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
-        # puts "batch #{batch.inspect}"
-        lower = batch.first
-        upper = batch.last
-
-        columns = insert_columns_sql
-        sql = %Q{
-          INSERT INTO #{@temp_table} (
-            SELECT #{columns}
-            FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
-          )
-        }
-        # puts sql
-        @conn.execute(sql)
-
-        if @@stagger > 0
-          log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
-          sleep(@@stagger)
-        end
-      end
-    end
-
-    def final_sync
-      @temp_model = define_model(@temp_table)
-      reset_column_info
-
-      sync
-      columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
-      # need to limit the final sync, if we do the entire table it takes a long time
-      limit_cond = get_limit_cond
-      sql = %{
-        UPDATE #{@temp_table} INNER JOIN #{@table}
-          ON #{@temp_table}.id = #{@table}.id
-        SET #{columns}
-        WHERE #{limit_cond}
-      }
-      # puts sql
-      @conn.execute(sql)
-    end
-
-    def switch
-      final_sync
-      to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
-      from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
-      @conn.execute(to_trash)
-      @conn.execute(from_temp)
-    end
-
-    def cleanup
-      sql = %Q{DROP TABLE #{@trash_table}}
-      @conn.execute(sql)
-    end
-
-    def get_limit_cond
-      if @model.column_names.include?("updated_at")
-        "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
-      else
-        sql = "select id from #{@table} order by id desc limit 100000"
-        resp = @conn.execute(sql)
-        bound = 0
-        while row = resp.fetch_row do
-          bound = row[0].to_i
-        end
-        "#{@table}.id >= #{bound}"
-      end
-    end
-
-    # the parameter is only for testing
-    def gets(name = nil)
-      STDIN.gets.strip
-    end
-
-    def subset_columns
-      removed = @model.column_names - @temp_model.column_names
-      subset = @model.column_names - removed
-    end
-
-    def insert_columns_sql
-      # existing subset
-      subset = subset_columns
-
-      # added
-      added_s = @temp_model.column_names - @model.column_names
-      added = @temp_model.columns.
-                select{|c| added_s.include?(c.name) }.
-                collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
-
-      # combine both
-      columns = subset.collect{|x| "`#{x}`"} + added
-      sql = columns.join(", ")
-    end
-
-    # returns Array of record ids
-    def find(table, cond)
-      sql = "SELECT id FROM #{table} WHERE #{cond}"
-      response = @conn.execute(sql)
-      results = []
-      while row = response.fetch_row do
-        results << row[0].to_i
-      end
-      results
-    end
-
-    # lower memory heavy version of ActiveRecord's find in batches
-    def find_in_batches(table, options = {})
-      raise "You can't specify an order, it's forced to be #{batch_order}" if options[:order]
-      raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
-
-      start = options.delete(:start).to_i
-      batch_size = options.delete(:batch_size) || 1000
-      order_limit = "ORDER BY id LIMIT #{batch_size}"
-
-      records = find(table, "id >= #{start} #{order_limit}")
-      while records.any?
-        yield records
-
-        break if records.size < batch_size
-        records = find(table, "id > #{records.last} #{order_limit}")
-      end
-    end
-
-    def define_model(table)
-      # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
-      Object.class_eval(<<-code)
-        class #{table.classify} < ActiveRecord::Base
-          set_table_name "#{table}"
-        end
-      code
-      table.classify.constantize # returns the constant
-    end
-
-    def transform_file(table)
-      @base+"/config/schema_transformations/#{table}.json"
-    end
-
-    def temp_table_exists?
-      @conn.table_exists?(@temp_table)
-    end
-
-    def reset_column_info
-      @model.reset_column_information
-      @temp_model.reset_column_information
-    end
-
-    def log(msg)
-      @log.info(msg)
-    end
-
-    private
-    def ask(msg)
-      puts msg
-      print "> "
-    end
-
-    def extract_default(col)
-      @conn.quote(col.default)
     end
-
   end
-end
+end
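After this refactor, Base only performs the ActiveWrapper bootstrap and leaves @conn available to subclasses. A sketch of how a subclass picks that up, assuming it runs from an app directory with a database config; TableLister is a made-up example, not part of the gem:

<pre>
class TableLister < SchemaTransformer::Base
  def list
    @conn.tables  # @conn is set to ActiveRecord::Base.connection in Base#initialize
  end
end

TableLister.new(Dir.pwd).list
</pre>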
data/lib/schema_transformer/cli.rb
CHANGED
@@ -1,8 +1,3 @@
-#!/usr/bin/env ruby
-
-require 'rubygems'
-require 'active_wrapper'
-
 module SchemaTransformer
   class CLI
 
@@ -80,18 +75,23 @@ module SchemaTransformer
     end
 
     def run
-
-
-
-
-
-
+      @action = options[:action].first
+      if @action == "analyze"
+        SchemaTransformer::Analyze.run(options)
+      else
+        begin
+          SchemaTransformer::Transform.run(options)
+        rescue UsageError => e
+          puts "Usage Error: #{e.message}"
+          puts help_message
+          puts option_parser
+        end
+      end
     end
 
     private
     def help_message
-      "Available actions: generate, sync, switch"
+      "Available actions: analyze, generate, sync, switch"
    end
  end
 
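The run method now routes "analyze" to the new Analyze class and every other action to Transform, turning UsageError into a usage message. A sketch of that error path using only names from the diffs above, again assuming an app directory with a database config, since the Transform constructor connects:

<pre>
begin
  SchemaTransformer::Transform.run(:action => ["bogus"])
rescue SchemaTransformer::UsageError => e
  puts "Usage Error: #{e.message}"  # => Usage Error: Invalid action bogus
end
</pre>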
data/lib/schema_transformer/transform.rb
ADDED
@@ -0,0 +1,252 @@
+module SchemaTransformer
+  class UsageError < RuntimeError; end
+
+  class Transform < Base
+    include Help
+    @@stagger = 0
+    def self.run(options)
+      @@stagger = options[:stagger] || 0
+      @transformer = SchemaTransformer::Transform.new(options[:base] || Dir.pwd)
+      @transformer.run(options)
+    end
+
+    attr_reader :temp_table, :table
+    def initialize(base = File.expand_path("..", __FILE__), options = {})
+      super
+      @batch_size = options[:batch_size] || 10_000
+    end
+
+    def run(options)
+      @action = options[:action].first
+      case @action
+      when "generate"
+        self.generate
+        help(:generate)
+      when "sync"
+        help(:sync_progress)
+        table = options[:action][1]
+        self.gather_info(table)
+        self.create
+        self.sync
+        help(:sync)
+      when "switch"
+        table = options[:action][1]
+        self.gather_info(table)
+        self.switch
+        self.cleanup
+        help(:switch)
+      else
+        raise UsageError, "Invalid action #{@action}"
+      end
+    end
+
+    def generate
+      data = {}
+      ask "What is the name of the table you want to alter?"
+      data[:table] = gets(:table)
+      ask <<-TXT
+What is the modification to the table?
+Example 1:
+  ADD COLUMN smart tinyint(1) DEFAULT '0'
+Example 2:
+  ADD INDEX idx_name (name)
+Example 3:
+  ADD COLUMN smart tinyint(1) DEFAULT '0', DROP COLUMN full_name
+TXT
+      data[:mod] = gets(:mod)
+      path = transform_file(data[:table])
+      FileUtils.mkdir(File.dirname(path)) unless File.exist?(File.dirname(path))
+      File.open(path,"w") { |f| f << data.to_json }
+      @table = data[:table]
+      data
+    end
+
+    def gather_info(table)
+      if table.nil?
+        raise UsageError, "You need to specify the table name: schema_transformer #{@action} <table_name>"
+      end
+      data = JSON.parse(IO.read(transform_file(table)))
+      @table = data["table"]
+      @mod = data["mod"]
+      # variables needed for the rest of the program
+      @temp_table = "#{@table}_st_temp"
+      @trash_table = "#{@table}_st_trash"
+      @model = define_model(@table)
+    end
+
+    def create
+      if self.temp_table_exists?
+        @temp_model = define_model(@temp_table)
+      else
+        sql_create = %{CREATE TABLE #{@temp_table} LIKE #{@table}}
+        sql_mod = %{ALTER TABLE #{@temp_table} #{@mod}}
+        @conn.execute(sql_create)
+        @conn.execute(sql_mod)
+        @temp_model = define_model(@temp_table)
+      end
+      reset_column_info
+    end
+
+    def sync
+      res = @conn.execute("SELECT max(id) AS max_id FROM `#{@temp_table}`")
+      start = res.fetch_row[0].to_i + 1 # nil case is okay: [nil][0].to_i => 0
+      find_in_batches(@table, :start => start, :batch_size => @batch_size) do |batch|
+        # puts "batch #{batch.inspect}"
+        lower = batch.first
+        upper = batch.last
+
+        columns = insert_columns_sql
+        sql = %Q{
+          INSERT INTO #{@temp_table} (
+            SELECT #{columns}
+            FROM #{@table} WHERE id >= #{lower} AND id <= #{upper}
+          )
+        }
+        # puts sql
+        @conn.execute(sql)
+
+        if @@stagger > 0
+          log("Staggering: delaying for #{@@stagger} seconds before next batch insert")
+          sleep(@@stagger)
+        end
+      end
+    end
+
+    def final_sync
+      @temp_model = define_model(@temp_table)
+      reset_column_info
+
+      sync
+      columns = subset_columns.collect{|x| "#{@temp_table}.`#{x}` = #{@table}.`#{x}`" }.join(", ")
+      # need to limit the final sync, if we do the entire table it takes a long time
+      limit_cond = get_limit_cond
+      sql = %{
+        UPDATE #{@temp_table} INNER JOIN #{@table}
+          ON #{@temp_table}.id = #{@table}.id
+        SET #{columns}
+        WHERE #{limit_cond}
+      }
+      # puts sql
+      @conn.execute(sql)
+    end
+
+    def switch
+      final_sync
+      to_trash = %Q{RENAME TABLE #{@table} TO #{@trash_table}}
+      from_temp = %Q{RENAME TABLE #{@temp_table} TO #{@table}}
+      @conn.execute(to_trash)
+      @conn.execute(from_temp)
+    end
+
+    def cleanup
+      sql = %Q{DROP TABLE #{@trash_table}}
+      @conn.execute(sql)
+    end
+
+    def get_limit_cond
+      if @model.column_names.include?("updated_at")
+        "#{@table}.updated_at >= '#{1.day.ago.strftime("%Y-%m-%d")}'"
+      else
+        sql = "select id from #{@table} order by id desc limit 100000"
+        resp = @conn.execute(sql)
+        bound = 0
+        while row = resp.fetch_row do
+          bound = row[0].to_i
+        end
+        "#{@table}.id >= #{bound}"
+      end
+    end
+
+    # the parameter is only for testing
+    def gets(name = nil)
+      STDIN.gets.strip
+    end
+
+    def subset_columns
+      removed = @model.column_names - @temp_model.column_names
+      subset = @model.column_names - removed
+    end
+
+    def insert_columns_sql
+      # existing subset
+      subset = subset_columns
+
+      # added
+      added_s = @temp_model.column_names - @model.column_names
+      added = @temp_model.columns.
+                select{|c| added_s.include?(c.name) }.
+                collect{|c| "#{extract_default(c)} AS `#{c.name}`" }
+
+      # combine both
+      columns = subset.collect{|x| "`#{x}`"} + added
+      sql = columns.join(", ")
+    end
+
+    # returns Array of record ids
+    def find(table, cond)
+      sql = "SELECT id FROM #{table} WHERE #{cond}"
+      response = @conn.execute(sql)
+      results = []
+      while row = response.fetch_row do
+        results << row[0].to_i
+      end
+      results
+    end
+
+    # a lower-memory version of ActiveRecord's find_in_batches
+    def find_in_batches(table, options = {})
+      raise "You can't specify an order, it's forced to be ORDER BY id" if options[:order]
+      raise "You can't specify a limit, it's forced to be the batch_size" if options[:limit]
+
+      start = options.delete(:start).to_i
+      batch_size = options.delete(:batch_size) || 1000
+      order_limit = "ORDER BY id LIMIT #{batch_size}"
+
+      records = find(table, "id >= #{start} #{order_limit}")
+      while records.any?
+        yield records
+
+        break if records.size < batch_size
+        records = find(table, "id > #{records.last} #{order_limit}")
+      end
+    end
+
+    def define_model(table)
+      # Object.const_set(table.classify, Class.new(ActiveRecord::Base))
+      Object.class_eval(<<-code)
+        class #{table.classify} < ActiveRecord::Base
+          set_table_name "#{table}"
+        end
+      code
+      table.classify.constantize # returns the constant
+    end
+
+    def transform_file(table)
+      @base+"/config/schema_transformations/#{table}.json"
+    end
+
+    def temp_table_exists?
+      @conn.table_exists?(@temp_table)
+    end
+
+    def reset_column_info
+      @model.reset_column_information
+      @temp_model.reset_column_information
+    end
+
+    def log(msg)
+      @log.info(msg)
+    end
+
+    private
+    def ask(msg)
+      puts msg
+      print "> "
+    end
+
+    def extract_default(col)
+      @conn.quote(col.default)
+    end
+
+  end
+end
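The batching in find_in_batches above is keyset pagination: each query seeks past the last id seen with "id > last ... ORDER BY id LIMIT n" instead of using OFFSET, so every batch is an index range scan regardless of table size. A standalone sketch of the same technique against a bare mysql2 client; using mysql2 directly is an assumption for the sketch, as the gem itself goes through the ActiveRecord connection:

<pre>
require 'mysql2'

# Yields arrays of ids, batch_size at a time, starting at start_id.
def each_id_batch(client, table, start_id, batch_size)
  fetch = lambda do |cond|
    sql = "SELECT id FROM #{table} WHERE #{cond} ORDER BY id LIMIT #{batch_size}"
    client.query(sql).map { |row| row["id"] }
  end
  ids = fetch.call("id >= #{start_id}")
  while ids.any?
    yield ids
    break if ids.size < batch_size          # a short batch means we hit the end
    ids = fetch.call("id > #{ids.last}")    # seek past the last id, no OFFSET
  end
end
</pre>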