table_syncer 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,9 @@
1
+ == 0.2.0 2009-09-39
2
+
3
+ * Now prompts you if you use --commit [so you don't do it by accident]
4
+ * Minor code cleanups :)
5
+
6
+ == 0.1.0 not a real release
7
+
8
+ * Now does some structure syncing
9
+
data/Manifest.txt ADDED
@@ -0,0 +1,25 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ bin/table_syncer
7
+ config/hoe.rb
8
+ config/requirements.rb
9
+ lib/table_syncer.rb
10
+ lib/table_syncer/version.rb
11
+ script/console
12
+ script/destroy
13
+ script/generate
14
+ script/txt2html
15
+ setup.rb
16
+ tasks/deployment.rake
17
+ tasks/environment.rake
18
+ tasks/website.rake
19
+ test/test_helper.rb
20
+ test/test_table_syncer.rb
21
+ website/index.html
22
+ website/index.txt
23
+ website/javascripts/rounded_corners_lite.inc.js
24
+ website/stylesheets/screen.css
25
+ website/template.html.erb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on table_syncer, see http://table_syncer.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = table_syncer
2
+
3
+ * FIX (url)
4
+
5
+ == DESCRIPTION:
6
+
7
+ FIX (describe your package)
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * FIX (list of requirements)
20
+
21
+ == INSTALL:
22
+
23
+ * FIX (sudo gem install, anything else)
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2008 FIXME full name
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
# Rakefile: pull in project requirements and the Hoe gem configuration,
# then load every task definition under tasks/.
require 'config/requirements'
require 'config/hoe' # setup Hoe + all gem configuration

Dir.glob('tasks/**/*.rake').each { |task_file| load task_file }
data/bin/table_syncer ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
#
# Created on 2008-9-23.
# Copyright (c) 2008. All rights reserved.

# Load RubyGems if it is available.  Only the rubygems require is guarded:
# previously the rescue also swallowed a LoadError from table_syncer itself,
# so a missing or broken install exited silently with status 0.
begin
  require 'rubygems'
rescue LoadError
  # no rubygems -- table_syncer may still be reachable via $LOAD_PATH
end

require 'table_syncer' # loading this file runs the whole tool
data/config/hoe.rb ADDED
@@ -0,0 +1,73 @@
1
# Gem-packaging configuration for table_syncer (newgem/Hoe template).
# Defines gem metadata constants, then builds the global $hoe object that
# generates all the release/publish Rake tasks.
require 'table_syncer/version'

AUTHOR = 'Roger Pack' # can also be an array of Authors
EMAIL = "rogerpack2005@gmail.com"
DESCRIPTION = "tool to synchronize data across databases"
GEM_NAME = 'table_syncer' # what ppl will type to install your gem
RUBYFORGE_PROJECT = 'table-syncer' # The unix name for your project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
EXTRA_DEPENDENCIES = [
  # ['activesupport', '>= 1.3.1']
] # An array of rubygem dependencies [name, version]

@config_file = "~/.rubyforge/user-config.yml"
@config = nil
RUBYFORGE_USERNAME = "rogerdpack"
# Lazily reads the rubyforge username out of ~/.rubyforge/user-config.yml,
# exiting with instructions if the config file is unreadable.
# NOTE(review): mutates the RUBYFORGE_USERNAME constant in place via
# String#replace rather than reassigning -- template quirk, left as-is.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      exit
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end


REV = nil
# UNCOMMENT IF REQUIRED:
# REV = YAML.load(`svn info`)['Revision']
VERS = TableSyncer::VERSION::STRING + (REV ? ".#{REV}" : "")
RDOC_OPTS = ['--quiet', '--title', 'table_syncer documentation',
    "--opname", "index.html",
    "--line-numbers",
    "--main", "README",
    "--inline-source"]

# Reopen Hoe to strip hoe itself from the generated gemspec's dependency
# list, so end users aren't forced to install hoe.
class Hoe
  def extra_deps
    @extra_deps.reject! { |x| Array(x).first == 'hoe' }
    @extra_deps
  end
end

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.new(GEM_NAME, VERS) do |p|
  p.developer(AUTHOR, EMAIL)
  p.description = DESCRIPTION
  p.summary = DESCRIPTION
  p.url = HOMEPATH
  p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  p.test_globs = ["test/**/test_*.rb"]
  p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] #An array of file patterns to delete on clean.

  # == Optional
  p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
  #p.extra_deps = EXTRA_DEPENDENCIES

  #p.spec_extras = {} # A hash of extra values to set in the gemspec.
end

# NOTE(review): "\\n\\n" joins with a literal backslash-n, unlike the "\n\n"
# used for p.changes above -- looks like a template inconsistency; confirm
# before relying on CHANGES formatting.
CHANGES = $hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")
PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
$hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
$hoe.rsync_args = '-av --delete --ignore-errors'
$hoe.spec.post_install_message = File.open(File.dirname(__FILE__) + "/../PostInstall.txt").read rescue ""
@@ -0,0 +1,15 @@
1
# Shared requirements for the Rakefile: ensure the build-time gems are
# present (with a helpful message if not) and put lib/ on the load path.
require 'fileutils'
include FileUtils

require 'rubygems'

%w[rake hoe newgem rubigen].each do |gem_name|
  begin
    require gem_name
  rescue LoadError
    # Missing build dependency: explain how to install it, then stop.
    puts "This Rakefile requires the '#{gem_name}' RubyGem."
    puts "Installation: gem install #{gem_name} -y"
    exit
  end
end

# Make lib/ requirable relative to this file's location.
$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))
@@ -0,0 +1,508 @@
1
#!/usr/bin/env ruby
# the amazing mysql table syncer!
# this program syncs between two database tables to make a recipient match a donor table
# to use: define some database connections at the top of this file
# then run --help to see options
# See also http://code.google.com/p/ruby-roger-useful-functions/wiki/MysqlTableSyncer
# Note that it automatically creates would_have_run.sql which has the contents of what it would have executed, had you passed --commit
#
# Enjoy.
# Note that with ssh hosts if it fails to connect, it will try and auto-connect to that host.
# the "auto established" tunnel will then be running in the background.
# Re-run the script and it will hopefully work the second time.
# If it doesn't, then run the command output [it will print out the ssh tunnel command appropriate] in a different terminal window

# 2008 Roger Pack Public Domain
# No warranty of any type :)

require 'rubygems'
# Prefer the faster mysqlplus driver when installed; fall back to plain mysql.
begin
  require 'mysqlplus'
rescue LoadError
  require "mysql"
end
require 'optparse'

# define some databases and how you connect with them, if foreign
example_db_1 = {:host => '127.0.0.1', :user => 'root', :password => '', :db => 'wilkboar_ties'}
example_db_2 = {:host => '127.0.0.1', :user => 'root', :password => '', :db => 'new_database'}

all_database_names = ['example_db_1', 'example_db_2'] # just used so that the --help command is more useful and can print out the database connection names available

# setup defaults -- it will use these databases by default unless you specify otherwise on the command line:
db_from_name = 'example_db_1'
db_to_name = 'example_db_2'
actually_run_queries = false # default to just previewing -- use --commit to actually run it
verbose = true


# now parse incoming options
my_options = {}
auto_create_ssh_tunnels = true
tables_to_sync = nil
# make sure use normal style options, or nothing :)
my_options[:skip_the_warning_prompt_for_commit] = false

do_structure_sync_only = false

OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options] table names"

  opts.on("-f", "--from=FROM", "from database name #{all_database_names.inspect}") do |from_name|
    db_from_name = from_name
  end

  opts.on("-t", "--to=TO", "to database name #{all_database_names.inspect} \n\t\t\t\t\t\tedit databases at #{__FILE__}") do |to_name|
    db_to_name = to_name
  end

  opts.on('-y', 'skip the warning prompt for commit') do
    my_options[:skip_the_warning_prompt_for_commit] = true
  end

  opts.on("-s", "--tables=", "tables list comma separated, ex --tables=table_one,table_two -- or if you'd just like to do all tables, then do --tables=ALL_TABLES", Array) do |tables|
    tables_to_sync = []
    for entry in tables do
      tables_to_sync << entry
    end
  end

  opts.on("", "--commit", " tell us whether or not to actually run queries -- the default is to not run queries") do
    print "DOING IT COMMITTING LIVE QUERIES\n"
    actually_run_queries = true
  end

  opts.on("-z", "--extra_sql=STRING", "specify an sql sql string to run after the script ends [in the 'to' database]") do |sql|
    print "extra sql", sql
    my_options[:extra_sql] = sql
  end

  opts.on('-q', '--quiet', 'Non-verbose -- dont output as much junk, just the summaries') do |quiet_true|
    verbose = false
  end

  opts.on('--perform_structure_sync_only', "Do a structure morph for the designated tables -- \nnote that it doesnt yet do indices and you need to still pass --commit for it to do the structure change. Use it with --tables=STRUCTURE_SYNC_THE_WHOLE_THING to drop any existing tables not found in the origin database and structure sync over any existing tables") do
    do_structure_sync_only = true
  end

end.parse!

# grab the right db's
# NOTE(review): eval on a command-line-supplied name executes arbitrary ruby
# if someone passes code as --from/--to. The names come from the operator
# running the script, but a lookup table of the configs above would be safer.
db_from_info = db_to_info = nil
print "from db: #{db_from_name}\n"
eval("db_from_info = #{db_from_name}")
print "to db: #{db_to_name}\n"
eval("db_to_info = #{db_to_name}")

# some error checking
raise if defined?(production) and db_to_name == 'production' and actually_run_queries # I never wanted to commit to one db
raise 'missing a database selected?' unless db_to_info and db_from_info

# custom parse table names they have within the parameters
# (bare args are treated as comma-separated table lists when --tables was not given)
unless tables_to_sync
  extra_table_args = ARGV.select{|arg| arg[0..0] != '-'}
  if extra_table_args.length > 0
    tables_to_sync = []
    for arg in extra_table_args
      tables_to_sync += Array(arg.split(','))
    end
  else
    print 'no tables specified! run with --help', "\n"
    exit
  end
else
  if ARGV.find{|arg| arg[0..0] != '-'}
    print "warning--ignoring some apparently extra parameters at the end, since you passed in tables via a command line arg"
  end
end
ARGV.clear
raise 'must specify tables or ALL for structure_sync to work--we\'re conservative and disallow it otherwise' if tables_to_sync.empty? and do_structure_sync_only

# In preview mode, every query that WOULD have run is logged to this file.
example_out_file = File.open 'would_have_run.sql' , 'w' unless actually_run_queries
122
+
123
+
124
# Monkey-patch: let a row hash (column name => string value, as returned by
# Mysql#each_hash) render itself as UPDATE / INSERT statements.
# Values are escaped only for single quotes; a nil value renders as NULL.
class Hash
  # Build "update <table> set col = 'v', ... where id = <id>", touching only
  # the columns listed in nonmatching_keys. Raises when the row has no 'id'.
  def to_sql_update_query(table_name, nonmatching_keys) # ltodo take some 'params' :)
    raise unless self['id']
    statement = "update #{table_name} set"
    separator = ''
    each_key do |column|
      next unless nonmatching_keys.include? column
      value = self[column]
      rendered = value ? "'" + value.gsub("'", "\\\\'") + "'" : 'NULL'
      statement << "#{separator} #{column} = #{rendered}"
      separator = ','
    end
    statement << " where id = #{self['id']}"
  end

  # Build "insert into <table> (cols...) values (vals...);" over every key,
  # in the hash's insertion order.
  def to_sql_create_query(table_name)
    statement = "insert into #{table_name} ("
    separator = ''
    each_key do |column|
      statement += "#{separator}#{column} "
      separator = ','
    end
    statement += ") values ( "
    separator = ''
    each_key do |column|
      value = self[column]
      statement += "#{separator} #{value ? "'" + value.gsub("'", "\\\\'") + "'" : 'NULL'}" # nil renders as NULL
      separator = ','
    end
    statement += ");"
  end
end
152
+
153
# Make `table`'s structure in db_to match its structure in db_from.
#
# db_to / db_from      -- open Mysql connections (recipient / donor)
# table                -- table name (String); interpolated raw into SQL
# actually_run_queries -- when false, only prints what it WOULD run
#
# Columns are added/altered/dropped as needed. Indexes are only compared and
# warned about -- never changed (see the WARNING print at the bottom).
def sync_structure(db_to, db_from, table, actually_run_queries)
  print "structure syncing #{table}\n"
  good_structure = db_from.query("desc #{table}").use_result
  all_from_columns = {}
  good_structure.each_hash{|h| all_from_columns[h['Field']] = h }
  good_structure.free
  # we basically cheat and just fakely recreate mismatched columns by "modifying them" to match the creation script given by 'show create table x' for that column
  good_creation_query = db_from.query("show create table #{table}").use_result
  create_whole_table_script = good_creation_query.fetch_hash['Create Table']
  good_creation_script = create_whole_table_script.split("\n")
  good_creation_query.free

  # A failing `desc` means the table doesn't exist in db_to yet.
  questionable_to_structure = db_to.query("desc #{table}").use_result rescue nil
  unless questionable_to_structure
    if actually_run_queries
      db_to.query(create_whole_table_script)
    else
      print "would have created new table #{table} thus: #{create_whole_table_script}\n"
      db_to = db_from # fake it that they match so we don't raise any errors for the duration of this method call
    end
    questionable_to_structure = db_to.query("desc #{table}").use_result
  end

  all_to_columns = {}
  questionable_to_structure.each_hash{|h| all_to_columns[h['Field']] = h }
  questionable_to_structure.free

  # Walk the donor's columns: create missing ones, morph differing ones.
  for column_name, specs in all_from_columns do

    matching_creation_line = good_creation_script.find{|line| line =~ /^\s*`#{column_name}`/} # starts with column name--kind of fallible, but hey, we're working with english single words here

    matching_to_column_specs = all_to_columns[column_name]
    # strip the trailing comma show-create-table puts after each column line
    matching_creation_line = matching_creation_line[0..-2] if matching_creation_line[-1..-1] == ','
    unless matching_to_column_specs # get it from the script
      # create it
      if specs['Extra'] != ''
        # e.g. auto_increment -- we refuse to synthesize those
        raise "uh oh currently we don't sync id's they're assumed to exist already! Try deleting the old column #{column_name} or table #{table} entirely"
      end

      running = "ALTER TABLE #{table} ADD COLUMN #{matching_creation_line}"
      print "running #{running}-- for #{column_name}\n"
      db_to.query running if actually_run_queries
    else
      # we don't want key differences to make a difference--those are handled after
      to_specs_non_keyed = matching_to_column_specs.dup
      specs_non_keyed = specs.dup
      to_specs_non_keyed.delete('Key')
      specs_non_keyed.delete('Key')
      if specs_non_keyed != to_specs_non_keyed
        line = "ALTER TABLE #{table} CHANGE #{column_name} #{matching_creation_line}"

        # for some reason the create table script doesn't include defaults if they're NULL or ''
        unless line =~ /default/i
          if specs_non_keyed['Default'] == nil
            line += " DEFAULT NULL"
          else
            line += " DEFAULT '#{ specs_non_keyed['Default'] }'"
          end
        end
        print "modifying #{column_name} -- #{line} \n"
        print "#{specs_non_keyed.inspect} != the to guy: #{to_specs_non_keyed.inspect}"
        db_to.query line if actually_run_queries
      end
      all_to_columns.delete(column_name)
    end
  end

  # Anything still in all_to_columns exists only in the recipient -- drop it.
  for column_name, description in all_to_columns # left overs
    print "REMOVING COLUMN #{column_name}"
    db_to.query("ALTER TABLE #{table} DROP #{column_name}") if actually_run_queries
  end

  # Index comparison is warn-only from here down.
  indices = db_from.query("show index from #{table};").use_result
  all_indices = []
  indices.each_hash{|h| h.delete('Cardinality'); all_indices << h } # Cardinality doesn't make a difference...AFAIK
  indices.free

  existing_indices = db_to.query("show index from #{table}").use_result
  all_existing_indices = []
  existing_indices.each_hash{|h| h.delete('Cardinality'); all_existing_indices << h }
  existing_indices.free
  apparently_lacking = all_indices.map{|index| index['Column_name']} - all_existing_indices.map{|index| index['Column_name']}

  for index in apparently_lacking
    # ltodo if it looks nice and generic then go ahead and add it
  end

  if all_indices != all_existing_indices # this is right
    print "\n\nWARNING #{table}: you are missing some indexes now or there is some type of discrepancy [these aren't handled yet]-- you may want to add them a la\nCREATE INDEX some_name_usually_column_name_here ON #{table} (column_name_here)\n for apparently the following columns: #{apparently_lacking.inspect}\n\n
    you might get away with dropping the old table and letting it be recreated -- that might also add the right indices\n\n
    #{all_indices.inspect}\n != \n#{all_existing_indices.inspect}"
  end
end
246
+
247
# --- ssh tunnel redirection: point the mysql client at the local tunnel port ---
if db_from_info[:ssh_host]
  db_from_info[:host] = db_from_info[:ssh_local_to_host] || '127.0.0.1'
  db_from_info[:port] = 4000
end

if db_to_info[:ssh_host]
  # FIX: previously read db_from_info[:ssh_local_to_host] here (copy-paste),
  # so a to-side tunnel override was silently ignored.
  db_to_info[:host] = db_to_info[:ssh_local_to_host] || '127.0.0.1'
  db_to_info[:port] = 4000
end
commit_style = actually_run_queries ? '---COMMITTING----' : 'previewing (no changes made)'

print "#{db_from_info[:db]} => #{db_to_info[:db]}\n\n"
print "#{commit_style} run\n\n"
print "#{db_from_info[:ssh_host] || db_from_info[:host]}:#{db_from_info[:db]} #{tables_to_sync.inspect}\n"
print "\t=> #{db_to_info[:ssh_host] || db_to_info[:host]}:#{db_to_info[:db]} #{tables_to_sync.inspect}\n"
# ltodo add in the local_to_stuff here

# --- safety prompt before a committing run ---
if actually_run_queries and !my_options[:skip_the_warning_prompt_for_commit]
  print "Continue (yes/no)?"
  input = gets || '' # FIX: gets returns nil at EOF; treat that as "no"
  if !['y', 'yes'].include? input.downcase.strip
    print "aborting -- you gave me #{input}"
    exit
  end
end


start_time = Time.now
retried = false
begin
  # connect to the MySQL servers
  print 'connecting to to DB...', db_to_info[:db]; STDOUT.flush
  db_to = Mysql.real_connect(db_to_info[:host], db_to_info[:user], db_to_info[:password], db_to_info[:db], db_to_info[:port], nil, Mysql::CLIENT_COMPRESS)
  print 'connected', "\n", 'now connecting to from DB ', db_from_info[:db]; STDOUT.flush
  db_from = Mysql.real_connect(db_from_info[:host], db_from_info[:user], db_from_info[:password], db_from_info[:db], db_from_info[:port], nil, Mysql::CLIENT_COMPRESS)
  print "connected\n"
rescue Mysql::Error => e
  puts "Error code: #{e.errno}"
  puts "Error message: #{e.error}"
  puts "This may mean a tunnel is not working" if e.error.include?('127.0.0.1')
  # note that, if you do add ssh -> ssh, you may still only need one connection!
  if db_from_info[:ssh_host] or db_to_info[:ssh_host]
    ssh_port = db_from_info[:ssh_port] || db_to_info[:ssh_port]
    ssh_local_to_port = db_from_info[:ssh_local_to_port] || db_to_info[:ssh_local_to_port] || 3306
    ssh_user = db_from_info[:ssh_user] || db_to_info[:ssh_user]
    ssh_local_to_host = db_from_info[:ssh_local_to_host] || db_to_info[:ssh_local_to_host] || 'localhost'
    ssh_host = db_from_info[:ssh_host] || db_to_info[:ssh_host]
    command = "ssh -N #{ssh_port ? '-p ' + ssh_port.to_s : nil} -L 4000:#{ssh_local_to_host}:#{ssh_local_to_port} #{ssh_user}@#{ssh_host} \n" # NOTE DOES NOT YET ALLOW FOR TWO SSH DB's
    if auto_create_ssh_tunnels and !retried
      print "trying to auto create ssh tunnel via: #{command}\n"
      Thread.new { system(command) }
      retried = true
      sleep 2 # FIX: give the background tunnel a moment to come up before retrying
      retry
    else
      print "unable to connect to server--try running\n#{command}in another window or try again!"
    end
  end
  exit
ensure
  # ltodo: disconnect from server here [?] -- also do we free, and disconnect, at all, during this? :)
end

summary_information = '' # so we can print it all (again), at the end

db_to.query_with_result = false
db_from.query_with_result = false # allow us to read them from the wire when they are coming in. Save a little CPU time :)


# Expand the ALL_TABLES / STRUCTURE_SYNC_THE_WHOLE_THING sentinels into the
# donor's real table list; for the latter, also drop recipient-only tables.
if do_structure_sync_only and tables_to_sync == ['STRUCTURE_SYNC_THE_WHOLE_THING'] or tables_to_sync == ['ALL_TABLES']
  # FIX: decide whether to drop extras BEFORE overwriting tables_to_sync --
  # previously this was checked afterwards, so the drop branch never ran.
  drop_missing_tables = (tables_to_sync == ['STRUCTURE_SYNC_THE_WHOLE_THING'])
  tables_from = db_from.query("show tables").use_result
  tables_from_array = []
  tables_from.each_hash {|h| h.each{|k, v| tables_from_array << v}}
  tables_from.free
  tables_to_sync = tables_from_array
  if drop_missing_tables
    tables_to = db_to.query("show tables").use_result
    tables_to_array = []
    tables_to.each_hash {|h| h.each{|k, v| tables_to_array << v}}
    tables_to.free
    nukables = tables_to_array - tables_from_array
    for table in nukables do
      query = "DROP TABLE #{table}"
      print "dropping table -- #{query}\n"
      db_to.query(query) if actually_run_queries
    end
  end
end

# --- per-table data sync: update changed rows, insert new, delete stale ---
for table in tables_to_sync do
  print "start #{commit_style} table #{table}" + "**" * 10 + "\n"
  if do_structure_sync_only
    sync_structure(db_to, db_from, table, actually_run_queries)
    next
  end

  # Index the recipient's rows by id; anything left unvisited at the end of
  # the donor pass is stale and gets deleted.
  all_to_keys_not_yet_processed = {}
  select_all_to = db_to.query("SELECT * FROM #{table}") # could easily be 'select id', as well note this assumes distinct id's! Otherwise we'd need hashes, one at a time, etc. etc.
  select_all_to = select_all_to.use_result
  select_all_to.each_hash { |to_element|
    if all_to_keys_not_yet_processed[to_element['id']] # duplicated id's are a fringe case and not yet handled!
      raise "\n\n\n\nERROR detected a duplicated id (or the lack of id at all) in #{table} -- aborting [consider clearing [DELETE FROM #{table} in the 'to' database and trying again, if in a pinch]!\n\n\n\n"
    end
    all_to_keys_not_yet_processed[to_element['id']] = to_element
  }

  res = db_from.query("SELECT * from #{table}")
  res = res.use_result
  count_updated = 0
  count_created = 0

  res.each_hash do |from_element|
    existing = all_to_keys_not_yet_processed[from_element['id']]
    # matching id -> maybe UPDATE; unknown id -> INSERT; recipient ids never
    # seen here are deleted afterwards.
    if existing # we have a match--test if it is truly matching
      to_element = existing # ltodo rename
      all_nonmatching_keys = []
      for key in from_element.keys do
        if from_element[key] != to_element[key]
          all_nonmatching_keys << key
          print " #{key}\t\t\t[", from_element[key].inspect, "]!!!!======to:[", to_element[key].inspect||'', ']', "\n" if verbose
        else
          # equal, ok
        end

      end
      if all_nonmatching_keys.length > 0
        count_updated += 1
        query = from_element.to_sql_update_query(table, all_nonmatching_keys)
        print "update query on #{to_element['name']}: #{query}\n" if verbose
        db_to.query query if actually_run_queries
        example_out_file.write query + ";\n" unless actually_run_queries
      end
    else
      count_created += 1
      create_query = from_element.to_sql_create_query(table)
      print "insert query on #{from_element['name']}: #{create_query}\n" if verbose
      db_to.query create_query if actually_run_queries
      example_out_file.write create_query + ";\n" unless actually_run_queries
    end
    all_to_keys_not_yet_processed.delete(from_element['id'])
  end
  print "\n" if (count_updated>0 or count_created>0) if verbose


  count_deleted = all_to_keys_not_yet_processed.length
  if count_deleted > 0
    ids = all_to_keys_not_yet_processed.keys
    # Re-select the doomed rows so we can sanity-check the count and print a
    # human-friendly name for each deletion.
    double_check_all_query = "select * from #{table} where id IN (#{ids.join(',')})"
    double_check_result = db_to.query(double_check_all_query)
    double_check_result = double_check_result.use_result


    victims = {}

    double_check_result.each_hash {|victim|
      raise 'duplicate' if victims[victim['id']]
      victims[victim['id']] = victim['name']
    }
    raise 'weird deleted--got back strange number of rows -- refusing to delete' unless double_check_result.num_rows == count_deleted
    double_check_result.free
    for id in ids do
      query = "delete from #{table} where id = #{id}"
      print "DELETE query, for #{victims[id]} is #{query}\n" if verbose
      db_to.query query if actually_run_queries
      example_out_file.write query + ";\n" unless actually_run_queries
    end
  end

  res.free
  print "done #{commit_style} "
  summary = "#{table} -- updated #{count_updated}, created #{count_created}, deleted #{count_deleted}\n"
  print summary
  summary_information << summary
end
if my_options[:extra_sql] and actually_run_queries
  print "doing sql #{my_options[:extra_sql]}\n"
  result = db_to.query my_options[:extra_sql]
  require 'pp'
  pp "got sql result", result
end
db_from.close if db_from
db_to.close if db_to
example_out_file.close if example_out_file # FIX: flush/close the preview log
print "\n--summary--\n", summary_information
print "total transfer time #{Time.now - start_time}\n"
436
+
437
+ # Todo list:
438
+ # inlined todo notes
439
+ # ltodo: some SSH examples in here
440
+ # ltodo: optionally 'only add from one to the other' -- only add new entries past the current maxes for the table
441
+ # ltodo: one BIG transaction, so that ctrl+c will work. [?]
442
+ # ltodo: read_only directive
443
+ # todo by default output a 'backup' log somewhere, too! oh baby! 10 of them! what the heck! :)
444
+ # just needs docs and rock and roll [publicize] :)
445
+ # note this lacks 'transaction's thus far
446
+ # and its SSH is hackish, requiring the user to start a tunnel in another window
447
+ # ltodo: tell people how to install ssh for windows users [?]
448
+ # TODO sql escape things better (does it a little already) [use mysql escape function itself or whatever it is]
449
+ # TODO could use a 'mass insert' and mass delete to speed things up (several giant strings batch mode)
450
+ # could do: something along the lines of rsync for database tables--calculate some checksums, foreign host, etc., anything to save on bandwidth lol. It is, however, true that most changes come from the "latter end" of tables so...probably has tons of potential savings
451
+ # ltodo handle ssh => ssh [separate hosts] -- note: may want to specify a 'local port' per ssh host :)
452
+ # could do: download from both people at the same time, per table, or what not muhaha
453
+ # could do: some more free's in here [double check to make sure I free everwhere]
454
+ # TODO when it does something require keyboard input unless they specify --force or something
455
+ # ltodo handle ginormous tables :) that wouldn't ever ever fit in memory :)
456
+ # could do: have this cool 'difference since then' setting thingers...like...ok you sync'ed that since then, you've changed this much we know, and that one has changed that much, we know...so the sum diff is...
457
+ # whoa!
458
+ # ltodo: you can chain together a few updates, too, a la "update x where y; update z where q; update a where b;" or the 'super table mass updater' that uses a temp table lol
459
+ # :)
460
+ # need to use my_options for the syncing stuff, clean up code, too
461
+ # todo: this errs
462
+ #>> gc.description.pretty_inspect=> ""GCU\\\\’s Bachelor of Science in Entrepreneurial Studies program is built on the principles of personal integrity, values, and innovation. Emphasizing the philosophy of being an \\"Entrepreneurial School by Entrepreneurs,\\" the program provides students with the skills to think analytically, ask the right questions, solve problems, and function as an entrepreneur in both small and large companies. Students are prepared to be self-motivated, self-managed, and self-disciplined entrepreneurs with the skill-set to manage their own careers either by starting their own business venture or working within a start-up, entrepreneurial business environment. Interaction with successful entrepreneurs, business consulting opportunities, and individual venture capital projects are highlighted in the program."\n"
463
+ #>> gc.description
464
+ #=> "GCU\\’s Bachelor of Science in Entrepreneurial Studies program is built on the principles of personal integrity, values, and innovation. Emphasizing the philosophy of being an "Entrepreneurial School by Entrepreneurs," the program provides students with the skills to think analytically, ask the right questions, solve problems, and function as an entrepreneur in both small and large companies. Students are prepared to be self-motivated, self-managed, and self-disciplined entrepreneurs with the skill-set to manage their own careers either by starting their own business venture or working within a start-up, entrepreneurial business environment. Interaction with successful entrepreneurs, business consulting opportunities, and individual venture capital projects are highlighted in the program."
465
+
466
+
467
+
468
+
469
+ =begin
470
+ multiple insertions ex:
471
+ dbh.query("INSERT INTO animal (name, category)
472
+ VALUES
473
+ ('snake', 'reptile'),
474
+ ('frog', 'amphibian'),
475
+ ('tuna', 'fish'),
476
+ ('racoon', 'mammal')
477
+ ")
478
+
479
+ class MultipleDeleter # not thread safe
480
+ @@batch_max = 1000;
481
+ def initialize connection, table_name
482
+ @connection = connection
483
+ @table_name = table_name
484
+ ids = []
485
+ end
486
+
487
+ def add_id_t_delete id
488
+ ids << id
489
+ if ids.length == @@batch_max
490
+ send
491
+ end
492
+ end
493
+
494
+ def send
495
+ @connection.query "delete from #{@table_name} where id IN (#{ids.join(',')});"
496
+ ids = []
497
+ end
498
+
499
+ def flush
500
+ send
501
+ end
502
+ end
503
+ =end
504
+
505
+
506
+ # ltodo: alias for table names :) czs, ps, etc :)
507
+
508
+ # we really need to be able to handle many to many: just use a hash + counter based system instead of an id based system