benhutton-mysql2psql 0.2.0

@@ -0,0 +1,230 @@
+ require 'mysql'
+ require 'csv'
+
+ class Mysql2psql
+
+   class MysqlReader
+     class Field
+     end
+
+     class Table
+       attr_reader :name
+
+       def initialize(reader, name)
+         @reader = reader
+         @name = name
+       end
+
+       @@types = %w(tiny enum decimal short long float double null timestamp longlong int24 date time datetime year set blob string var_string char).inject({}) do |list, type|
+         list[eval("::Mysql::Field::TYPE_#{type.upcase}")] = type
+         list
+       end
+
+       @@types[246] = "decimal"
+
+       def columns
+         @columns ||= load_columns
+       end
+
+       def convert_type(type)
+         case type
+         when /^int.* unsigned/
+           "bigint"
+         when /bigint/
+           "bigint"
+         when "bit(1)"
+           "boolean"
+         when /smallint.* unsigned/
+           "integer"
+         when /smallint/
+           "smallint"
+         when "tinyint(1)"
+           "boolean"
+         when /tinyint/
+           "tinyint"
+         when /int/
+           "integer"
+         when /varchar/
+           "varchar"
+         when /char/
+           "char"
+         when /decimal/
+           "decimal"
+         when /float/
+           "float"
+         when /real|double/
+           "double precision"
+         else
+           type
+         end
+       end
+
+       def load_columns
+         @reader.reconnect
+         result = @reader.mysql.list_fields(name)
+         mysql_flags = ::Mysql::Field.constants.select {|c| c =~ /FLAG/}
+         fields = []
+         @reader.mysql.query("EXPLAIN `#{name}`") do |res|
+           while field = res.fetch_row do
+             length = -1
+             length = field[1][/\((\d+)\)/, 1] if field[1] =~ /\((\d+)\)/
+             length = field[1][/\((\d+),(\d+)\)/, 1] if field[1] =~ /\((\d+),(\d+)\)/
+             desc = {
+               :name => field[0],
+               :table_name => name,
+               :type => convert_type(field[1]),
+               :length => length && length.to_i,
+               :decimals => field[1][/\((\d+),(\d+)\)/, 2],
+               :null => field[2] == "YES",
+               :primary_key => field[3] == "PRI",
+               :auto_increment => field[5] == "auto_increment"
+             }
+             desc[:default] = field[4] unless field[4].nil?
+             fields << desc
+           end
+         end
+
+         fields.select {|field| field[:auto_increment]}.each do |field|
+           @reader.mysql.query("SELECT max(`#{field[:name]}`) FROM `#{name}`") do |res|
+             field[:maxval] = res.fetch_row[0].to_i
+           end
+         end
+         fields
+       end
+
+
+       def indexes
+         load_indexes unless @indexes
+         @indexes
+       end
+
+       def foreign_keys
+         load_indexes unless @foreign_keys
+         @foreign_keys
+       end
+
+       def load_indexes
+         @indexes = []
+         @foreign_keys = []
+
+         @reader.mysql.query("SHOW CREATE TABLE `#{name}`") do |result|
+           explain = result.fetch_row[1]
+           explain.split(/\n/).each do |line|
+             next unless line =~ / KEY /
+             index = {}
+             if match_data = /CONSTRAINT `(\w+)` FOREIGN KEY \((.*?)\) REFERENCES `(\w+)` \((.*?)\)(.*)/.match(line)
+               index[:name] = match_data[1]
+               index[:column] = match_data[2].parse_csv(:quote_char => '`', :col_sep => ', ')
+               index[:ref_table] = match_data[3]
+               index[:ref_column] = match_data[4].parse_csv(:quote_char => '`', :col_sep => ', ')
+
+               the_rest = match_data[5]
+
+               if match_data = /ON DELETE (SET NULL|SET DEFAULT|RESTRICT|NO ACTION|CASCADE)/.match(the_rest)
+                 index[:on_delete] = match_data[1]
+               else
+                 index[:on_delete] ||= 'RESTRICT'
+               end
+
+               if match_data = /ON UPDATE (SET NULL|SET DEFAULT|RESTRICT|NO ACTION|CASCADE)/.match(the_rest)
+                 index[:on_update] = match_data[1]
+               else
+                 index[:on_update] ||= 'RESTRICT'
+               end
+
+               @foreign_keys << index
+             elsif match_data = /KEY `(\w+)` \((.*)\)/.match(line)
+               index[:name] = match_data[1]
+               index[:columns] = match_data[2].split(",").map {|col| col[/`(\w+)`/, 1]}
+               index[:unique] = true if line =~ /UNIQUE/
+               @indexes << index
+             elsif match_data = /PRIMARY KEY .*\((.*)\)/.match(line)
+               index[:primary] = true
+               index[:columns] = match_data[1].split(",").map {|col| col.strip.gsub(/`/, "")}
+               @indexes << index
+             end
+           end
+         end
+       end
+
+       def count_rows
+         @reader.mysql.query("SELECT COUNT(*) FROM `#{name}`") do |res|
+           return res.fetch_row[0].to_i
+         end
+       end
+
+       def has_id?
+         !!columns.find {|col| col[:name] == "id"}
+       end
+
+       def count_for_pager
+         query = has_id? ? 'MAX(id)' : 'COUNT(*)'
+         @reader.mysql.query("SELECT #{query} FROM `#{name}`") do |res|
+           return res.fetch_row[0].to_i
+         end
+       end
+
+       def query_for_pager
+         query = has_id? ? 'WHERE id >= ? AND id < ?' : 'LIMIT ?,?'
+         "SELECT #{columns.map{|c| "`"+c[:name]+"`"}.join(", ")} FROM `#{name}` #{query}"
+       end
+     end
+
+     def connect
+       @mysql = ::Mysql.connect(@host, @user, @passwd, @db, @port, @sock, @flag)
+       @mysql.query("SET NAMES utf8")
+       @mysql.query("SET SESSION query_cache_type = OFF")
+     end
+
+     def reconnect
+       @mysql.close rescue false
+       connect
+     end
+
+     def initialize(options)
+       # Only six option values are read below, so @flag (the seventh target) is always left nil.
+       @host, @user, @passwd, @db, @port, @sock, @flag =
+         options.mysqlhostname('localhost'), options.mysqlusername,
+         options.mysqlpassword, options.mysqldatabase,
+         options.mysqlport, options.mysqlsocket
+       @port = nil if @port == "" # setups like Amazon's RDS have no port value, and connect fails when given ""
+       @sock = nil if @sock == ""
+       @flag = nil if @flag == ""
+       connect
+     end
+
+     attr_reader :mysql
+
+     def views
+       unless defined? @views
+         @mysql.query("SELECT t.TABLE_NAME FROM INFORMATION_SCHEMA.TABLES t WHERE t.TABLE_SCHEMA = '#{@db}' AND t.TABLE_TYPE = 'VIEW';") do |res|
+           @views = []
+           res.each { |row| @views << row[0] }
+         end
+       end
+
+       @views
+     end
+
+     def tables
+       @tables ||= (@mysql.list_tables - views).map do |table|
+         Table.new(self, table)
+       end
+     end
+
+     def paginated_read(table, page_size)
+       count = table.count_for_pager
+       return if count < 1
+       statement = @mysql.prepare(table.query_for_pager)
+       counter = 0
+       0.upto((count + page_size)/page_size) do |i|
+         statement.execute(i*page_size, table.has_id? ? (i+1)*page_size : page_size)
+         while row = statement.fetch
+           counter += 1
+           yield(row, counter)
+         end
+       end
+       counter
+     end
+   end
+
+ end
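
For orientation, here is a minimal sketch of driving this reader on its own. The StubOptions class and its values are hypothetical and not part of the gem: MysqlReader only needs an object that answers the mysql* accessors called in initialize above (each taking an optional default), plus a reachable MySQL server and the gem itself loaded (e.g. require 'mysql2psql').

    # Hypothetical stand-in for the gem's options/config object.
    class StubOptions
      def initialize(values)
        @values = values
      end

      # Mirrors calls such as options.mysqlhostname('localhost') above:
      # the accessor name is the lookup key, the argument is the default.
      def method_missing(name, default = nil)
        @values.fetch(name, default)
      end
    end

    opts = StubOptions.new(
      :mysqlusername => "root",       :mysqlpassword => "secret",
      :mysqldatabase => "legacy_app", :mysqlport => 3306, :mysqlsocket => nil
    )

    reader = Mysql2psql::MysqlReader.new(opts)   # mysqlhostname falls back to 'localhost'
    reader.tables.each do |table|
      puts "#{table.name}: #{table.count_rows} rows, #{table.columns.size} columns"
    end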
@@ -0,0 +1,201 @@
+ require 'pg'
+
+ require 'mysql2psql/postgres_writer'
+
+ class Mysql2psql
+
+   class PostgresDbWriter < PostgresWriter
+     attr_reader :conn, :hostname, :login, :password, :database, :schema, :port
+
+     def initialize(options)
+       @hostname, @login, @password, @database, @port =
+         options.pghostname('localhost'), options.pgusername,
+         options.pgpassword, options.pgdatabase, options.pgport(5432).to_s
+       @database, @schema = database.split(":")
+       open
+     end
+
+     def open
+       @conn = PGconn.new(hostname, port, '', '', database, login, password)
+       @conn.exec("SET search_path TO #{PGconn.quote_ident(schema)}") if schema
+       @conn.exec("SET client_encoding = 'UTF8'")
+       @conn.exec("SET standard_conforming_strings = off") if @conn.server_version >= 80200
+       @conn.exec("SET check_function_bodies = false")
+       @conn.exec("SET client_min_messages = warning")
+     end
+
+     def close
+       @conn.close
+     end
+
+     def exists?(relname)
+       rc = @conn.exec("SELECT COUNT(*) FROM pg_class WHERE relname = '#{relname}'")
+       (!rc.nil?) && (rc.to_a.length==1) && (rc.first.count.to_i==1)
+     end
+
+     def write_sequence_update(table, options)
+       serial_key_column = table.columns.detect do |column|
+         column[:auto_increment]
+       end
+
+       if serial_key_column
+         serial_key = serial_key_column[:name]
+         max_value = serial_key_column[:maxval].to_i < 1 ? 1 : serial_key_column[:maxval] + 1
+         serial_key_seq = "#{table.name}_#{serial_key}_seq"
+
+         if !options.supress_ddl
+           if @conn.server_version < 80200
+             @conn.exec("DROP SEQUENCE #{serial_key_seq} CASCADE") if exists?(serial_key_seq)
+           else
+             @conn.exec("DROP SEQUENCE IF EXISTS #{serial_key_seq} CASCADE")
+           end
+           @conn.exec <<-EOF
+             CREATE SEQUENCE #{serial_key_seq}
+             INCREMENT BY 1
+             NO MAXVALUE
+             NO MINVALUE
+             CACHE 1
+           EOF
+         end
+
+         if !options.supress_sequence_update
+           puts "Updated sequence #{serial_key_seq} to current value of #{max_value}"
+           @conn.exec sqlfor_set_serial_sequence(table, serial_key_seq, max_value)
+         end
+       end
+     end
+
+     def write_table(table, options)
+       puts "Creating table #{table.name}..."
+       primary_keys = []
+
+       columns = table.columns.map do |column|
+         if column[:primary_key]
+           primary_keys << column[:name]
+         end
+         " " + column_description(column, options)
+       end.join(",\n")
+
+       if @conn.server_version < 80200
+         @conn.exec "DROP TABLE #{PGconn.quote_ident(table.name)} CASCADE;" if exists?(table.name)
+       else
+         @conn.exec "DROP TABLE IF EXISTS #{PGconn.quote_ident(table.name)} CASCADE;"
+       end
+       create_sql = "CREATE TABLE #{PGconn.quote_ident(table.name)} (\n" + columns + "\n)\nWITHOUT OIDS;"
+       begin
+         @conn.exec(create_sql)
+       rescue Exception => e
+         puts "Error: \n#{create_sql}"
+         raise
+       end
+       puts "Created table #{table.name}"
+
+     end
+
+     def write_indexes(table)
+       puts "Indexing table #{table.name}..."
+       if primary_index = table.indexes.find {|index| index[:primary]}
+         index_sql = "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD CONSTRAINT \"#{table.name}_pkey\" PRIMARY KEY(#{primary_index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")})"
+         @conn.exec(index_sql)
+       end
+
+       table.indexes.each do |index|
+         next if index[:primary]
+         unique = index[:unique] ? "UNIQUE " : nil
+
+         # MySQL allows an index to share its table's name; PostgreSQL does not
+         indexname = index[:name]
+         indexname_quoted = ''
+
+         if indexname.eql?(table.name)
+           indexname = (@conn.server_version < 90000) ? "#{indexname}_index" : nil
+           puts "WARNING: index \"#{index[:name]}\" equals table name. This is not allowed in PostgreSQL and will be renamed."
+         end
+
+         if indexname
+           indexname_quoted = PGconn.quote_ident(indexname)
+           if @conn.server_version < 80200
+             @conn.exec("DROP INDEX #{PGconn.quote_ident(indexname)} CASCADE;") if exists?(indexname)
+           else
+             @conn.exec("DROP INDEX IF EXISTS #{PGconn.quote_ident(indexname)} CASCADE;")
+           end
+         end
+
+         index_sql = "CREATE #{unique}INDEX #{indexname_quoted} ON #{PGconn.quote_ident(table.name)} (#{index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")});"
+         @conn.exec(index_sql)
+       end
+
+       #@conn.exec("VACUUM FULL ANALYZE #{PGconn.quote_ident(table.name)}")
+       puts "Indexed table #{table.name}"
+     rescue Exception => e
+       puts "Couldn't create indexes on #{table.name} (#{table.indexes.inspect})"
+       puts e
+       puts e.backtrace[0,3].join("\n")
+     end
+
+     def write_constraints(table)
+       table.foreign_keys.each do |key|
+         key_sql = "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD FOREIGN KEY (#{key[:column].map{|c| PGconn.quote_ident(c)}.join(', ')}) REFERENCES #{PGconn.quote_ident(key[:ref_table])}(#{key[:ref_column].map{|c| PGconn.quote_ident(c)}.join(', ')}) ON UPDATE #{key[:on_update]} ON DELETE #{key[:on_delete]}"
+         begin
+           @conn.exec(key_sql)
+         rescue Exception => e
+           puts "Error: \n#{key_sql}\n#{e}"
+         end
+       end
+     end
+
+     def format_eta(t)
+       t = t.to_i
+       sec = t % 60
+       min = (t / 60) % 60
+       hour = t / 3600
+       sprintf("%02dh:%02dm:%02ds", hour, min, sec)
+     end
+
+     def write_contents(table, reader)
+       _time1 = Time.now
+       copy_line = "COPY #{PGconn.quote_ident(table.name)} (#{table.columns.map {|column| PGconn.quote_ident(column[:name])}.join(", ")}) FROM stdin;"
+       @conn.exec(copy_line)
+       puts "Counting rows of #{table.name}... "
+       STDOUT.flush
+       rowcount = table.count_rows
+       puts "Rows counted"
+       puts "Loading #{table.name}..."
+       STDOUT.flush
+       _counter = reader.paginated_read(table, 1000) do |row, counter|
+         process_row(table, row)
+         @conn.put_copy_data(row.join("\t") + "\n")
+
+         if counter != 0 && counter % 20000 == 0
+           elapsed_time = Time.now - _time1
+           eta = elapsed_time * rowcount / counter - elapsed_time
+           etaf = self.format_eta(eta)
+           etatimef = (Time.now + eta).strftime("%Y/%m/%d %H:%M")
+           printf "\r#{counter} of #{rowcount} rows loaded. [ETA: #{etatimef} (#{etaf})]"
+           STDOUT.flush
+         end
+
+         if counter % 5000 == 0
+           @conn.put_copy_end
+           res = @conn.get_result
+           if res.cmdtuples != 5000
+             puts "\nWARNING: #{table.name} expected 5000 tuple inserts got #{res.cmdtuples} at row #{counter}\n"
+           end
+           @conn.exec(copy_line)
+         end
+
+       end
+       @conn.put_copy_end
+       if _counter && (_counter % 5000) > 0
+         res = @conn.get_result
+         if res.cmdtuples != (_counter % 5000)
+           puts "\nWARNING: table #{table.name} expected #{_counter % 5000} tuple inserts got #{res.cmdtuples}\n"
+         end
+       end
+       _time2 = Time.now
+       puts "\n#{table.name} #{_counter} rows loaded in #{((_time2 - _time1) / 60).round}min #{((_time2 - _time1) % 60).round}s"
+     end
+
+   end
+
+ end
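
Continuing that sketch, one plausible way to drive PostgresDbWriter end to end is shown below. The ordering (schema, then data, then indexes, constraints and sequences) is an assumption rather than something shown in this diff, and the hypothetical options object now also needs the pg* accessors and the supress_ddl / supress_sequence_update flags read above.

    # Reuses the hypothetical StubOptions from the previous sketch.
    # "appdb:public" exercises the optional "database:schema" form split in initialize.
    pg_opts = StubOptions.new(
      :mysqlusername => "root", :mysqlpassword => "secret",
      :mysqldatabase => "legacy_app", :mysqlport => 3306, :mysqlsocket => nil,
      :pgusername => "postgres", :pgpassword => "secret",
      :pgdatabase => "appdb:public", :pgport => 5432,
      :supress_ddl => false, :supress_sequence_update => false
    )

    reader = Mysql2psql::MysqlReader.new(pg_opts)
    writer = Mysql2psql::PostgresDbWriter.new(pg_opts)

    reader.tables.each { |t| writer.write_table(t, pg_opts) }    # CREATE TABLE for everything first
    reader.tables.each { |t| writer.write_contents(t, reader) }  # then stream the data via COPY
    reader.tables.each do |t|                                    # indexes, FKs and sequences last
      writer.write_indexes(t)
      writer.write_constraints(t)
      writer.write_sequence_update(t, pg_opts)
    end
    writer.close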
@@ -0,0 +1,152 @@
+ require 'mysql2psql/postgres_writer'
+
+ class Mysql2psql
+
+   class PostgresFileWriter < PostgresWriter
+     def initialize(file)
+       @f = File.open(file, "w+")
+       @f << <<-EOF
+ -- MySQL 2 PostgreSQL dump\n
+ SET client_encoding = 'UTF8';
+ SET standard_conforming_strings = off;
+ SET check_function_bodies = false;
+ SET client_min_messages = warning;
+
+       EOF
+     end
+
+     def truncate(table)
+       serial_key = nil
+       maxval = nil
+
+       table.columns.map do |column|
+         if column[:auto_increment]
+           serial_key = column[:name]
+           maxval = column[:maxval].to_i < 1 ? 1 : column[:maxval] + 1
+         end
+       end
+
+       @f << <<-EOF
+ -- TRUNCATE #{table.name};
+ TRUNCATE #{PGconn.quote_ident(table.name)} CASCADE;
+
+       EOF
+       if serial_key
+         @f << <<-EOF
+ #{sqlfor_reset_serial_sequence(table, serial_key, maxval)}
+         EOF
+       end
+     end
+
+     def write_sequence_update(table, options)
+       serial_key_column = table.columns.detect do |column|
+         column[:auto_increment]
+       end
+
+       if serial_key_column
+         serial_key = serial_key_column[:name]
+         serial_key_seq = "#{table.name}_#{serial_key}_seq"
+         max_value = serial_key_column[:maxval].to_i < 1 ? 1 : serial_key_column[:maxval] + 1
+
+         @f << <<-EOF
+ --
+ -- Name: #{serial_key_seq}; Type: SEQUENCE; Schema: public
+ --
+         EOF
+
+         if !options.supress_ddl
+           @f << <<-EOF
+ DROP SEQUENCE IF EXISTS #{serial_key_seq} CASCADE;
+
+ CREATE SEQUENCE #{serial_key_seq}
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+           EOF
+         end
+
+         if !options.supress_sequence_update
+           @f << <<-EOF
+ #{sqlfor_set_serial_sequence(table, serial_key_seq, max_value)}
+           EOF
+         end
+       end
+     end
+
+     def write_table(table, options)
+       primary_keys = []
+       serial_key = nil
+       maxval = nil
+
+       columns = table.columns.map do |column|
+         if column[:primary_key]
+           primary_keys << column[:name]
+         end
+         " " + column_description(column, options)
+       end.join(",\n")
+
+       @f << <<-EOF
+ -- Table: #{table.name}
+
+ -- DROP TABLE #{table.name};
+ DROP TABLE IF EXISTS #{PGconn.quote_ident(table.name)} CASCADE;
+
+ CREATE TABLE #{PGconn.quote_ident(table.name)} (
+       EOF
+
+       @f << columns
+
+       if primary_index = table.indexes.find {|index| index[:primary]}
+         @f << ",\n CONSTRAINT #{table.name}_pkey PRIMARY KEY(#{primary_index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")})"
+       end
+
+       @f << <<-EOF
+ \n)
+ WITHOUT OIDS;
+       EOF
+
+       table.indexes.each do |index|
+         next if index[:primary]
+         unique = index[:unique] ? "UNIQUE " : nil
+         @f << <<-EOF
+ DROP INDEX IF EXISTS #{PGconn.quote_ident(index[:name])} CASCADE;
+ CREATE #{unique}INDEX #{PGconn.quote_ident(index[:name])} ON #{PGconn.quote_ident(table.name)} (#{index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")});
+         EOF
+       end
+
+     end
+
+     def write_indexes(table)
+     end
+
+     def write_constraints(table)
+       table.foreign_keys.each do |key|
+         @f << "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD FOREIGN KEY (#{key[:column].map{|c| PGconn.quote_ident(c)}.join(', ')}) REFERENCES #{PGconn.quote_ident(key[:ref_table])}(#{key[:ref_column].map{|c| PGconn.quote_ident(c)}.join(', ')}) ON UPDATE #{key[:on_update]} ON DELETE #{key[:on_delete]};\n"
+       end
+     end
+
+
+     def write_contents(table, reader)
+       @f << <<-EOF
+ --
+ -- Data for Name: #{table.name}; Type: TABLE DATA; Schema: public
+ --
+
+ COPY "#{table.name}" (#{table.columns.map {|column| PGconn.quote_ident(column[:name])}.join(", ")}) FROM stdin;
+       EOF
+
+       reader.paginated_read(table, 1000) do |row, counter|
+         process_row(table, row)
+         @f << row.join("\t") + "\n"
+       end
+       @f << "\\.\n\n"
+       #@f << "VACUUM FULL ANALYZE #{PGconn.quote_ident(table.name)};\n\n"
+     end
+
+     def close
+       @f.close
+     end
+   end
+
+ end
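
PostgresFileWriter takes the same reader but writes a PostgreSQL-flavoured SQL dump to disk instead of talking to a live server. A minimal sketch, again reusing the hypothetical reader and pg_opts from the sketches above; the resulting file is meant to be loaded afterwards with psql or a similar client.

    file_writer = Mysql2psql::PostgresFileWriter.new("legacy_app.sql")

    reader.tables.each do |t|
      file_writer.write_table(t, pg_opts)    # DROP/CREATE TABLE plus its secondary indexes
      file_writer.write_contents(t, reader)  # COPY ... FROM stdin; block with the row data
    end
    # Foreign keys and sequence updates last, so the dump never references
    # a table that has not been created yet.
    reader.tables.each do |t|
      file_writer.write_constraints(t)
      file_writer.write_sequence_update(t, pg_opts)
    end
    file_writer.close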