benhutton-mysql2psql 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
+ require 'mysql'
+ require 'csv'
+
+ class Mysql2psql
+
+   class MysqlReader
+     class Field
+     end
+
+     class Table
+       attr_reader :name
+
+       def initialize(reader, name)
+         @reader = reader
+         @name = name
+       end
+
+       @@types = %w(tiny enum decimal short long float double null timestamp longlong int24 date time datetime year set blob string var_string char).inject({}) do |list, type|
+         list[eval("::Mysql::Field::TYPE_#{type.upcase}")] = type
+         list
+       end
+
+       @@types[246] = "decimal"
+
+       def columns
+         @columns ||= load_columns
+       end
+
+       def convert_type(type)
+         case type
+         when /^int.* unsigned/
+           "bigint"
+         when /bigint/
+           "bigint"
+         when "bit(1)"
+           "boolean"
+         when /smallint.* unsigned/
+           "integer"
+         when /smallint/
+           "smallint"
+         when "tinyint(1)"
+           "boolean"
+         when /tinyint/
+           "tinyint"
+         when /int/
+           "integer"
+         when /varchar/
+           "varchar"
+         when /char/
+           "char"
+         when /decimal/
+           "decimal"
+         when /float/
+           "float"
+         when /real|double/
+           "double precision"
+         else
+           type
+         end
+       end
+
+       def load_columns
+         @reader.reconnect
+         result = @reader.mysql.list_fields(name)
+         mysql_flags = ::Mysql::Field.constants.select {|c| c =~ /FLAG/}
+         fields = []
+         @reader.mysql.query("EXPLAIN `#{name}`") do |res|
+           while field = res.fetch_row do
+             length = -1
+             length = field[1][/\((\d+)\)/, 1] if field[1] =~ /\((\d+)\)/
+             length = field[1][/\((\d+),(\d+)\)/, 1] if field[1] =~ /\((\d+),(\d+)\)/
+             desc = {
+               :name => field[0],
+               :table_name => name,
+               :type => convert_type(field[1]),
+               :length => length && length.to_i,
+               :decimals => field[1][/\((\d+),(\d+)\)/, 2],
+               :null => field[2] == "YES",
+               :primary_key => field[3] == "PRI",
+               :auto_increment => field[5] == "auto_increment"
+             }
+             desc[:default] = field[4] unless field[4].nil?
+             fields << desc
+           end
+         end
+
+         fields.select {|field| field[:auto_increment]}.each do |field|
+           @reader.mysql.query("SELECT max(`#{field[:name]}`) FROM `#{name}`") do |res|
+             field[:maxval] = res.fetch_row[0].to_i
+           end
+         end
+         fields
+       end
+
+
+       def indexes
+         load_indexes unless @indexes
+         @indexes
+       end
+
+       def foreign_keys
+         load_indexes unless @foreign_keys
+         @foreign_keys
+       end
+
+       def load_indexes
+         @indexes = []
+         @foreign_keys = []
+
+         @reader.mysql.query("SHOW CREATE TABLE `#{name}`") do |result|
+           explain = result.fetch_row[1]
+           explain.split(/\n/).each do |line|
+             next unless line =~ / KEY /
+             index = {}
+             if match_data = /CONSTRAINT `(\w+)` FOREIGN KEY \((.*?)\) REFERENCES `(\w+)` \((.*?)\)(.*)/.match(line)
+               index[:name] = match_data[1]
+               index[:column] = match_data[2].parse_csv(:quote_char => '`',:col_sep => ', ')
+               index[:ref_table] = match_data[3]
+               index[:ref_column] = match_data[4].parse_csv(:quote_char => '`',:col_sep => ', ')
+
+               the_rest = match_data[5]
+
+               if match_data = /ON DELETE (SET NULL|SET DEFAULT|RESTRICT|NO ACTION|CASCADE)/.match(the_rest)
+                 index[:on_delete] = match_data[1]
+               else
+                 index[:on_delete] ||= 'RESTRICT'
+               end
+
+               if match_data = /ON UPDATE (SET NULL|SET DEFAULT|RESTRICT|NO ACTION|CASCADE)/.match(the_rest)
+                 index[:on_update] = match_data[1]
+               else
+                 index[:on_update] ||= 'RESTRICT'
+               end
+
+               @foreign_keys << index
+             elsif match_data = /KEY `(\w+)` \((.*)\)/.match(line)
+               index[:name] = match_data[1]
+               index[:columns] = match_data[2].split(",").map {|col| col[/`(\w+)`/, 1]}
+               index[:unique] = true if line =~ /UNIQUE/
+               @indexes << index
+             elsif match_data = /PRIMARY KEY .*\((.*)\)/.match(line)
+               index[:primary] = true
+               index[:columns] = match_data[1].split(",").map {|col| col.strip.gsub(/`/, "")}
+               @indexes << index
+             end
+           end
+         end
+       end
+
+       def count_rows
+         @reader.mysql.query("SELECT COUNT(*) FROM `#{name}`") do |res|
+           return res.fetch_row[0].to_i
+         end
+       end
+
+       def has_id?
+         !!columns.find {|col| col[:name] == "id"}
+       end
+
+       def count_for_pager
+         query = has_id? ? 'MAX(id)' : 'COUNT(*)'
+         @reader.mysql.query("SELECT #{query} FROM `#{name}`") do |res|
+           return res.fetch_row[0].to_i
+         end
+       end
+
+       def query_for_pager
+         query = has_id? ? 'WHERE id >= ? AND id < ?' : 'LIMIT ?,?'
+         "SELECT #{columns.map{|c| "`"+c[:name]+"`"}.join(", ")} FROM `#{name}` #{query}"
+       end
+     end
+
+     def connect
+       @mysql = ::Mysql.connect(@host, @user, @passwd, @db, @port, @sock, @flag)
+       @mysql.query("SET NAMES utf8")
+       @mysql.query("SET SESSION query_cache_type = OFF")
+     end
+
+     def reconnect
+       @mysql.close rescue false
+       connect
+     end
+
+     def initialize(options)
+       @host, @user, @passwd, @db, @port, @sock, @flag =
+         options.mysqlhostname('localhost'), options.mysqlusername,
+         options.mysqlpassword, options.mysqldatabase,
+         options.mysqlport, options.mysqlsocket
+       @port = nil if @port == "" # for things like Amazon's RDS you don't have a port and connect fails with "" for a value
+       @sock = nil if @sock == ""
+       @flag = nil if @flag == ""
+       connect
+     end
+
+     attr_reader :mysql
+
+     def views
+       unless defined? @views
+         @mysql.query("SELECT t.TABLE_NAME FROM INFORMATION_SCHEMA.TABLES t WHERE t.TABLE_SCHEMA = '#{@db}' AND t.TABLE_TYPE = 'VIEW';") do |res|
+           @views = []
+           res.each { |row| @views << row[0] }
+         end
+       end
+
+       @views
+     end
+
+     def tables
+       @tables ||= (@mysql.list_tables - views).map do |table|
+         Table.new(self, table)
+       end
+     end
+
+     def paginated_read(table, page_size)
+       count = table.count_for_pager
+       return if count < 1
+       statement = @mysql.prepare(table.query_for_pager)
+       counter = 0
+       0.upto((count + page_size)/page_size) do |i|
+         statement.execute(i*page_size, table.has_id? ? (i+1)*page_size : page_size)
+         while row = statement.fetch
+           counter += 1
+           yield(row, counter)
+         end
+       end
+       counter
+     end
+   end
+
+ end
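
For orientation, a minimal sketch of driving the reader above directly. This is illustrative only and not part of the package: OptionsStub is a hypothetical stand-in for the gem's config object, exposing just the accessors MysqlReader#initialize calls, and it assumes `require 'mysql2psql'` loads the class.

  # Hypothetical config stand-in -- it only needs to answer the accessors
  # MysqlReader#initialize calls above.
  class OptionsStub
    def mysqlhostname(default = nil); 'localhost'; end
    def mysqlusername; 'root'; end
    def mysqlpassword; ''; end
    def mysqldatabase; 'mydb'; end
    def mysqlport; ''; end      # blank values are normalized to nil by the reader
    def mysqlsocket; ''; end
  end

  require 'mysql2psql'   # assumed to load Mysql2psql::MysqlReader

  reader = Mysql2psql::MysqlReader.new(OptionsStub.new)
  reader.tables.each do |table|
    puts "#{table.name}: #{table.count_rows} rows, #{table.columns.size} columns"
  end
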
@@ -0,0 +1,201 @@
+ require 'pg'
+
+ require 'mysql2psql/postgres_writer'
+
+ class Mysql2psql
+
+   class PostgresDbWriter < PostgresWriter
+     attr_reader :conn, :hostname, :login, :password, :database, :schema, :port
+
+     def initialize(options)
+       @hostname, @login, @password, @database, @port =
+         options.pghostname('localhost'), options.pgusername,
+         options.pgpassword, options.pgdatabase, options.pgport(5432).to_s
+       @database, @schema = database.split(":")
+       open
+     end
+
+     def open
+       @conn = PGconn.new(hostname, port, '', '', database, login, password)
+       @conn.exec("SET search_path TO #{PGconn.quote_ident(schema)}") if schema
+       @conn.exec("SET client_encoding = 'UTF8'")
+       @conn.exec("SET standard_conforming_strings = off") if @conn.server_version >= 80200
+       @conn.exec("SET check_function_bodies = false")
+       @conn.exec("SET client_min_messages = warning")
+     end
+
+     def close
+       @conn.close
+     end
+
+     def exists?(relname)
+       rc = @conn.exec("SELECT COUNT(*) FROM pg_class WHERE relname = '#{relname}'")
+       (!rc.nil?) && (rc.to_a.length==1) && (rc.first.count.to_i==1)
+     end
+
+     def write_sequence_update(table, options)
+       serial_key_column = table.columns.detect do |column|
+         column[:auto_increment]
+       end
+
+       if serial_key_column
+         serial_key = serial_key_column[:name]
+         max_value = serial_key_column[:maxval].to_i < 1 ? 1 : serial_key_column[:maxval] + 1
+         serial_key_seq = "#{table.name}_#{serial_key}_seq"
+
+         if !options.supress_ddl
+           if @conn.server_version < 80200
+             @conn.exec("DROP SEQUENCE #{serial_key_seq} CASCADE") if exists?(serial_key_seq)
+           else
+             @conn.exec("DROP SEQUENCE IF EXISTS #{serial_key_seq} CASCADE")
+           end
+           @conn.exec <<-EOF
+ CREATE SEQUENCE #{serial_key_seq}
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1
+ EOF
+         end
+
+         if !options.supress_sequence_update
+           puts "Updated sequence #{serial_key_seq} to current value of #{max_value}"
+           @conn.exec sqlfor_set_serial_sequence(table, serial_key_seq, max_value)
+         end
+       end
+     end
+
+     def write_table(table, options)
+       puts "Creating table #{table.name}..."
+       primary_keys = []
+
+       columns = table.columns.map do |column|
+         if column[:primary_key]
+           primary_keys << column[:name]
+         end
+         " " + column_description(column, options)
+       end.join(",\n")
+
+       if @conn.server_version < 80200
+         @conn.exec "DROP TABLE #{PGconn.quote_ident(table.name)} CASCADE;" if exists?(table.name)
+       else
+         @conn.exec "DROP TABLE IF EXISTS #{PGconn.quote_ident(table.name)} CASCADE;"
+       end
+       create_sql = "CREATE TABLE #{PGconn.quote_ident(table.name)} (\n" + columns + "\n)\nWITHOUT OIDS;"
+       begin
+         @conn.exec(create_sql)
+       rescue Exception => e
+         puts "Error: \n#{create_sql}"
+         raise
+       end
+       puts "Created table #{table.name}"
+
+     end
+
+     def write_indexes(table)
+       puts "Indexing table #{table.name}..."
+       if primary_index = table.indexes.find {|index| index[:primary]}
+         index_sql = "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD CONSTRAINT \"#{table.name}_pkey\" PRIMARY KEY(#{primary_index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")})"
+         @conn.exec(index_sql)
+       end
+
+       table.indexes.each do |index|
+         next if index[:primary]
+         unique = index[:unique] ? "UNIQUE " : nil
+
+         # MySQL allows an index name which could be equal to a table name, Postgres doesn't
+         indexname = index[:name]
+         indexname_quoted = ''
+
+         if indexname.eql?(table.name)
+           indexname = (@conn.server_version < 90000) ? "#{indexname}_index" : nil
+           puts "WARNING: index \"#{index[:name]}\" equals table name. This is not allowed in PostgreSQL and will be renamed."
+         end
+
+         if indexname
+           indexname_quoted = PGconn.quote_ident(indexname)
+           if @conn.server_version < 80200
+             @conn.exec("DROP INDEX #{PGconn.quote_ident(indexname)} CASCADE;") if exists?(indexname)
+           else
+             @conn.exec("DROP INDEX IF EXISTS #{PGconn.quote_ident(indexname)} CASCADE;")
+           end
+         end
+
+         index_sql = "CREATE #{unique}INDEX #{indexname_quoted} ON #{PGconn.quote_ident(table.name)} (#{index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")});"
+         @conn.exec(index_sql)
+       end
+
+       #@conn.exec("VACUUM FULL ANALYZE #{PGconn.quote_ident(table.name)}")
+       puts "Indexed table #{table.name}"
+     rescue Exception => e
+       puts "Couldn't create indexes on #{table} (#{table.indexes.inspect})"
+       puts e
+       puts e.backtrace[0,3].join("\n")
+     end
+
+     def write_constraints(table)
+       table.foreign_keys.each do |key|
+         key_sql = "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD FOREIGN KEY (#{key[:column].map{|c|PGconn.quote_ident(c)}.join(', ')}) REFERENCES #{PGconn.quote_ident(key[:ref_table])}(#{key[:ref_column].map{|c|PGconn.quote_ident(c)}.join(', ')}) ON UPDATE #{key[:on_update]} ON DELETE #{key[:on_delete]}"
+         begin
+           @conn.exec(key_sql)
+         rescue Exception => e
+           puts "Error: \n#{key_sql}\n#{e}"
+         end
+       end
+     end
+
+     def format_eta (t)
+       t = t.to_i
+       sec = t % 60
+       min = (t / 60) % 60
+       hour = t / 3600
+       sprintf("%02dh:%02dm:%02ds", hour, min, sec)
+     end
+
+     def write_contents(table, reader)
+       _time1 = Time.now
+       copy_line = "COPY #{PGconn.quote_ident(table.name)} (#{table.columns.map {|column| PGconn.quote_ident(column[:name])}.join(", ")}) FROM stdin;"
+       @conn.exec(copy_line)
+       puts "Counting rows of #{table.name}... "
+       STDOUT.flush
+       rowcount = table.count_rows
+       puts "Rows counted"
+       puts "Loading #{table.name}..."
+       STDOUT.flush
+       _counter = reader.paginated_read(table, 1000) do |row, counter|
+         process_row(table, row)
+         @conn.put_copy_data(row.join("\t") + "\n")
+
+         if counter != 0 && counter % 20000 == 0
+           elapsedTime = Time.now - _time1
+           eta = elapsedTime * rowcount / counter - elapsedTime
+           etaf = self.format_eta(eta)
+           etatimef = (Time.now + eta).strftime("%Y/%m/%d %H:%M")
+           printf "\r#{counter} of #{rowcount} rows loaded. [ETA: #{etatimef} (#{etaf})]"
+           STDOUT.flush
+         end
+
+         if counter % 5000 == 0
+           @conn.put_copy_end
+           res = @conn.get_result
+           if res.cmdtuples != 5000
+             puts "\nWARNING: #{table.name} expected 5000 tuple inserts got #{res.cmdtuples} at row #{counter}\n"
+           end
+           @conn.exec(copy_line)
+         end
+
+       end
+       @conn.put_copy_end
+       if _counter && (_counter % 5000) > 0
+         res = @conn.get_result
+         if res.cmdtuples != (_counter % 5000)
+           puts "\nWARNING: table #{table.name} expected #{_counter % 5000} tuple inserts got #{res.cmdtuples}\n"
+         end
+       end
+       _time2 = Time.now
+       puts "\n#{table.name} #{_counter} rows loaded in #{((_time2 - _time1) / 60).round}min #{((_time2 - _time1) % 60).round}s"
+     end
+
+   end
+
+ end
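
The database writer above and the reader from the first file are driven together: schema first, then bulk COPY of the data, then sequences, indexes and foreign keys. A condensed, illustrative sketch of one such sequence (not part of the package; `options` is assumed to be the same config object as before, also answering the pg* accessors and the supress_ddl / supress_sequence_update flags used above):

  reader = Mysql2psql::MysqlReader.new(options)
  writer = Mysql2psql::PostgresDbWriter.new(options)

  tables = reader.tables
  tables.each { |t| writer.write_table(t, options) }            # DROP/CREATE TABLE
  tables.each { |t| writer.write_contents(t, reader) }          # COPY ... FROM stdin
  tables.each { |t| writer.write_sequence_update(t, options) }  # bump serial-key sequences
  tables.each { |t| writer.write_indexes(t) }                   # indexes after the data load
  tables.each { |t| writer.write_constraints(t) }               # foreign keys last
  writer.close
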
@@ -0,0 +1,152 @@
+ require 'mysql2psql/postgres_writer'
+
+ class Mysql2psql
+
+   class PostgresFileWriter < PostgresWriter
+     def initialize(file)
+       @f = File.open(file, "w+")
+       @f << <<-EOF
+ -- MySQL 2 PostgreSQL dump\n
+ SET client_encoding = 'UTF8';
+ SET standard_conforming_strings = off;
+ SET check_function_bodies = false;
+ SET client_min_messages = warning;
+
+ EOF
+     end
+
+     def truncate(table)
+       serial_key = nil
+       maxval = nil
+
+       table.columns.map do |column|
+         if column[:auto_increment]
+           serial_key = column[:name]
+           maxval = column[:maxval].to_i < 1 ? 1 : column[:maxval] + 1
+         end
+       end
+
+       @f << <<-EOF
+ -- TRUNCATE #{table.name};
+ TRUNCATE #{PGconn.quote_ident(table.name)} CASCADE;
+
+ EOF
+       if serial_key
+         @f << <<-EOF
+ #{sqlfor_reset_serial_sequence(table,serial_key,maxval)}
+ EOF
+       end
+     end
+
+     def write_sequence_update(table, options)
+       serial_key_column = table.columns.detect do |column|
+         column[:auto_increment]
+       end
+
+       if serial_key_column
+         serial_key = serial_key_column[:name]
+         serial_key_seq = "#{table.name}_#{serial_key}_seq"
+         max_value = serial_key_column[:maxval].to_i < 1 ? 1 : serial_key_column[:maxval] + 1
+
+         @f << <<-EOF
+ --
+ -- Name: #{serial_key_seq}; Type: SEQUENCE; Schema: public
+ --
+ EOF
+
+         if !options.supress_ddl
+           @f << <<-EOF
+ DROP SEQUENCE IF EXISTS #{serial_key_seq} CASCADE;
+
+ CREATE SEQUENCE #{serial_key_seq}
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+ EOF
+         end
+
+         if !options.supress_sequence_update
+           @f << <<-EOF
+ #{sqlfor_set_serial_sequence(table, serial_key_seq, max_value)}
+ EOF
+         end
+       end
+     end
+
+     def write_table(table, options)
+       primary_keys = []
+       serial_key = nil
+       maxval = nil
+
+       columns = table.columns.map do |column|
+         if column[:primary_key]
+           primary_keys << column[:name]
+         end
+         " " + column_description(column, options)
+       end.join(",\n")
+
+       @f << <<-EOF
+ -- Table: #{table.name}
+
+ -- DROP TABLE #{table.name};
+ DROP TABLE IF EXISTS #{PGconn.quote_ident(table.name)} CASCADE;
+
+ CREATE TABLE #{PGconn.quote_ident(table.name)} (
+ EOF
+
+       @f << columns
+
+       if primary_index = table.indexes.find {|index| index[:primary]}
+         @f << ",\n CONSTRAINT #{table.name}_pkey PRIMARY KEY(#{primary_index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")})"
+       end
+
+       @f << <<-EOF
+ \n)
+ WITHOUT OIDS;
+ EOF
+
+       table.indexes.each do |index|
+         next if index[:primary]
+         unique = index[:unique] ? "UNIQUE " : nil
+         @f << <<-EOF
+ DROP INDEX IF EXISTS #{PGconn.quote_ident(index[:name])} CASCADE;
+ CREATE #{unique}INDEX #{PGconn.quote_ident(index[:name])} ON #{PGconn.quote_ident(table.name)} (#{index[:columns].map {|col| PGconn.quote_ident(col)}.join(", ")});
+ EOF
+       end
+
+     end
+
+     def write_indexes(table)
+     end
+
+     def write_constraints(table)
+       table.foreign_keys.each do |key|
+         @f << "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD FOREIGN KEY (#{key[:column].map{|c|PGconn.quote_ident(c)}.join(', ')}) REFERENCES #{PGconn.quote_ident(key[:ref_table])}(#{key[:ref_column].map{|c|PGconn.quote_ident(c)}.join(', ')}) ON UPDATE #{key[:on_update]} ON DELETE #{key[:on_delete]};\n"
+       end
+     end
+
+
+     def write_contents(table, reader)
+       @f << <<-EOF
+ --
+ -- Data for Name: #{table.name}; Type: TABLE DATA; Schema: public
+ --
+
+ COPY "#{table.name}" (#{table.columns.map {|column| PGconn.quote_ident(column[:name])}.join(", ")}) FROM stdin;
+ EOF
+
+       reader.paginated_read(table, 1000) do |row, counter|
+         process_row(table, row)
+         @f << row.join("\t") + "\n"
+       end
+       @f << "\\.\n\n"
+       #@f << "VACUUM FULL ANALYZE #{PGconn.quote_ident(table.name)};\n\n"
+     end
+
+     def close
+       @f.close
+     end
+   end
+
+ end
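
PostgresFileWriter mirrors the database writer but emits a SQL/COPY dump to a file instead of talking to a live server; write_indexes is a no-op here because the index statements are already emitted by write_table. A minimal, illustrative sketch, reusing the hypothetical reader and options from the earlier notes:

  writer = Mysql2psql::PostgresFileWriter.new("output.sql")
  reader.tables.each do |table|
    writer.write_table(table, options)     # DROP/CREATE TABLE plus index DDL
    writer.write_contents(table, reader)   # COPY block with tab-separated rows
    writer.write_constraints(table)        # ALTER TABLE ... ADD FOREIGN KEY
  end
  writer.close
  # The resulting output.sql can then be loaded with, e.g.: psql target_db -f output.sql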