mysql2psql 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+
2
# Exception hierarchy for the Mysql2psql namespace.
class Mysql2psql
  # Direct StandardError subclass.
  class GeneralError < StandardError; end

  # Base class of the configuration-related errors declared below.
  class ConfigurationError < StandardError; end

  # Specialisations of ConfigurationError.
  class UninitializedValueError < ConfigurationError; end
  class ConfigurationFileNotFound < ConfigurationError; end
  class ConfigurationFileInitialized < ConfigurationError; end
end
@@ -0,0 +1,191 @@
1
+ require 'mysql'
2
+
3
+ class Mysql2psql
4
+
5
+ class MysqlReader
6
# Empty class; no behaviour is defined for it here.
class Field; end
8
+
9
+ class Table
10
+ attr_reader :name
11
+
12
# Stores the owning reader and the table name.
#
# reader - kept in @reader; other methods call #mysql and #reconnect on it.
# name   - kept in @name and exposed through the :name reader.
def initialize(reader, name)
  @reader, @name = reader, name
end
16
+
17
# Lookup table from the numeric ::Mysql::Field::TYPE_* constants to their
# lower-case names. Constants are resolved with const_get so no code string
# has to be eval'ed.
@@types = %w(tiny enum decimal short long float double null timestamp longlong int24 date time datetime year set blob string var_string char).each_with_object({}) do |type, list|
  list[::Mysql::Field.const_get("TYPE_#{type.upcase}")] = type
end

# Raw code 246 is additionally mapped to "decimal"
# (presumably MySQL's NEWDECIMAL type code -- TODO confirm).
@@types[246] = "decimal"
23
+
24
# Column descriptions for this table; loaded through load_columns on first
# use and cached in @columns afterwards.
def columns
  return @columns if @columns

  @columns = load_columns
end
27
+
28
# Translates a MySQL column type string (as reported by EXPLAIN) into the
# type name used on the PostgreSQL side.
#
# Rules are tried top to bottom with ===, so order matters: the first
# matching rule wins. A type that matches no rule is returned unchanged.
def convert_type(type)
  rules = [
    [/int.* unsigned/,   "bigint"],
    [/bigint/,           "bigint"],
    ["bit(1)",           "boolean"],
    ["tinyint(1)",       "boolean"],
    [/tinyint/,          "tinyint"],
    [/int/,              "integer"],
    [/varchar/,          "varchar"],
    [/char/,             "char"],
    [/(float|decimal)/,  "decimal"],
    [/double/,           "double precision"]
  ]

  hit = rules.find { |matcher, _target| matcher === type }
  hit ? hit.last : type
end
54
+
55
# Reads the column definitions of this table via EXPLAIN.
#
# Reconnects the reader first, then builds one hash per column with the keys
# :name, :table_name, :type (converted with convert_type), :length,
# :decimals, :null, :primary_key, :auto_increment and, when the column has a
# default, :default. For auto_increment columns the current MAX() of the
# column is stored under :maxval.
#
# Returns the array of column hashes.
def load_columns
  @reader.reconnect
  fields = []
  @reader.mysql.query("EXPLAIN `#{name}`") do |res|
    while (field = res.fetch_row)
      type_def = field[1]
      # Computed fresh for every row: `while` opens no new scope, so a value
      # left over from the previous row must never be reused.
      # "(p,s)" takes precedence over "(n)", otherwise length is nil.
      length = type_def[/\((\d+),(\d+)\)/, 1] || type_def[/\((\d+)\)/, 1]
      desc = {
        :name => field[0],
        :table_name => name,
        :type => convert_type(type_def),
        :length => length && length.to_i,
        :decimals => type_def[/\((\d+),(\d+)\)/, 2],
        :null => field[2] == "YES",
        :primary_key => field[3] == "PRI",
        :auto_increment => field[5] == "auto_increment"
      }
      desc[:default] = field[4] unless field[4].nil?
      fields << desc
    end
  end

  fields.select { |field| field[:auto_increment] }.each do |field|
    @reader.mysql.query("SELECT max(`#{field[:name]}`) FROM `#{name}`") do |res|
      field[:maxval] = res.fetch_row[0].to_i
    end
  end
  fields
end
86
+
87
+
88
# Index descriptions of this table; triggers load_indexes the first time
# and serves the cached @indexes afterwards.
def indexes
  return @indexes if @indexes

  load_indexes
  @indexes
end
92
+
93
# Foreign-key descriptions of this table; triggers load_indexes the first
# time and serves the cached @foreign_keys afterwards.
def foreign_keys
  return @foreign_keys if @foreign_keys

  load_indexes
  @foreign_keys
end
97
+
98
# Parses the output of SHOW CREATE TABLE and fills @indexes and
# @foreign_keys.
#
# Only lines containing " KEY " are inspected. Each such line is tried, in
# this order, as a single-column foreign-key constraint, a named (optionally
# UNIQUE) key, or the primary key; lines matching none are ignored.
def load_indexes
  @indexes = []
  @foreign_keys = []

  foreign_key_re = /CONSTRAINT `(\w+)` FOREIGN KEY \(`(\w+)`\) REFERENCES `(\w+)` \(`(\w+)`\)/
  named_key_re   = /KEY `(\w+)` \((.*)\)/
  primary_key_re = /PRIMARY KEY .*\((.*)\)/

  @reader.mysql.query("SHOW CREATE TABLE `#{name}`") do |result|
    ddl = result.fetch_row[1]
    ddl.split(/\n/).each do |line|
      next unless line =~ / KEY /

      if (m = foreign_key_re.match(line))
        @foreign_keys << {
          :name => m[1],
          :column => m[2],
          :ref_table => m[3],
          :ref_column => m[4]
        }
      elsif (m = named_key_re.match(line))
        entry = {
          :name => m[1],
          # keeps only the back-quoted identifier, dropping any prefix length
          :columns => m[2].split(",").map { |col| col[/`(\w+)`/, 1] }
        }
        entry[:unique] = true if line =~ /UNIQUE/
        @indexes << entry
      elsif (m = primary_key_re.match(line))
        @indexes << {
          :primary => true,
          :columns => m[1].split(",").map { |col| col.strip.delete("`") }
        }
      end
    end
  end
end
126
+
127
# Number of rows in this table, obtained with SELECT COUNT(*).
# The value is returned from inside the query block as an Integer.
def count_rows
  sql = "SELECT COUNT(*) FROM `#{name}`"
  @reader.mysql.query(sql) { |result| return result.fetch_row.first.to_i }
end
132
+
133
# True when one of the table's columns is named exactly "id".
def has_id?
  columns.any? { |column| column[:name] == "id" }
end
136
+
137
# Upper bound used for paging: MAX(id) when the table has an "id" column,
# COUNT(*) otherwise. The value is returned from inside the query block as
# an Integer.
def count_for_pager
  aggregate = if has_id?
                'MAX(id)'
              else
                'COUNT(*)'
              end
  @reader.mysql.query("SELECT #{aggregate} FROM `#{name}`") do |result|
    return result.fetch_row.first.to_i
  end
end
143
+
144
# SQL text of the paged SELECT, with two "?" placeholders.
#
# With an "id" column the placeholders bound an id range (>= first,
# < second); without one they are the two LIMIT arguments.
def query_for_pager
  paging_clause = has_id? ? 'WHERE id >= ? AND id < ?' : 'LIMIT ?,?'
  select_list = columns.map { |column| "`" + column[:name] + "`" }.join(", ")
  "SELECT #{select_list} FROM `#{name}` #{paging_clause}"
end
148
+ end
149
+
150
# Opens the MySQL connection from the stored settings, keeps it in @mysql,
# then issues "SET NAMES utf8" and turns the session query cache off.
def connect
  connection = ::Mysql.connect(@host, @user, @passwd, @db, @port, @sock, @flag)
  @mysql = connection
  connection.query("SET NAMES utf8")
  connection.query("SET SESSION query_cache_type = OFF")
end
155
+
156
# Drops the current connection and opens a new one.
# Closing is best effort: any StandardError raised by close is ignored.
def reconnect
  begin
    @mysql.close
  rescue StandardError
    false
  end
  connect
end
160
+
161
# Reads the MySQL connection settings from +options+ and connects.
#
# options - must respond to mysqlhostname (called with 'localhost'),
#           mysqlusername, mysqlpassword, mysqldatabase, mysqlport and
#           mysqlsocket.
#
# No option supplies a connection flag, so @flag is nil.
def initialize(options)
  @host   = options.mysqlhostname('localhost')
  @user   = options.mysqlusername
  @passwd = options.mysqlpassword
  @db     = options.mysqldatabase
  @port   = options.mysqlport
  @sock   = options.mysqlsocket
  @flag   = nil
  connect
end
168
+
169
+ attr_reader :mysql
170
+
171
# One Table per name returned by list_tables, built on first use and cached
# in @tables.
def tables
  return @tables if @tables

  @tables = @mysql.list_tables.map { |table_name| Table.new(self, table_name) }
end
174
+
175
# Streams every row of +table+ to the given block, page by page.
#
# table     - must respond to count_for_pager, query_for_pager and has_id?.
# page_size - width of the id range, or the LIMIT row count, per page.
#
# Yields each fetched row together with a 1-based running counter.
# Returns the number of rows yielded, or nil when count_for_pager is below 1.
#
# The prepared statement is closed when reading finishes or when the block
# raises, so it does not stay allocated.
def paginated_read(table, page_size)
  count = table.count_for_pager
  return if count < 1

  statement = @mysql.prepare(table.query_for_pager)
  by_id = table.has_id? # same for every page, so ask only once
  counter = 0
  begin
    0.upto((count + page_size) / page_size) do |page|
      # id paging binds [first_id, last_id); LIMIT paging binds offset, count
      statement.execute(page * page_size, by_id ? (page + 1) * page_size : page_size)
      while (row = statement.fetch)
        counter += 1
        yield(row, counter)
      end
    end
  ensure
    statement.close
  end
  counter
end
189
+ end
190
+
191
+ end
@@ -0,0 +1,179 @@
1
+ require 'pg'
2
+
3
+ require 'mysql2psql/postgres_writer'
4
+
5
+ class Mysql2psql
6
+
7
+ class PostgresDbWriter < PostgresWriter
8
+ attr_reader :conn, :hostname, :login, :password, :database, :schema, :port
9
+
10
# Reads the PostgreSQL connection settings from +options+ and opens the
# connection.
#
# options - must respond to pghostname (called with 'localhost'),
#           pgusername, pgpassword, pgdatabase and pgport (called with 5432).
#
# The database setting may have the form "database:schema"; it is split on
# ":" into @database and @schema (@schema is nil without a ":").
def initialize(options)
  @hostname = options.pghostname('localhost')
  @login    = options.pgusername
  @password = options.pgpassword
  @database = options.pgdatabase
  @port     = options.pgport(5432).to_s

  db_name, schema_name = database.split(":")
  @database = db_name
  @schema = schema_name
  open
end
17
+
18
# Connects to PostgreSQL and applies the session settings the writer relies
# on: search_path (only when a schema is configured), client encoding,
# standard_conforming_strings (only on servers >= 8.2),
# check_function_bodies and client_min_messages.
def open
  @conn = PGconn.new(hostname, port, '', '', database, login, password)

  session_setup = []
  session_setup << "SET search_path TO #{PGconn.quote_ident(schema)}" if schema
  session_setup << "SET client_encoding = 'UTF8'"
  session_setup << "SET standard_conforming_strings = off" if @conn.server_version >= 80200
  session_setup << "SET check_function_bodies = false"
  session_setup.each { |sql| @conn.exec(sql) }

  @conn.exec("SET client_min_messages = warning")
end
26
+
27
# Closes the PostgreSQL connection held in @conn.
def close
  @conn.close
end
30
+
31
# Tells whether pg_class contains a relation called +relname+.
#
# relname - relation name (table, sequence or index); escaped before it is
#           embedded in the query.
#
# Returns true when the COUNT(*) reported by the server is at least 1.
# The value is read from the "count" column of the single result row;
# calling #count on the row itself would only count its columns.
def exists?(relname)
  escaped = @conn.escape_string(relname.to_s)
  rc = @conn.exec("SELECT COUNT(*) FROM pg_class WHERE relname = '#{escaped}'")
  row = rc && rc.first
  !row.nil? && row["count"].to_i >= 1
end
35
+
36
# (Re)creates +table+ in PostgreSQL.
#
# When the table has an auto_increment column, the sequence
# "<table>_<column>_seq" is dropped, recreated and positioned at the
# column's :maxval + 1 (or 1 when :maxval is below 1). The table itself is
# then dropped and created from column_description of every column. With
# several auto_increment columns the last one is used.
#
# Servers older than 8.2 have no DROP ... IF EXISTS, so existence is checked
# with exists? first. A failing CREATE TABLE prints the statement and
# re-raises.
def write_table(table)
  puts "Creating table #{table.name}..."

  table_columns = table.columns
  column_sql = table_columns.map { |column| " " + column_description(column) }.join(",\n")

  serial_column = table_columns.select { |column| column[:auto_increment] }.last
  serial_key = serial_column && serial_column[:name]
  legacy_server = lambda { @conn.server_version < 80200 }

  if serial_key
    last_value = serial_column[:maxval]
    maxval = last_value.to_i < 1 ? 1 : last_value + 1

    if legacy_server.call
      serial_key_seq = "#{table.name}_#{serial_key}_seq"
      @conn.exec("DROP SEQUENCE #{serial_key_seq} CASCADE") if exists?(serial_key_seq)
    else
      @conn.exec("DROP SEQUENCE IF EXISTS #{table.name}_#{serial_key}_seq CASCADE")
    end

    create_sequence = [
      "CREATE SEQUENCE #{table.name}_#{serial_key}_seq",
      "INCREMENT BY 1",
      "NO MAXVALUE",
      "NO MINVALUE",
      "CACHE 1"
    ].join("\n") + "\n"
    @conn.exec create_sequence

    @conn.exec "SELECT pg_catalog.setval('#{table.name}_#{serial_key}_seq', #{maxval}, true)"
  end

  quoted_table = PGconn.quote_ident(table.name)
  if legacy_server.call
    @conn.exec "DROP TABLE #{quoted_table} CASCADE;" if exists?(table.name)
  else
    @conn.exec "DROP TABLE IF EXISTS #{quoted_table} CASCADE;"
  end

  create_sql = "CREATE TABLE #{quoted_table} (\n" + column_sql + "\n)\nWITHOUT OIDS;"
  begin
    @conn.exec(create_sql)
  rescue Exception
    puts "Error: \n#{create_sql}"
    raise
  end
  puts "Created table #{table.name}"
end
86
+
87
# Creates the primary-key constraint and all secondary indexes of +table+.
#
# An index whose name equals the table name is renamed to "<name>_index"
# (with a warning), because PostgreSQL does not allow that clash. Existing
# indexes of the same name are dropped first; servers older than 8.2 have no
# DROP ... IF EXISTS, so exists? is consulted there.
#
# Failures are reported on stdout and not re-raised (best effort). Only
# StandardError is handled, so Interrupt and SystemExit still stop the run.
def write_indexes(table)
  puts "Indexing table #{table.name}..."
  quoted_table = PGconn.quote_ident(table.name)

  if (primary_index = table.indexes.find { |index| index[:primary] })
    pk_columns = primary_index[:columns].map { |col| PGconn.quote_ident(col) }.join(", ")
    @conn.exec("ALTER TABLE #{quoted_table} ADD CONSTRAINT \"#{table.name}_pkey\" PRIMARY KEY(#{pk_columns})")
  end

  table.indexes.each do |index|
    next if index[:primary]
    unique = index[:unique] ? "UNIQUE " : nil

    # MySQL allows an index name which could be equal to a table name, Postgres doesn't
    indexname = index[:name]
    if indexname.eql?(table.name)
      indexnamenew = "#{indexname}_index"
      puts "WARNING: index \"#{indexname}\" equals table name. This is not allowed by postgres and will be renamed to \"#{indexnamenew}\""
      indexname = indexnamenew
    end

    quoted_index = PGconn.quote_ident(indexname)
    if @conn.server_version < 80200
      @conn.exec("DROP INDEX #{quoted_index} CASCADE;") if exists?(indexname)
    else
      @conn.exec("DROP INDEX IF EXISTS #{quoted_index} CASCADE;")
    end
    index_columns = index[:columns].map { |col| PGconn.quote_ident(col) }.join(", ")
    @conn.exec("CREATE #{unique}INDEX #{quoted_index} ON #{quoted_table} (#{index_columns});")
  end

  puts "Indexed table #{table.name}"
rescue StandardError => e
  puts "Couldn't create indexes on #{table.name} (#{table.indexes.inspect})"
  puts e
  puts e.backtrace[0, 3].join("\n")
end
121
+
122
# Adds one FOREIGN KEY constraint per entry of table.foreign_keys.
#
# A failing statement is printed together with the error and the remaining
# keys are still processed. Only StandardError is handled, so Interrupt and
# SystemExit still stop the run.
def write_constraints(table)
  table.foreign_keys.each do |key|
    key_sql = "ALTER TABLE #{PGconn.quote_ident(table.name)} ADD FOREIGN KEY (#{PGconn.quote_ident(key[:column])}) REFERENCES #{PGconn.quote_ident(key[:ref_table])}(#{PGconn.quote_ident(key[:ref_column])})"
    begin
      @conn.exec(key_sql)
    rescue StandardError => e
      puts "Error: \n#{key_sql}\n#{e}"
    end
  end
end
132
+
133
# Formats a duration given in seconds as "HHh:MMm:SSs".
# The value is truncated to whole seconds with to_i; hours are not capped.
def format_eta(t)
  whole_minutes, sec = t.to_i.divmod(60)
  hour, min = whole_minutes.divmod(60)
  format("%02dh:%02dm:%02ds", hour, min, sec)
end
140
+
141
# Copies all rows of +table+ into PostgreSQL through COPY ... FROM stdin.
#
# table  - source table; name, columns and count_rows are used.
# reader - must provide paginated_read(table, page_size), yielding
#          (row, counter).
#
# Each row is passed to process_row and then sent tab-separated. Every 5000
# rows the running COPY is ended and a new one started; every 20000 rows a
# progress line with an ETA is printed. The closing summary reports whole
# minutes plus the remaining seconds, and 0 rows for an empty table.
#
# NOTE(review): the result of put_copy_end is never fetched here, so a
# server-side COPY failure does not appear to be reported -- confirm against
# the pg API before relying on it.
def write_contents(table, reader)
  started_at = Time.now
  copy_line = "COPY #{PGconn.quote_ident(table.name)} (#{table.columns.map { |column| PGconn.quote_ident(column[:name]) }.join(", ")}) FROM stdin;"
  @conn.exec(copy_line)
  puts "Counting rows of #{table.name}... "
  STDOUT.flush
  rowcount = table.count_rows
  puts "Rows counted"
  puts "Loading #{table.name}..."
  STDOUT.flush

  loaded = reader.paginated_read(table, 1000) do |row, counter|
    process_row(table, row)
    @conn.put_copy_data(row.join("\t") + "\n")

    if counter != 0 && counter % 20000 == 0
      elapsed_time = Time.now - started_at
      eta = elapsed_time * rowcount / counter - elapsed_time
      etaf = format_eta(eta)
      etatimef = (Time.now + eta).strftime("%Y/%m/%d %H:%M")
      # print, not printf: the text is already interpolated and must not be
      # interpreted as a format string
      print "\r#{counter} of #{rowcount} rows loaded. [ETA: #{etatimef} (#{etaf})]"
      STDOUT.flush
    end

    if counter % 5000 == 0
      @conn.put_copy_end
      @conn.exec(copy_line)
    end
  end

  # Round once to whole seconds, then split, so 90 s reads "1min 30s".
  minutes, seconds = (Time.now - started_at).round.divmod(60)
  puts "\n#{loaded.to_i} rows loaded in #{minutes}min #{seconds}s"
  @conn.put_copy_end
end
176
+
177
+ end
178
+
179
+ end
@@ -0,0 +1,142 @@
1
+ require 'mysql2psql/postgres_writer'
2
+
3
+ class Mysql2psql
4
+
5
+ class PostgresFileWriter < PostgresWriter
6
# Opens +file+ for writing (mode "w+") and writes the dump preamble: a
# header comment followed by the session SET statements.
# The handle stays open in @f.
def initialize(file)
  @f = File.open(file, "w+")
  preamble = [
    "-- MySQL 2 PostgreSQL dump",
    "",
    "SET client_encoding = 'UTF8';",
    "SET standard_conforming_strings = off;",
    "SET check_function_bodies = false;",
    "SET client_min_messages = warning;",
    ""
  ]
  @f << preamble.join("\n") << "\n"
end
17
+
18
# Writes a TRUNCATE ... CASCADE statement for +table+.
# When the table has an auto_increment column, a setval call follows that
# positions its serial sequence at :maxval + 1 (or 1 when :maxval is below
# 1). With several auto_increment columns the last one is used.
def truncate(table)
  serial_column = table.columns.select { |column| column[:auto_increment] }.last
  serial_key = serial_column && serial_column[:name]
  maxval = nil
  if serial_column
    last_value = serial_column[:maxval]
    maxval = last_value.to_i < 1 ? 1 : last_value + 1
  end

  @f << "-- TRUNCATE #{table.name};\n" \
        "TRUNCATE #{PGconn.quote_ident(table.name)} CASCADE;\n" \
        "\n"
  return unless serial_key

  @f << "SELECT pg_catalog.setval(pg_get_serial_sequence('#{table.name}', '#{serial_key}'), #{maxval}, true);\n"
end
40
+
41
# Writes the DDL for +table+ to the dump file: the serial sequence (when the
# table has an auto_increment column), DROP/CREATE TABLE including the
# primary-key constraint, and DROP/CREATE statements for every secondary
# index.
#
# An index whose name equals the table name is renamed to "<name>_index"
# (with a warning on stdout), because PostgreSQL does not allow that clash.
def write_table(table)
  serial_key = nil
  maxval = nil

  columns = table.columns.map do |column|
    if column[:auto_increment]
      serial_key = column[:name]
      maxval = column[:maxval].to_i < 1 ? 1 : column[:maxval] + 1
    end
    " " + column_description(column)
  end.join(",\n")

  if serial_key
    @f << <<-EOF
--
-- Name: #{table.name}_#{serial_key}_seq; Type: SEQUENCE; Schema: public
--

DROP SEQUENCE IF EXISTS #{table.name}_#{serial_key}_seq CASCADE;

CREATE SEQUENCE #{table.name}_#{serial_key}_seq
INCREMENT BY 1
NO MAXVALUE
NO MINVALUE
CACHE 1;


SELECT pg_catalog.setval('#{table.name}_#{serial_key}_seq', #{maxval}, true);

    EOF
  end

  quoted_table = PGconn.quote_ident(table.name)
  @f << <<-EOF
-- Table: #{table.name}

-- DROP TABLE #{table.name};
DROP TABLE IF EXISTS #{quoted_table} CASCADE;

CREATE TABLE #{quoted_table} (
  EOF

  @f << columns

  if (primary_index = table.indexes.find { |index| index[:primary] })
    pk_columns = primary_index[:columns].map { |col| PGconn.quote_ident(col) }.join(", ")
    @f << ",\n CONSTRAINT #{table.name}_pkey PRIMARY KEY(#{pk_columns})"
  end

  @f << "\n)\nWITHOUT OIDS;\n"

  table.indexes.each do |index|
    next if index[:primary]
    unique = index[:unique] ? "UNIQUE " : nil

    # MySQL allows an index name which could be equal to a table name, Postgres doesn't
    indexname = index[:name]
    if indexname.eql?(table.name)
      indexnamenew = "#{indexname}_index"
      puts "WARNING: index \"#{indexname}\" equals table name. This is not allowed by postgres and will be renamed to \"#{indexnamenew}\""
      indexname = indexnamenew
    end

    quoted_index = PGconn.quote_ident(indexname)
    index_columns = index[:columns].map { |col| PGconn.quote_ident(col) }.join(", ")
    @f << "DROP INDEX IF EXISTS #{quoted_index} CASCADE;\n"
    @f << "CREATE #{unique}INDEX #{quoted_index} ON #{quoted_table} (#{index_columns});\n"
  end
end
108
+
109
# Intentionally empty: this method writes nothing and returns nil.
def write_indexes(table); end
111
+
112
# Appends one "ALTER TABLE ... ADD FOREIGN KEY" statement per entry of
# table.foreign_keys to the dump file.
def write_constraints(table)
  table.foreign_keys.each do |key|
    owner      = PGconn.quote_ident(table.name)
    column     = PGconn.quote_ident(key[:column])
    ref_table  = PGconn.quote_ident(key[:ref_table])
    ref_column = PGconn.quote_ident(key[:ref_column])
    @f << "ALTER TABLE #{owner} ADD FOREIGN KEY (#{column}) REFERENCES #{ref_table}(#{ref_column});\n"
  end
end
117
+
118
+
119
# Writes the data section for +table+: a comment header, the COPY ... FROM
# stdin line, one tab-separated line per row, and the "\." terminator.
#
# table  - source table; name and columns are used.
# reader - must provide paginated_read(table, page_size), yielding
#          (row, counter).
#
# Each row is passed to process_row before it is joined and written.
def write_contents(table, reader)
  column_list = table.columns.map { |column| PGconn.quote_ident(column[:name]) }.join(", ")
  header = [
    "--",
    "-- Data for Name: #{table.name}; Type: TABLE DATA; Schema: public",
    "--",
    "",
    "COPY \"#{table.name}\" (#{column_list}) FROM stdin;"
  ]
  @f << header.join("\n") << "\n"

  reader.paginated_read(table, 1000) do |row, _counter|
    process_row(table, row)
    @f << row.join("\t") + "\n"
  end

  @f << "\\.\n\n"
  #@f << "VACUUM FULL ANALYZE #{PGconn.quote_ident(table.name)};\n\n"
end
136
+
137
# Closes the dump file handle held in @f.
def close
  @f.close
end
140
+ end
141
+
142
+ end