table_copy 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 82e2e7bc03e341d5b10e172c080499d174abd238
4
+ data.tar.gz: ccf3b5eebc929cefe8b9eeec212d7869b3cacb2c
5
+ SHA512:
6
+ metadata.gz: 9be2278b5259705cbfb905422385a334145eeb5325d8f97b33eb253343e3838014d081e3f86417c7456c519364ad29e19ea40e3cedece2fe84550ca2514c9567
7
+ data.tar.gz: 4191cd39eda48dd99414e3f300a962d6de22e0ad756aaa9f7da8755ac5d0a4d42d14d185d4cf717a40e80524b254f8a80bbfacd4b4c842ba02dbf885a4e698cf
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in table_copy.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Tyler Hartland
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # TableCopy
2
+
3
+ Move and update data on a table by table basis between two databases. Currently only supports Postgres in a limited fashion.
4
+
5
+ This gem could be made more flexible with a bit of work, but for now is pretty limited to my specific purposes.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'table_copy'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install table_copy
20
+
21
+ ## Usage
22
+
23
+ Run `table_copy --init` for an example initializer. Then, access each table link by `TableCopy.links['link_name']`. You can call `link.update; link.droppy; link.diffy`
24
+
25
+ Update will attempt to use a sequence field to look for changes. If that field is not available, it will run a diffy(update) operation.
26
+
27
+ Diffy(update) will copy the source table to a temp table, diff it with the destination table, and upsert any changes to the destination.
28
+
29
+ Diffy will perform a diffy(update) and will also diff ids in the destination table against the temp table to find deletions.
30
+
31
+ Droppy will drop the destination table and rebuild/populate it.
32
+
33
+ ### *Very* rough benchmarks:
34
+ - Copy 1M rows ~15 sec
35
+ - Index 1M rows ~2 sec per numeric field, ~40 sec per char field
36
+ - Diff 1M rows ~40 sec
37
+ - Upsert 100k rows into 1M row table ~60 sec
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it ( https://github.com/th7/table_copy/fork )
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/table_copy ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# Installs an example TableCopy initializer into the host project:
# ensures config/initializers/ exists, then copies the bundled example
# file there for the user to customize.

require 'fileutils'

bin_path = File.expand_path(File.dirname(__FILE__))

# FileUtils.mkdir_p replaces shelling out to `mkdir` guarded by the
# deprecated Dir.exists? (removed in Ruby 3.2); it creates the full path
# and is a no-op when the directories already exist.
FileUtils.mkdir_p('config/initializers')

if system("cp #{bin_path}/../config/initializers/table_copy.rb.example config/initializers")
  puts "Example initializer copied to /config/initializers/table_copy.rb.example"
else
  puts 'Failed to copy example config.'
end
@@ -0,0 +1 @@
1
+ :dbname: table_copy_test
@@ -0,0 +1,62 @@
1
require 'yaml'
require 'pg'
require 'table_copy'
require 'table_copy/pg'

# Tables to link, with per-table options:
#   sequence_field -- column used for incremental (insert-only) updates
#   skips_fields   -- source columns excluded from the copy
TABLES = {
  'table_one'   => { sequence_field: 'updated_at' },
  'table_two'   => { skips_fields: [ 'field_to_skip' ] },
  'table_three' => { sequence_field: 'table_three_id' }, # insert only
}

# TableCopy requires you to specify methods which yield a database connection
# single connection example

# Bug fix: the per-environment config must be indexed out of the loaded
# YAML hash (was `YAML.load_file(...)ENV['ENV']`, a syntax error).
source_config = YAML.load_file('config/db1.yml')[ENV['ENV']]

# Bug fix: a local variable is not visible inside `def self.with_conn`
# (defs open a new scope), so the connection is held in a constant.
SOURCE_CONN = PG::Connection.open(source_config)

class SourceDB
  def self.with_conn
    yield SOURCE_CONN # or use a connection pool!
  end
end

# Active Record connection pool example
class DestinationDB < ActiveRecord::Base
  self.abstract_class = true

  def self.with_conn
    self.connection_pool.with_connection do |ar_conn|
      yield ar_conn.raw_connection
    end
  end
end

TableCopy.logger = Logger.new('log/table_copy.log') unless MyEnv.is.development?

# if explicitly asking the DB for the PK fails, a proc can be used instead
infer_pk_proc = Proc.new { |table_name| table_name + '_id' }

# or maybe...
# infer_pk_proc = Proc.new { 'every_table_uses_this_id' }

# config requires database queries -- this block defers until it is actually needed
TableCopy.deferred_config do
  TABLES.each do |table_name, opts|
    source = TableCopy::PG::Source.new(
      table_name: table_name,
      conn_method: SourceDB.method(:with_conn),
      infer_pk_proc: infer_pk_proc
    )

    destination = TableCopy::PG::Destination.new(
      table_name: table_name,
      primary_key: source.primary_key,
      sequence_field: opts[:sequence_field],
      conn_method: DestinationDB.method(:with_conn),
      indexes: source.indexes,
      fields: source.fields - (opts[:skips_fields] || [])
    )

    TableCopy.add_link(table_name, source, destination)
  end
end
@@ -0,0 +1,98 @@
1
+ require 'pg'
2
+
3
module TableCopy
  # Coordinates copying rows from a source table to a destination table,
  # choosing between a full rebuild, a sequence-based incremental update,
  # or a diff-based upsert depending on the destination's state.
  class Copier
    attr_reader :source_table, :destination_table

    # source_table and destination_table are duck-typed; see
    # TableCopy::PG::Source and TableCopy::PG::Destination for the
    # interfaces this class calls.
    def initialize(source_table, destination_table)
      @source_table = source_table
      @destination_table = destination_table
    end

    # Update the destination using the cheapest viable strategy:
    #   empty destination      -> full rebuild (droppy)
    #   sequence field present -> copy only rows past max_sequence
    #   otherwise              -> diff-based upsert
    # Missing tables/columns are self-healed by (re)creating the table.
    def update
      if destination_table.none?
        droppy
      elsif (max_sequence = destination_table.max_sequence)
        update_data(max_sequence)
      else
        diffy_update
      end
    rescue ::PG::UndefinedTable => e
      ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
      create_table
      # Bounded in practice: if create_table fails it raises, ending the loop.
      retry
    rescue ::PG::UndefinedColumn => e
      ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
      droppy
    end

    # Drop, recreate, repopulate, and reindex the destination table.
    def droppy
      logger.info { "Droppy #{destination_table.table_name}" }
      destination_table.transaction do
        destination_table.drop(cascade: true)
        create_table
        moved_count = destination_table.copy_data_from(source_table)
        logger.info { "#{moved_count} rows moved to #{destination_table.table_name}" }
        destination_table.create_indexes
        logger.info { "Completed #{source_table.indexes.count} indexes on #{destination_table.table_name}." }
      end
    end

    # Delete destination rows whose primary key no longer exists in the source.
    def find_deletes
      logger.info { "Find deletes #{destination_table.table_name}" }
      destination_table.transaction do
        destination_table.create_temp(source_table.fields_ddl)
        moved_count = destination_table.copy_data_from(source_table, temp: true, pk_only: true)
        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
        destination_table.delete_not_in_temp
        logger.info { "Deletions from #{destination_table.table_name} complete." }
      end
    end

    # Full diff: upsert changed rows, then delete rows missing from the source.
    def diffy
      logger.info { "Diffy #{destination_table.table_name}" }
      destination_table.transaction do
        destination_table.create_temp(source_table.fields_ddl)
        moved_count = destination_table.copy_data_from(source_table, temp: true)
        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
        destination_table.copy_from_temp
        logger.info { "Upsert to #{destination_table.table_name} complete" }
        destination_table.delete_not_in_temp
        logger.info { "Deletions from #{destination_table.table_name} complete." }
      end
    end

    private

    # Diff-based upsert without delete detection.
    def diffy_update
      logger.info { "Diffy Update #{destination_table.table_name}" }
      destination_table.transaction do
        destination_table.create_temp(source_table.fields_ddl)
        moved_count = destination_table.copy_data_from(source_table, temp: true)
        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
        destination_table.copy_from_temp
        logger.info { "Upsert to #{destination_table.table_name} complete." }
      end
    end

    # Copy only rows whose sequence field exceeds max_sequence, then upsert
    # all of them (except: nil disables the skip-unchanged-rows clause).
    def update_data(max_sequence)
      logger.info { "Update #{destination_table.table_name}" }
      destination_table.transaction do
        destination_table.create_temp(source_table.fields_ddl)
        moved_count = destination_table.copy_data_from(source_table, temp: true, update: max_sequence)
        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
        destination_table.copy_from_temp(except: nil)
        logger.info { "Upsert to #{destination_table.table_name} complete." }
      end
    end

    # Create the destination table from the source's field DDL.
    def create_table
      logger.info { "Creating table #{destination_table.table_name}" }
      destination_table.create(source_table.fields_ddl)
    end

    def logger
      TableCopy.logger
    end
  end
end
@@ -0,0 +1,162 @@
1
module TableCopy
  module PG
    # Writable side of a table copy: owns the destination table's DDL,
    # COPY-based bulk loads, and the temp-table diff/upsert machinery.
    class Destination
      attr_reader :table_name, :conn_method, :indexes, :fields, :primary_key, :sequence_field

      # args:
      #   :table_name     -- destination table name (interpolated into SQL;
      #                      assumed to come from trusted configuration)
      #   :primary_key    -- PK column used for upsert/delete matching
      #   :sequence_field -- optional column used for incremental updates
      #   :conn_method    -- callable yielding a PG connection
      #   :indexes        -- TableCopy::PG::Index-like objects (default [])
      #   :fields         -- column names to copy
      def initialize(args)
        @table_name = args[:table_name]
        @primary_key = args[:primary_key]
        @sequence_field = args[:sequence_field]
        @conn_method = args[:conn_method]
        @indexes = args[:indexes] || []
        @fields = args[:fields]
      end

      # Run the block inside begin/commit, rolling back on failure.
      # NOTE: rescuing Exception (not StandardError) is deliberate so the
      # rollback still fires on non-standard errors; the error is re-raised.
      def transaction
        with_conn do |conn|
          begin
            conn.exec('begin')
            yield
            conn.exec('commit')
          rescue Exception => e
            conn.exec('rollback')
            raise e
          end
        end
      end

      # Create the destination table from a fields DDL fragment.
      def create(fields_ddl)
        with_conn do |conn|
          conn.exec("create table #{table_name} (#{fields_ddl})")
        end
      end

      # Drop the table if present; pass cascade: true to drop dependents too.
      def drop(opts={})
        cascade = ' cascade' if opts[:cascade]
        with_conn do |conn|
          conn.exec("#{drop_sql}#{cascade}")
        end
      end

      # Recreate each configured index against this table.
      def create_indexes
        indexes.each do |index|
          create_ddl = index.class.new(table_name, index.name, index.columns).create
          with_conn do |conn|
            conn.exec(create_ddl)
          end
        end
      end

      def to_s
        table_name
      end

      # Max value of the sequence field, or nil when no sequence field is
      # configured.
      def max_sequence
        return unless sequence_field
        with_conn do |conn|
          row = conn.exec(max_sequence_sql).first
          row['max'] if row
        end
      end

      # Session-local scratch table, dropped automatically at commit.
      def create_temp(fields_ddl)
        with_conn do |conn|
          conn.exec("create temp table temp_#{table_name} (#{fields_ddl}) on commit drop")
        end
      end

      # True when the table has no rows. PG returns the count as a string,
      # hence the comparison against '0'.
      def none?
        with_conn do |conn|
          conn.exec("select count(*) from #{table_name}").first['count'] == '0'
        end
      end

      # Stream rows from source_table into this table (or its temp twin).
      #   temp:    load into temp_<table_name> instead of the real table
      #   pk_only: copy just the primary key column
      #   update:  copy only rows whose sequence_field exceeds this value
      # Returns the number of rows moved.
      def copy_data_from(source_table, temp: nil, pk_only: false, update: false)
        temp = 'temp_' if temp
        fl = pk_only ? primary_key : fields_list
        where = "where #{sequence_field} > '#{update}'" if update && sequence_field
        count = 0
        source_table.copy_from(fl, where) do |source_conn|
          with_conn do |conn|
            conn.copy_data("COPY #{temp}#{table_name} (#{fl}) FROM STDOUT CSV") do
              while row = source_conn.get_copy_data
                count += 1
                conn.put_copy_data(row)
              end
            end
          end
        end
        count
      end

      # Upsert rows from the temp table. The default except-clause skips
      # rows already present verbatim; pass except: nil to upsert all rows.
      def copy_from_temp(except: except_statement)
        with_conn do |conn|
          conn.exec(upsert_sql(except))
        end
      end

      # Delete rows whose primary key is absent from the temp table.
      def delete_not_in_temp
        with_conn do |conn|
          conn.exec("delete from #{table_name} where #{primary_key} in (select #{primary_key} from #{table_name} except select #{primary_key} from temp_#{table_name})")
        end
      end

      private

      def fields_list
        @fields_list ||= fields.join(', ')
      end

      def with_conn(&block)
        conn_method.call(&block)
      end

      # NOTE(review): a redundant private `attr_reader :primary_key` was
      # removed here -- the public reader declared above already exists.

      def drop_sql
        @drop_sql ||= "drop table if exists #{table_name}"
      end

      def max_sequence_sql
        @max_sequence_sql ||= "select max(#{sequence_field}) from #{table_name}"
      end

      # CTE-based upsert: update matching rows, then insert the remainder.
      def upsert_sql(except=except_statement)
        "with new_values as (
          select #{fields_list} from temp_#{table_name}
          #{except}
        )
        ,upsert as (
          UPDATE #{table_name}
          SET #{set_statement(fields)}
          FROM new_values as nv
          WHERE #{table_name}.#{primary_key} = nv.#{primary_key}
          RETURNING #{return_statement(fields)}
        )

        INSERT INTO #{table_name} (#{fields_list})
        SELECT *
        FROM new_values as nv
        WHERE NOT EXISTS (SELECT 1
                          FROM #{table_name}
                          WHERE #{table_name}.#{primary_key} = nv.#{primary_key});"
      end

      def except_statement
        @except_statement ||= "except select #{fields_list} from #{table_name}"
      end

      # "a=nv.a,b=nv.b" -- SET list for the upsert's UPDATE arm.
      # (The previous map.with_index(1) carried an unused index variable.)
      def set_statement(keys)
        keys.map { |key| "#{key}=nv.#{key}" }.join(',')
      end

      # "nv.a,nv.b" -- RETURNING list for the upsert's UPDATE arm.
      def return_statement(keys)
        keys.map { |key| "nv.#{key}" }.join(',')
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module TableCopy
  module PG
    # One column of a source table; turns an information_schema.columns
    # row into the DDL fragment used when creating destination tables.
    class Field
      attr_reader :name, :type_name, :data_limit

      # attrs is a string-keyed row from information_schema.columns.
      def initialize(attrs)
        @name = attrs['column_name']
        declared_type = attrs['data_type']

        @data_limit = attrs['character_maximum_length'] if declared_type =~ /character/

        # varchar[] columns are mapped to a fixed-width varchar array.
        if declared_type == 'ARRAY' && attrs['udt_name'] == '_varchar'
          @type_name  = 'character varying'
          @data_limit = '256'
          @array_ddl  = '[]'
        end

        @type_name ||= declared_type
      end

      # Column DDL fragment, e.g. "tags character varying(256)[]".
      def ddl
        @ddl ||= [name, ' ', type_name, data_limit_ddl, array_ddl].join
      end

      # Truthy for types that are cheap to index automatically.
      def auto_index?
        @type_name =~ /int|timestamp|bool/
      end

      private

      # "(n)" when a character limit applies, otherwise nil.
      def data_limit_ddl
        "(#{data_limit})" if @data_limit
      end

      # "[]" for array columns, otherwise nil.
      def array_ddl
        @array_ddl
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module TableCopy
  module PG
    # Describes a btree index on a table and builds its create/drop SQL.
    class Index
      attr_reader :table, :name, :columns

      def initialize(table, name, columns)
        @table   = table
        @name    = name
        @columns = columns
      end

      # Memoized create-index statement (the index name is left for
      # Postgres to choose).
      def create
        @create ||= format('create index on %s using btree (%s)', table, columns.join(', '))
      end

      # Memoized drop statement; a no-op if the index does not exist.
      def drop
        @drop ||= format('drop index if exists %s', name)
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require 'table_copy/pg/field'
2
+ require 'table_copy/pg/index'
3
+
4
module TableCopy
  module PG
    # Readable side of a table copy: introspects a source Postgres table
    # (fields, indexes, primary key) and streams its rows out via COPY.
    class Source
      attr_reader :table_name, :conn_method, :infer_pk_proc

      # args:
      #   :table_name    -- source table (interpolated into SQL; assumed
      #                     to come from trusted configuration)
      #   :conn_method   -- callable yielding a PG connection
      #   :infer_pk_proc -- optional fallback mapping table_name -> pk name
      def initialize(args)
        @table_name = args[:table_name]
        @conn_method = args[:conn_method]
        @infer_pk_proc = args[:infer_pk_proc]
      end

      def to_s
        table_name
      end

      # Primary key column name (queried once, then memoized).
      def primary_key
        @primary_key ||= get_primary_key
      end

      # Joined column DDL for CREATE TABLE on the destination.
      def fields_ddl
        @fields_ddl ||= fields_objects.map(&:ddl).join(",\n  ")
      end

      # Index definitions whose columns are all present in #fields.
      def indexes
        @indexes ||= viable_index_columns.map { |name, columns| TableCopy::PG::Index.new(table_name, name, columns) }
      end

      # Stream rows as CSV via COPY ... TO STDOUT, yielding the connection
      # so the caller can pull rows with get_copy_data.
      def copy_from(fields_list_arg, where=nil)
        with_conn do |conn|
          conn.copy_data("copy (select #{fields_list_arg} from #{table_name} #{where}) to stdout csv") do
            yield conn
          end
        end
      end

      # Column names of the source table.
      def fields
        @fields ||= fields_objects.map(&:name)
      end

      private

      def with_conn(&block)
        conn_method.call(&block)
      end

      def fields_objects
        @fields_objects ||= with_conn do |conn|
          conn.exec(fields_sql).map { |r| TableCopy::PG::Field.new(r) }
        end
      end

      # Indexes restricted to those fully covered by visible fields (an
      # index referencing a skipped field cannot be recreated).
      def viable_index_columns
        @viable_index_columns ||= index_columns.select do |name, columns|
          (columns - fields).empty?
        end
      end

      # { index_name => [column_name, ...] } built from the raw index rows.
      def index_columns
        @index_columns ||= raw_indexes.each_with_object({}) do |ri, acc|
          (acc[ri['index_name']] ||= []) << ri['column_name']
        end
      end

      # Memoized rows of indexes_sql. (Was `@raw_indexes || with_conn do
      # @raw_indexes = ... end`; `||=` is clearer and equivalent since the
      # query result is always truthy.)
      def raw_indexes
        @raw_indexes ||= with_conn do |conn|
          conn.exec(indexes_sql)
        end
      end

      def indexes_sql
        <<-SQL
          select
            i.relname as index_name,
            a.attname as column_name
          from
            pg_class t,
            pg_class i,
            pg_index ix,
            pg_attribute a
          where
            t.oid = ix.indrelid
            and i.oid = ix.indexrelid
            and a.attrelid = t.oid
            and a.attnum = ANY(ix.indkey)
            and t.relkind = 'r'
            and t.relname = '#{table_name}'
          order by
            t.relname,
            i.relname;
        SQL
      end

      def fields_sql
        <<-SQL
          SELECT *
          FROM information_schema.columns
          WHERE table_schema='public' AND table_name='#{table_name}'
        SQL
      end

      # Ask the catalog for the PK; fall back to infer_pk_proc, then "id",
      # logging a warning either way.
      def get_primary_key
        with_conn do |conn|
          rows = conn.exec(primary_key_sql)
          if (row = rows.first) && row['attname']
            row['attname']
          elsif infer_pk_proc
            inferred_pk = infer_pk_proc.call(table_name)
            TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to #{inferred_pk}."
            inferred_pk
          else
            TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to \"id\"."
            'id'
          end
        end
      end

      def primary_key_sql
        <<-SQL
          SELECT
            pg_attribute.attname,
            format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
          FROM pg_index, pg_class, pg_attribute
          WHERE
            pg_class.oid = '#{table_name}'::regclass AND
            indrelid = pg_class.oid AND
            pg_attribute.attrelid = pg_class.oid AND
            pg_attribute.attnum = any(pg_index.indkey)
            AND indisprimary
        SQL
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'table_copy/pg/destination'
2
+ require 'table_copy/pg/field'
3
+ require 'table_copy/pg/index'
4
+ require 'table_copy/pg/source'
5
+
6
module TableCopy
  # Namespace for the Postgres-specific adapters required above
  # (Source, Destination, Field, Index).
  module PG
  end
end
@@ -0,0 +1,3 @@
1
module TableCopy
  # Gem version -- presumably referenced by the gemspec; keep in sync
  # with releases.
  VERSION = "0.0.5"
end