table_copy 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 82e2e7bc03e341d5b10e172c080499d174abd238
+   data.tar.gz: ccf3b5eebc929cefe8b9eeec212d7869b3cacb2c
+ SHA512:
+   metadata.gz: 9be2278b5259705cbfb905422385a334145eeb5325d8f97b33eb253343e3838014d081e3f86417c7456c519364ad29e19ea40e3cedece2fe84550ca2514c9567
+   data.tar.gz: 4191cd39eda48dd99414e3f300a962d6de22e0ad756aaa9f7da8755ac5d0a4d42d14d185d4cf717a40e80524b254f8a80bbfacd4b4c842ba02dbf885a4e698cf
data/.gitignore ADDED
@@ -0,0 +1,22 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ *.bundle
+ *.so
+ *.o
+ *.a
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
+ --color
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in table_copy.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2014 Tyler Hartland
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
+ # TableCopy
+
+ Move and update data on a table-by-table basis between two databases. Currently supports only Postgres, and in a limited fashion.
+
+ This gem could be made more flexible with a bit of work, but for now it is fairly limited to my specific purposes.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'table_copy'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install table_copy
+
+ ## Usage
+
+ Run `table_copy --init` for an example initializer. Then access each table link via `TableCopy.links['link_name']`. You can call `link.update`, `link.droppy`, or `link.diffy` (see the sketch after this file's diff).
+
+ Update will attempt to use a sequence field to look for changes. If that field is not available, it will run a diffy (update) operation.
+
+ Diffy (update) will copy the source table to a temp table, diff it against the destination table, and upsert any changes into the destination.
+
+ Diffy will perform a diffy (update) and will also diff IDs in the destination table against the temp table to find deletions.
+
+ Droppy will drop the destination table and rebuild/populate it.
+
+ ### *Very* rough benchmarks
+ - Copy 1M rows: ~15 sec
+ - Index 1M rows: ~2 sec per numeric field, ~40 sec per char field
+ - Diff 1M rows: ~40 sec
+ - Upsert 100k rows into a 1M-row table: ~60 sec
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/th7/table_copy/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
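As a quick illustration of the Usage section above, here is a minimal sketch of driving the gem once an initializer is in place; the link name `table_one` is borrowed from the example initializer below and is otherwise an assumption:

```ruby
# Assumes an initializer (see table_copy.rb.example below) has
# registered a link named 'table_one' -- the name is hypothetical.
require 'table_copy'

link = TableCopy.links['table_one']

link.update # incremental: uses the sequence field if configured, else a diffy (update)
link.diffy  # full pass: upsert changes, then remove rows deleted at the source
link.droppy # drop, rebuild, and repopulate the destination table
```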
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require "bundler/gem_tasks"
+
data/bin/table_copy ADDED
@@ -0,0 +1,10 @@
+ #!/usr/bin/env ruby
+ bin_path = File.expand_path(File.dirname(__FILE__))
+ system('mkdir config') unless Dir.exist?('config')
+ system('mkdir config/initializers') unless Dir.exist?('config/initializers')
+ if system("cp #{bin_path}/../config/initializers/table_copy.rb.example config/initializers")
+   puts "Example initializer copied to config/initializers/table_copy.rb.example"
+ else
+   puts 'Failed to copy example config.'
+ end
+
@@ -0,0 +1 @@
+ :dbname: table_copy_test
data/config/initializers/table_copy.rb.example ADDED
@@ -0,0 +1,62 @@
+ require 'yaml'
+ require 'pg'
+ require 'table_copy'
+ require 'table_copy/pg'
+
+ TABLES = {
+   'table_one'   => { sequence_field: 'updated_at' },
+   'table_two'   => { skips_fields: [ 'field_to_skip' ] },
+   'table_three' => { sequence_field: 'table_three_id' }, # insert only
+ }
+
+ # TableCopy requires you to specify methods which yield a database connection.
+ # Single-connection example:
+ source_config = YAML.load_file('config/db1.yml')[ENV['ENV']]
+ SOURCE_CONN = PG::Connection.open(source_config)
+
+ class SourceDB
+   def self.with_conn
+     yield SOURCE_CONN # or use a connection pool!
+   end
+ end
+
+ # ActiveRecord connection pool example:
+ class DestinationDB < ActiveRecord::Base
+   self.abstract_class = true
+
+   def self.with_conn
+     self.connection_pool.with_connection do |ar_conn|
+       yield ar_conn.raw_connection
+     end
+   end
+ end
+
+ TableCopy.logger = Logger.new('log/table_copy.log') unless MyEnv.is.development?
+
+ # If explicitly asking the DB for the PK fails, a proc can be used instead.
+ infer_pk_proc = Proc.new { |table_name| table_name + '_id' }
+
+ # or maybe...
+ # infer_pk_proc = Proc.new { 'every_table_uses_this_id' }
+
+ # Config requires database queries -- this block defers them until actually needed.
+ TableCopy.deferred_config do
+   TABLES.each do |table_name, opts|
+     source = TableCopy::PG::Source.new(
+       table_name:    table_name,
+       conn_method:   SourceDB.method(:with_conn),
+       infer_pk_proc: infer_pk_proc
+     )
+
+     destination = TableCopy::PG::Destination.new(
+       table_name:     table_name,
+       primary_key:    source.primary_key,
+       sequence_field: opts[:sequence_field],
+       conn_method:    DestinationDB.method(:with_conn),
+       indexes:        source.indexes,
+       fields:         source.fields - (opts[:skips_fields] || [])
+     )
+
+     TableCopy.add_link(table_name, source, destination)
+   end
+ end
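The `# or use a connection pool!` comment above can be taken literally on the source side as well. A minimal sketch, assuming the `connection_pool` gem (not a dependency of this package) and the same `config/db1.yml` layout; the pool size and timeout are illustrative assumptions:

```ruby
require 'yaml'
require 'pg'
require 'connection_pool'

# A pooled alternative to the single shared SOURCE_CONN above.
SOURCE_POOL = ConnectionPool.new(size: 5, timeout: 5) do
  PG::Connection.open(YAML.load_file('config/db1.yml')[ENV['ENV']])
end

class PooledSourceDB
  # Same contract TableCopy expects: a method that yields a raw connection.
  def self.with_conn
    SOURCE_POOL.with { |conn| yield conn }
  end
end
```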
data/lib/table_copy/copier.rb ADDED
@@ -0,0 +1,98 @@
+ require 'pg'
+
+ module TableCopy
+   class Copier
+     attr_reader :source_table, :destination_table
+
+     def initialize(source_table, destination_table)
+       @source_table      = source_table
+       @destination_table = destination_table
+     end
+
+     def update
+       if destination_table.none?
+         droppy
+       elsif (max_sequence = destination_table.max_sequence)
+         update_data(max_sequence)
+       else
+         diffy_update
+       end
+     rescue ::PG::UndefinedTable => e
+       ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
+       create_table
+       retry
+     rescue ::PG::UndefinedColumn => e
+       ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
+       droppy
+     end
+
+     def droppy
+       logger.info { "Droppy #{destination_table.table_name}" }
+       destination_table.transaction do
+         destination_table.drop(cascade: true)
+         create_table
+         moved_count = destination_table.copy_data_from(source_table)
+         logger.info { "#{moved_count} rows moved to #{destination_table.table_name}" }
+         destination_table.create_indexes
+         logger.info { "Completed #{source_table.indexes.count} indexes on #{destination_table.table_name}." }
+       end
+     end
+
+     def find_deletes
+       logger.info { "Find deletes #{destination_table.table_name}" }
+       destination_table.transaction do
+         destination_table.create_temp(source_table.fields_ddl)
+         moved_count = destination_table.copy_data_from(source_table, temp: true, pk_only: true)
+         logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
+         destination_table.delete_not_in_temp
+         logger.info { "Deletions from #{destination_table.table_name} complete." }
+       end
+     end
+
+     def diffy
+       logger.info { "Diffy #{destination_table.table_name}" }
+       destination_table.transaction do
+         destination_table.create_temp(source_table.fields_ddl)
+         moved_count = destination_table.copy_data_from(source_table, temp: true)
+         logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
+         destination_table.copy_from_temp
+         logger.info { "Upsert to #{destination_table.table_name} complete" }
+         destination_table.delete_not_in_temp
+         logger.info { "Deletions from #{destination_table.table_name} complete." }
+       end
+     end
+
+     private
+
+     def diffy_update
+       logger.info "Diffy Update #{destination_table.table_name}"
+       destination_table.transaction do
+         destination_table.create_temp(source_table.fields_ddl)
+         moved_count = destination_table.copy_data_from(source_table, temp: true)
+         logger.info "#{moved_count} rows moved to temp_#{destination_table.table_name}"
+         destination_table.copy_from_temp
+         logger.info "Upsert to #{destination_table.table_name} complete."
+       end
+     end
+
+     def update_data(max_sequence)
+       logger.info "Update #{destination_table.table_name}"
+       destination_table.transaction do
+         destination_table.create_temp(source_table.fields_ddl)
+         moved_count = destination_table.copy_data_from(source_table, temp: true, update: max_sequence)
+         logger.info "#{moved_count} rows moved to temp_#{destination_table.table_name}"
+         destination_table.copy_from_temp(except: nil)
+         logger.info "Upsert to #{destination_table.table_name} complete."
+       end
+     end
+
+     def create_table
+       logger.info { "Creating table #{destination_table.table_name}" }
+       destination_table.create(source_table.fields_ddl)
+     end
+
+     def logger
+       TableCopy.logger
+     end
+   end
+ end
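`TableCopy::Copier` is what a link ultimately drives, and it can also be used directly. A minimal sketch, assuming `source` and `destination` are built as in the example initializer above:

```ruby
copier = TableCopy::Copier.new(source, destination)

# update picks a strategy:
#   destination empty        -> droppy (full rebuild)
#   sequence field has a max -> update_data (copy only newer rows, upsert them all)
#   otherwise                -> diffy_update (temp-table diff, upsert changed rows)
# A missing table or column is logged and healed via create_table/droppy.
copier.update

# diffy = diffy_update plus delete_not_in_temp; find_deletes does only the delete pass.
copier.diffy
```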
data/lib/table_copy/pg/destination.rb ADDED
@@ -0,0 +1,162 @@
+ module TableCopy
+   module PG
+     class Destination
+       attr_reader :table_name, :conn_method, :indexes, :fields, :primary_key, :sequence_field
+
+       def initialize(args)
+         @table_name     = args[:table_name]
+         @primary_key    = args[:primary_key]
+         @sequence_field = args[:sequence_field]
+         @conn_method    = args[:conn_method]
+         @indexes        = args[:indexes] || []
+         @fields         = args[:fields]
+       end
+
+       def transaction
+         with_conn do |conn|
+           begin
+             conn.exec('begin')
+             yield
+             conn.exec('commit')
+           rescue Exception => e
+             conn.exec('rollback')
+             raise e
+           end
+         end
+       end
+
+       def create(fields_ddl)
+         with_conn do |conn|
+           conn.exec("create table #{table_name} (#{fields_ddl})")
+         end
+       end
+
+       def drop(opts={})
+         cascade = ' cascade' if opts[:cascade]
+         with_conn do |conn|
+           conn.exec("#{drop_sql}#{cascade}")
+         end
+       end
+
+       def create_indexes
+         indexes.each do |index|
+           create_ddl = index.class.new(table_name, index.name, index.columns).create
+           with_conn do |conn|
+             conn.exec(create_ddl)
+           end
+         end
+       end
+
+       def to_s
+         table_name
+       end
+
+       def max_sequence
+         return unless sequence_field
+         with_conn do |conn|
+           row = conn.exec(max_sequence_sql).first
+           row['max'] if row
+         end
+       end
+
+       def create_temp(fields_ddl)
+         with_conn do |conn|
+           conn.exec("create temp table temp_#{table_name} (#{fields_ddl}) on commit drop")
+         end
+       end
+
+       def none?
+         with_conn do |conn|
+           conn.exec("select count(*) from #{table_name}").first['count'] == '0'
+         end
+       end
+
+       def copy_data_from(source_table, temp: nil, pk_only: false, update: false)
+         temp = 'temp_' if temp
+         fl = pk_only ? primary_key : fields_list
+         where = "where #{sequence_field} > '#{update}'" if update && sequence_field
+         count = 0
+         source_table.copy_from(fl, where) do |source_conn|
+           with_conn do |conn|
+             conn.copy_data("COPY #{temp}#{table_name} (#{fl}) FROM STDOUT CSV") do
+               while row = source_conn.get_copy_data
+                 count += 1
+                 conn.put_copy_data(row)
+               end
+             end
+           end
+         end
+         count
+       end
+
+       def copy_from_temp(except: except_statement)
+         with_conn do |conn|
+           conn.exec(upsert_sql(except))
+         end
+       end
+
+       def delete_not_in_temp
+         with_conn do |conn|
+           conn.exec("delete from #{table_name} where #{primary_key} in (select #{primary_key} from #{table_name} except select #{primary_key} from temp_#{table_name})")
+         end
+       end
+
+       private
+
+       def fields_list
+         @fields_list ||= fields.join(', ')
+       end
+
+       def with_conn(&block)
+         conn_method.call(&block)
+       end
+
+       def drop_sql
+         @drop_sql ||= "drop table if exists #{table_name}"
+       end
+
+       def max_sequence_sql
+         @max_sequence_sql ||= "select max(#{sequence_field}) from #{table_name}"
+       end
+
+       def upsert_sql(except=except_statement)
+         "with new_values as (
+           select #{fields_list} from temp_#{table_name}
+           #{except}
+         )
+         ,upsert as (
+           UPDATE #{table_name}
+           SET #{set_statement(fields)}
+           FROM new_values as nv
+           WHERE #{table_name}.#{primary_key} = nv.#{primary_key}
+           RETURNING #{return_statement(fields)}
+         )
+
+         INSERT INTO #{table_name} (#{fields_list})
+         SELECT *
+         FROM new_values as nv
+         WHERE NOT EXISTS (SELECT 1
+                           FROM #{table_name}
+                           WHERE #{table_name}.#{primary_key} = nv.#{primary_key});"
+       end
+
+       def except_statement
+         @except_statement ||= "except select #{fields_list} from #{table_name}"
+       end
+
+       def set_statement(keys)
+         keys.map { |key| "#{key}=nv.#{key}" }.join(',')
+       end
+
+       def return_statement(keys)
+         keys.map { |key| "nv.#{key}" }.join(',')
+       end
+     end
+   end
+ end
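For a concrete sense of what `copy_from_temp` executes, here is roughly the statement the private `upsert_sql` helper above would build for a hypothetical `users` table with `fields = ['id', 'name']` and `primary_key = 'id'` (whitespace tidied; table and field names are assumptions for illustration):

```ruby
# Hypothetical expansion of upsert_sql for table 'users' -- shown as a
# heredoc for readability. Only rows that differ survive the `except`,
# so unchanged rows are neither updated nor re-inserted.
expected = <<-SQL
  with new_values as (
    select id, name from temp_users
    except select id, name from users
  )
  ,upsert as (
    UPDATE users
    SET id=nv.id,name=nv.name
    FROM new_values as nv
    WHERE users.id = nv.id
    RETURNING nv.id,nv.name
  )
  INSERT INTO users (id, name)
  SELECT * FROM new_values as nv
  WHERE NOT EXISTS (SELECT 1 FROM users WHERE users.id = nv.id);
SQL
```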
data/lib/table_copy/pg/field.rb ADDED
@@ -0,0 +1,42 @@
+ module TableCopy
+   module PG
+     class Field
+       attr_reader :name, :type_name, :data_limit
+
+       def initialize(attrs)
+         @name = attrs['column_name']
+         data_type = attrs['data_type']
+
+         if data_type =~ /character/
+           @data_limit = attrs['character_maximum_length']
+         end
+
+         if data_type == 'ARRAY' && attrs['udt_name'] == '_varchar'
+           @type_name  = 'character varying'
+           @data_limit = '256'
+           @array_ddl  = '[]'
+         end
+
+         @type_name ||= data_type
+       end
+
+       def ddl
+         @ddl ||= "#{name} #{type_name}#{data_limit_ddl}#{array_ddl}"
+       end
+
+       def auto_index?
+         @type_name =~ /int|timestamp|bool/
+       end
+
+       private
+
+       def data_limit_ddl
+         "(#{data_limit})" if @data_limit
+       end
+
+       def array_ddl
+         @array_ddl
+       end
+     end
+   end
+ end
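To make the DDL mapping concrete, here is what `Field` derives from a couple of hypothetical `information_schema.columns` rows (the column names and limits are assumptions for illustration):

```ruby
name = TableCopy::PG::Field.new(
  'column_name'              => 'name',
  'data_type'                => 'character varying',
  'character_maximum_length' => '255'
)
name.ddl         # => "name character varying(255)"
name.auto_index? # => nil (only int/timestamp/bool columns are auto-indexed)

tags = TableCopy::PG::Field.new(
  'column_name' => 'tags',
  'data_type'   => 'ARRAY',
  'udt_name'    => '_varchar'
)
tags.ddl # => "tags character varying(256)[]"
```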
data/lib/table_copy/pg/index.rb ADDED
@@ -0,0 +1,21 @@
+ module TableCopy
+   module PG
+     class Index
+       attr_reader :table, :name, :columns
+
+       def initialize(table, name, columns)
+         @table   = table
+         @name    = name
+         @columns = columns
+       end
+
+       def create
+         @create ||= "create index on #{table} using btree (#{columns.join(', ')})"
+       end
+
+       def drop
+         @drop ||= "drop index if exists #{name}"
+       end
+     end
+   end
+ end
data/lib/table_copy/pg/source.rb ADDED
@@ -0,0 +1,139 @@
+ require 'table_copy/pg/field'
+ require 'table_copy/pg/index'
+
+ module TableCopy
+   module PG
+     class Source
+       attr_reader :table_name, :conn_method, :infer_pk_proc
+
+       def initialize(args)
+         @table_name    = args[:table_name]
+         @conn_method   = args[:conn_method]
+         @infer_pk_proc = args[:infer_pk_proc]
+       end
+
+       def to_s
+         table_name
+       end
+
+       def primary_key
+         @primary_key ||= get_primary_key
+       end
+
+       def fields_ddl
+         @fields_ddl ||= fields_objects.map(&:ddl).join(",\n  ")
+       end
+
+       def indexes
+         @indexes ||= viable_index_columns.map { |name, columns| TableCopy::PG::Index.new(table_name, name, columns) }
+       end
+
+       def copy_from(fields_list_arg, where=nil)
+         with_conn do |conn|
+           conn.copy_data("copy (select #{fields_list_arg} from #{table_name} #{where}) to stdout csv") do
+             yield conn
+           end
+         end
+       end
+
+       def fields
+         @field_names ||= fields_objects.map(&:name)
+       end
+
+       private
+
+       def with_conn(&block)
+         conn_method.call(&block)
+       end
+
+       def fields_objects
+         @fields_objects ||= with_conn do |conn|
+           conn.exec(fields_sql).map { |r| TableCopy::PG::Field.new(r) }
+         end
+       end
+
+       def viable_index_columns
+         @viable_index_columns ||= index_columns.select do |name, columns|
+           (columns - fields).empty?
+         end
+       end
+
+       def index_columns
+         @index_columns ||= raw_indexes.inject({}) do |indexes, ri|
+           index_name = ri['index_name']
+           indexes[index_name] ||= []
+           indexes[index_name] << ri['column_name']
+           indexes
+         end
+       end
+
+       def raw_indexes
+         @raw_indexes ||= with_conn do |conn|
+           conn.exec(indexes_sql)
+         end
+       end
+
+       def indexes_sql
+         <<-SQL
+           select
+             i.relname as index_name,
+             a.attname as column_name
+           from
+             pg_class t,
+             pg_class i,
+             pg_index ix,
+             pg_attribute a
+           where
+             t.oid = ix.indrelid
+             and i.oid = ix.indexrelid
+             and a.attrelid = t.oid
+             and a.attnum = ANY(ix.indkey)
+             and t.relkind = 'r'
+             and t.relname = '#{table_name}'
+           order by
+             t.relname,
+             i.relname;
+         SQL
+       end
+
+       def fields_sql
+         <<-SQL
+           SELECT *
+           FROM information_schema.columns
+           WHERE table_schema='public' AND table_name='#{table_name}'
+         SQL
+       end
+
+       def get_primary_key
+         with_conn do |conn|
+           rows = conn.exec(primary_key_sql)
+           if (row = rows.first) && row['attname']
+             row['attname']
+           elsif infer_pk_proc
+             inferred_pk = infer_pk_proc.call(table_name)
+             TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to #{inferred_pk}."
+             inferred_pk
+           else
+             TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to \"id\"."
+             'id'
+           end
+         end
+       end
+
+       def primary_key_sql
+         <<-SQL
+           SELECT
+             pg_attribute.attname,
+             format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
+           FROM pg_index, pg_class, pg_attribute
+           WHERE
+             pg_class.oid = '#{table_name}'::regclass AND
+             indrelid = pg_class.oid AND
+             pg_attribute.attrelid = pg_class.oid AND
+             pg_attribute.attnum = any(pg_index.indkey)
+             AND indisprimary
+         SQL
+       end
+     end
+   end
+ end
data/lib/table_copy/pg.rb ADDED
@@ -0,0 +1,9 @@
+ require 'table_copy/pg/destination'
+ require 'table_copy/pg/field'
+ require 'table_copy/pg/index'
+ require 'table_copy/pg/source'
+
+ module TableCopy
+   module PG
+   end
+ end
data/lib/table_copy/version.rb ADDED
@@ -0,0 +1,3 @@
+ module TableCopy
+   VERSION = "0.0.5"
+ end