table_copy 0.0.5
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +45 -0
- data/Rakefile +2 -0
- data/bin/table_copy +10 -0
- data/config/database.yml +1 -0
- data/config/initializers/table_copy.rb.example +62 -0
- data/lib/table_copy/copier.rb +98 -0
- data/lib/table_copy/pg/destination.rb +162 -0
- data/lib/table_copy/pg/field.rb +42 -0
- data/lib/table_copy/pg/index.rb +21 -0
- data/lib/table_copy/pg/source.rb +139 -0
- data/lib/table_copy/pg.rb +9 -0
- data/lib/table_copy/version.rb +3 -0
- data/lib/table_copy.rb +54 -0
- data/spec/lib/table_copy/copier_spec.rb +126 -0
- data/spec/lib/table_copy/pg/destination_spec.rb +305 -0
- data/spec/lib/table_copy/pg/field_spec.rb +65 -0
- data/spec/lib/table_copy/pg/index_spec.rb +24 -0
- data/spec/lib/table_copy/pg/source_spec.rb +120 -0
- data/spec/lib/table_copy_spec.rb +89 -0
- data/spec/spec_helper.rb +94 -0
- data/table_copy.gemspec +27 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 82e2e7bc03e341d5b10e172c080499d174abd238
+  data.tar.gz: ccf3b5eebc929cefe8b9eeec212d7869b3cacb2c
+SHA512:
+  metadata.gz: 9be2278b5259705cbfb905422385a334145eeb5325d8f97b33eb253343e3838014d081e3f86417c7456c519364ad29e19ea40e3cedece2fe84550ca2514c9567
+  data.tar.gz: 4191cd39eda48dd99414e3f300a962d6de22e0ad756aaa9f7da8755ac5d0a4d42d14d185d4cf717a40e80524b254f8a80bbfacd4b4c842ba02dbf885a4e698cf
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+*.bundle
+*.so
+*.o
+*.a
+mkmf.log
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2014 Tyler Hartland
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,45 @@
+# TableCopy
+
+Move and update data on a table-by-table basis between two databases. Currently only supports Postgres, in a limited fashion.
+
+This gem could be made more flexible with a bit of work, but for now it is pretty limited to my specific purposes.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'table_copy'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install table_copy
+
+## Usage
+
+Run `table_copy --init` for an example initializer. Then access each table link via `TableCopy.links['link_name']`. You can call `link.update`, `link.droppy`, or `link.diffy`.
+
+Update will attempt to use a sequence field to look for changes. If that field is not available, it will run a diffy (update) operation.
+
+Diffy (update) will copy the source table to a temp table, diff it with the destination table, and upsert any changes to the destination.
+
+Diffy will perform a diffy (update) and will also diff ids in the destination table against the temp table to find deletions.
+
+Droppy will drop the destination table, then rebuild and repopulate it.
+
+### *Very* rough benchmarks:
+- Copy 1M rows: ~15 sec
+- Index 1M rows: ~2 sec per numeric field, ~40 sec per char field
+- Diff 1M rows: ~40 sec
+- Upsert 100k rows into a 1M-row table: ~60 sec
+
+## Contributing
+
+1. Fork it ( https://github.com/th7/table_copy/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
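Taken together, a minimal sketch of day-to-day usage, assuming the example initializer has been generated and renamed to config/initializers/table_copy.rb (the link name 'table_one' is illustrative):

```ruby
require 'table_copy'
require_relative 'config/initializers/table_copy'

link = TableCopy.links['table_one']
link.update  # incremental copy via the sequence field, when one is configured
link.diffy   # full diff: upsert changed rows, then delete rows missing from the source
link.droppy  # drop, recreate, and fully repopulate the destination table
```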
data/Rakefile
ADDED
data/bin/table_copy
ADDED
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+bin_path = File.expand_path(File.dirname(__FILE__))
+system('mkdir config') unless Dir.exist?('config')
+system('mkdir config/initializers') unless Dir.exist?('config/initializers')
+if system("cp #{bin_path}/../config/initializers/table_copy.rb.example config/initializers")
+  puts "Example initializer copied to /config/initializers/table_copy.rb.example"
+else
+  puts 'Failed to copy example config.'
+end
+
data/config/database.yml
ADDED
@@ -0,0 +1 @@
+:dbname: table_copy_test
data/config/initializers/table_copy.rb.example
ADDED
@@ -0,0 +1,62 @@
+require 'yaml'
+require 'pg'
+require 'table_copy'
+require 'table_copy/pg'
+
+TABLES = {
+  'table_one'   => { sequence_field: 'updated_at' },
+  'table_two'   => { skips_fields: [ 'field_to_skip' ] },
+  'table_three' => { sequence_field: 'table_three_id' }, # insert only
+}
+
+# TableCopy requires you to specify methods which yield a database connection.
+# Single-connection example:
+source_config = YAML.load_file('config/db1.yml')[ENV['ENV']]
+SOURCE_CONN = PG::Connection.open(source_config)
+
+class SourceDB
+  def self.with_conn
+    yield SOURCE_CONN # or use a connection pool!
+  end
+end
+
+# ActiveRecord connection pool example:
+class DestinationDB < ActiveRecord::Base
+  self.abstract_class = true
+
+  def self.with_conn
+    self.connection_pool.with_connection do |ar_conn|
+      yield ar_conn.raw_connection
+    end
+  end
+end
+
+TableCopy.logger = Logger.new('log/table_copy.log') unless MyEnv.is.development?
+
+# If explicitly asking the DB for the PK fails, a proc can be used instead.
+infer_pk_proc = Proc.new { |table_name| table_name + '_id' }
+
+# or maybe...
+# infer_pk_proc = Proc.new { 'every_table_uses_this_id' }
+
+# Config requires database queries -- this block defers them until actually needed.
+TableCopy.deferred_config do
+  TABLES.each do |table_name, opts|
+    source = TableCopy::PG::Source.new(
+      table_name:    table_name,
+      conn_method:   SourceDB.method(:with_conn),
+      infer_pk_proc: infer_pk_proc
+    )
+
+    destination = TableCopy::PG::Destination.new(
+      table_name:     table_name,
+      primary_key:    source.primary_key,
+      sequence_field: opts[:sequence_field],
+      conn_method:    DestinationDB.method(:with_conn),
+      indexes:        source.indexes,
+      fields:         source.fields - (opts[:skips_fields] || [])
+    )
+
+    TableCopy.add_link(table_name, source, destination)
+  end
+end
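A minimal sketch of driving the configured links from a boot script or Rake task, assuming the initializer above has been loaded and that TableCopy.links is keyed by the names passed to TableCopy.add_link:

```ruby
require_relative 'config/initializers/table_copy'

TABLES.each_key do |table_name|
  TableCopy.links[table_name].update # or .diffy / .droppy, per the README
end
```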
data/lib/table_copy/copier.rb
ADDED
@@ -0,0 +1,98 @@
+require 'pg'
+
+module TableCopy
+  class Copier
+    attr_reader :source_table, :destination_table
+
+    def initialize(source_table, destination_table)
+      @source_table = source_table
+      @destination_table = destination_table
+    end
+
+    def update
+      if destination_table.none?
+        droppy
+      elsif (max_sequence = destination_table.max_sequence)
+        update_data(max_sequence)
+      else
+        diffy_update
+      end
+    rescue ::PG::UndefinedTable => e
+      ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
+      create_table
+      retry
+    rescue ::PG::UndefinedColumn => e
+      ([e.inspect] + e.backtrace).each { |l| logger.warn(l) }
+      droppy
+    end
+
+    def droppy
+      logger.info { "Droppy #{destination_table.table_name}" }
+      destination_table.transaction do
+        destination_table.drop(cascade: true)
+        create_table
+        moved_count = destination_table.copy_data_from(source_table)
+        logger.info { "#{moved_count} rows moved to #{destination_table.table_name}" }
+        destination_table.create_indexes
+        logger.info { "Completed #{source_table.indexes.count} indexes on #{destination_table.table_name}." }
+      end
+    end
+
+    def find_deletes
+      logger.info { "Find deletes #{destination_table.table_name}" }
+      destination_table.transaction do
+        destination_table.create_temp(source_table.fields_ddl)
+        moved_count = destination_table.copy_data_from(source_table, temp: true, pk_only: true)
+        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
+        destination_table.delete_not_in_temp
+        logger.info { "Deletions from #{destination_table.table_name} complete." }
+      end
+    end
+
+    def diffy
+      logger.info { "Diffy #{destination_table.table_name}" }
+      destination_table.transaction do
+        destination_table.create_temp(source_table.fields_ddl)
+        moved_count = destination_table.copy_data_from(source_table, temp: true)
+        logger.info { "#{moved_count} rows moved to temp_#{destination_table.table_name}" }
+        destination_table.copy_from_temp
+        logger.info { "Upsert to #{destination_table.table_name} complete" }
+        destination_table.delete_not_in_temp
+        logger.info { "Deletions from #{destination_table.table_name} complete." }
+      end
+    end
+
+    private
+
+    def diffy_update
+      logger.info "Diffy Update #{destination_table.table_name}"
+      destination_table.transaction do
+        destination_table.create_temp(source_table.fields_ddl)
+        moved_count = destination_table.copy_data_from(source_table, temp: true)
+        logger.info "#{moved_count} rows moved to temp_#{destination_table.table_name}"
+        destination_table.copy_from_temp
+        logger.info "Upsert to #{destination_table.table_name} complete."
+      end
+    end
+
+    def update_data(max_sequence)
+      logger.info "Update #{destination_table.table_name}"
+      destination_table.transaction do
+        destination_table.create_temp(source_table.fields_ddl)
+        moved_count = destination_table.copy_data_from(source_table, temp: true, update: max_sequence)
+        logger.info "#{moved_count} rows moved to temp_#{destination_table.table_name}"
+        destination_table.copy_from_temp(except: nil)
+        logger.info "Upsert to #{destination_table.table_name} complete."
+      end
+    end
+
+    def create_table
+      logger.info { "Creating table #{destination_table.table_name}" }
+      destination_table.create(source_table.fields_ddl)
+    end
+
+    def logger
+      TableCopy.logger
+    end
+  end
+end
data/lib/table_copy/pg/destination.rb
ADDED
@@ -0,0 +1,162 @@
+module TableCopy
+  module PG
+    class Destination
+      attr_reader :table_name, :conn_method, :indexes, :fields, :primary_key, :sequence_field
+
+      def initialize(args)
+        @table_name = args[:table_name]
+        @primary_key = args[:primary_key]
+        @sequence_field = args[:sequence_field]
+        @conn_method = args[:conn_method]
+        @indexes = args[:indexes] || []
+        @fields = args[:fields]
+      end
+
+      def transaction
+        with_conn do |conn|
+          begin
+            conn.exec('begin')
+            yield
+            conn.exec('commit')
+          rescue Exception => e
+            conn.exec('rollback')
+            raise e
+          end
+        end
+      end
+
+      def create(fields_ddl)
+        with_conn do |conn|
+          conn.exec("create table #{table_name} (#{fields_ddl})")
+        end
+      end
+
+      def drop(opts={})
+        cascade = ' cascade' if opts[:cascade]
+        with_conn do |conn|
+          conn.exec("#{drop_sql}#{cascade}")
+        end
+      end
+
+      def create_indexes
+        indexes.each do |index|
+          create_ddl = index.class.new(table_name, index.name, index.columns).create
+          with_conn do |conn|
+            conn.exec(create_ddl)
+          end
+        end
+      end
+
+      def to_s
+        table_name
+      end
+
+      def max_sequence
+        return unless sequence_field
+        with_conn do |conn|
+          row = conn.exec(max_sequence_sql).first
+          row['max'] if row
+        end
+      end
+
+      def create_temp(fields_ddl)
+        with_conn do |conn|
+          conn.exec("create temp table temp_#{table_name} (#{fields_ddl}) on commit drop")
+        end
+      end
+
+      def none?
+        with_conn do |conn|
+          conn.exec("select count(*) from #{table_name}").first['count'] == '0'
+        end
+      end
+
+      def copy_data_from(source_table, temp: nil, pk_only: false, update: false)
+        temp = 'temp_' if temp
+        fl = pk_only ? primary_key : fields_list
+        where = "where #{sequence_field} > '#{update}'" if update && sequence_field
+        count = 0
+        source_table.copy_from(fl, where) do |source_conn|
+          with_conn do |conn|
+            conn.copy_data("COPY #{temp}#{table_name} (#{fl}) FROM STDOUT CSV") do
+              while row = source_conn.get_copy_data
+                count += 1
+                conn.put_copy_data(row)
+              end
+            end
+          end
+        end
+        count
+      end
+
+      def copy_from_temp(except: except_statement)
+        with_conn do |conn|
+          conn.exec(upsert_sql(except))
+        end
+      end
+
+      def delete_not_in_temp
+        with_conn do |conn|
+          conn.exec("delete from #{table_name} where #{primary_key} in (select #{primary_key} from #{table_name} except select #{primary_key} from temp_#{table_name})")
+        end
+      end
+
+      private
+
+      def fields_list
+        @fields_list ||= fields.join(', ')
+      end
+
+      def with_conn(&block)
+        conn_method.call(&block)
+      end
+
+      attr_reader :primary_key
+
+      def drop_sql
+        @drop_sql ||= "drop table if exists #{table_name}"
+      end
+
+      def max_sequence_sql
+        @max_sequence_sql ||= "select max(#{sequence_field}) from #{table_name}"
+      end
+
+      def upsert_sql(except=except_statement)
+        "with new_values as (
+          select #{fields_list} from temp_#{table_name}
+          #{except}
+        )
+        ,upsert as (
+          UPDATE #{table_name}
+          SET #{set_statement(fields)}
+          FROM new_values as nv
+          WHERE #{table_name}.#{primary_key} = nv.#{primary_key}
+          RETURNING #{return_statement(fields)}
+        )
+
+        INSERT INTO #{table_name} (#{fields_list})
+        SELECT *
+        FROM new_values as nv
+        WHERE NOT EXISTS (SELECT 1
+                          FROM #{table_name}
+                          WHERE #{table_name}.#{primary_key} = nv.#{primary_key});"
+      end
+
+      def except_statement
+        @except_statement ||= "except select #{fields_list} from #{table_name}"
+      end
+
+      def set_statement(keys)
+        keys.map.with_index(1) do |key, i|
+          "#{key}=nv.#{key}"
+        end.join(',')
+      end
+
+      def return_statement(keys)
+        keys.map.with_index(1) do |key, i|
+          "nv.#{key}"
+        end.join(',')
+      end
+    end
+  end
+end
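The upsert above is the classic writable-CTE pattern from before Postgres 9.5's INSERT ... ON CONFLICT. A quick way to eyeball the generated SQL without a database connection (table and field names here are made up; upsert_sql is private, hence send):

```ruby
require 'table_copy/pg/destination'

dest = TableCopy::PG::Destination.new(
  table_name:  'items',
  primary_key: 'id',
  fields:      ['id', 'name']
)
puts dest.send(:upsert_sql)
```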
data/lib/table_copy/pg/field.rb
ADDED
@@ -0,0 +1,42 @@
+module TableCopy
+  module PG
+    class Field
+      attr_reader :name, :type_name, :data_limit
+
+      def initialize(attrs)
+        @name = attrs['column_name']
+        data_type = attrs['data_type']
+
+        if data_type =~ /character/
+          @data_limit = attrs['character_maximum_length']
+        end
+
+        if data_type == 'ARRAY' && attrs['udt_name'] == '_varchar'
+          @type_name = 'character varying'
+          @data_limit = '256'
+          @array_ddl = '[]'
+        end
+
+        @type_name ||= data_type
+      end
+
+      def ddl
+        @ddl ||= "#{name} #{type_name}#{data_limit_ddl}#{array_ddl}"
+      end
+
+      def auto_index?
+        @type_name =~ /int|timestamp|bool/
+      end
+
+      private
+
+      def data_limit_ddl
+        "(#{data_limit})" if @data_limit
+      end
+
+      def array_ddl
+        @array_ddl
+      end
+    end
+  end
+end
data/lib/table_copy/pg/index.rb
ADDED
@@ -0,0 +1,21 @@
+module TableCopy
+  module PG
+    class Index
+      attr_reader :table, :name, :columns
+
+      def initialize(table, name, columns)
+        @table = table
+        @name = name
+        @columns = columns
+      end
+
+      def create
+        @create ||= "create index on #{table} using btree (#{columns.join(', ')})"
+      end
+
+      def drop
+        @drop ||= "drop index if exists #{name}"
+      end
+    end
+  end
+end
data/lib/table_copy/pg/source.rb
ADDED
@@ -0,0 +1,139 @@
+require 'table_copy/pg/field'
+require 'table_copy/pg/index'
+
+module TableCopy
+  module PG
+    class Source
+      attr_reader :table_name, :conn_method, :infer_pk_proc
+
+      def initialize(args)
+        @table_name = args[:table_name]
+        @conn_method = args[:conn_method]
+        @infer_pk_proc = args[:infer_pk_proc]
+      end
+
+      def to_s
+        table_name
+      end
+
+      def primary_key
+        @primary_key ||= get_primary_key
+      end
+
+      def fields_ddl
+        @fields_ddl ||= fields_objects.map(&:ddl).join(",\n ")
+      end
+
+      def indexes
+        @indexes ||= viable_index_columns.map { |name, columns| TableCopy::PG::Index.new(table_name, name, columns) }
+      end
+
+      def copy_from(fields_list_arg, where=nil)
+        with_conn do |conn|
+          conn.copy_data("copy (select #{fields_list_arg} from #{table_name} #{where}) to stdout csv") do
+            yield conn
+          end
+        end
+      end
+
+      def fields
+        @field_names ||= fields_objects.map(&:name)
+      end
+
+      private
+
+      def with_conn(&block)
+        conn_method.call(&block)
+      end
+
+      def fields_objects
+        @fields_objects ||= with_conn do |conn|
+          conn.exec(fields_sql).map { |r| TableCopy::PG::Field.new(r) }
+        end
+      end
+
+      def viable_index_columns
+        @viable_index_columns ||= index_columns.select do |name, columns|
+          (columns - fields).empty?
+        end
+      end
+
+      def index_columns
+        @index_columns ||= raw_indexes.inject({}) do |indexes, ri|
+          index_name = ri['index_name']
+          indexes[index_name] ||= []
+          indexes[index_name] << ri['column_name']
+          indexes
+        end
+      end
+
+      def raw_indexes
+        @raw_indexes || with_conn do |conn|
+          @raw_indexes = conn.exec(indexes_sql)
+        end
+      end
+
+      def indexes_sql
+        <<-SQL
+          select
+            i.relname as index_name,
+            a.attname as column_name
+          from
+            pg_class t,
+            pg_class i,
+            pg_index ix,
+            pg_attribute a
+          where
+            t.oid = ix.indrelid
+            and i.oid = ix.indexrelid
+            and a.attrelid = t.oid
+            and a.attnum = ANY(ix.indkey)
+            and t.relkind = 'r'
+            and t.relname = '#{table_name}'
+          order by
+            t.relname,
+            i.relname;
+        SQL
+      end
+
+      def fields_sql
+        <<-SQL
+          SELECT *
+          FROM information_schema.columns
+          WHERE table_schema='public' AND table_name='#{table_name}'
+        SQL
+      end
+
+      def get_primary_key
+        with_conn do |conn|
+          rows = conn.exec(primary_key_sql)
+          if (row = rows.first) && row['attname']
+            row['attname']
+          elsif infer_pk_proc
+            inferred_pk = infer_pk_proc.call(table_name)
+            TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to #{inferred_pk}."
+            inferred_pk
+          else
+            TableCopy.logger.warn "No explicit PK found for #{table_name}. Falling back to \"id\"."
+            'id'
+          end
+        end
+      end
+
+      def primary_key_sql
+        <<-SQL
+          SELECT
+            pg_attribute.attname,
+            format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
+          FROM pg_index, pg_class, pg_attribute
+          WHERE
+            pg_class.oid = '#{table_name}'::regclass AND
+            indrelid = pg_class.oid AND
+            pg_attribute.attrelid = pg_class.oid AND
+            pg_attribute.attnum = any(pg_index.indkey)
+            AND indisprimary
+        SQL
+      end
+    end
+  end
+end