kiba-plus 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/examples/Gemfile.lock +1 -1
- data/examples/incremental_insert.etl +48 -0
- data/lib/kiba/plus/destination/pg_bulk.rb +4 -38
- data/lib/kiba/plus/destination/pg_bulk2.rb +22 -5
- data/lib/kiba/plus/destination/pg_bulk_utils.rb +39 -0
- data/lib/kiba/plus/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ab51f248407db0efeea72628dc83f639fa952f4
|
4
|
+
data.tar.gz: e1d93e552d093ea421c1330ee2d1dbcf3f563c0d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55cfec8dcfcad2f4224bcaf3d37d6aa2e7ca7831786cf0b099ec8800f4b3843b6d19e2ff06a8c57af11c10f8d91be3bf920c5410a3895bb3646f5228065e9471
|
7
|
+
data.tar.gz: 5468e321b8b4dbfb9d8174a41964e7b79c8d5d1c8af9129b751621d02759629bb170ef61c51999483df1b260900e096229ea7d5420e9bdbb3db421f51515c356
|
data/examples/Gemfile.lock
CHANGED
data/examples/incremental_insert.etl
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative 'init'
|
3
|
+
|
4
|
+
SOURCE_URL = 'mysql://root@localhost/shopperplus'
|
5
|
+
|
6
|
+
DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'
|
7
|
+
|
8
|
+
pre_process do
|
9
|
+
@job_id = Kiba::Plus::Job.new(
|
10
|
+
:connect_url => DEST_URL,
|
11
|
+
:start_at => Time.now,
|
12
|
+
:job_name => "customer"
|
13
|
+
).start
|
14
|
+
end
|
15
|
+
|
16
|
+
last_pull_at = Kiba::Plus::Job.new(
|
17
|
+
:connect_url => DEST_URL,
|
18
|
+
:job_name => "customer"
|
19
|
+
).last_pull_at
|
20
|
+
|
21
|
+
source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
|
22
|
+
:query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers WHERE updated_at > '#{last_pull_at.to_s}'},
|
23
|
+
:last_pull_at => last_pull_at,
|
24
|
+
:incremental => true
|
25
|
+
}
|
26
|
+
|
27
|
+
destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
|
28
|
+
:table_name => "customers",
|
29
|
+
:truncate => false,
|
30
|
+
:columns => [:id, :email, :first_name, :last_name],
|
31
|
+
:incremental => true,
|
32
|
+
:unique_by => :id
|
33
|
+
}
|
34
|
+
|
35
|
+
post_process do
|
36
|
+
Kiba::Plus::Job.new(
|
37
|
+
:connect_url => DEST_URL,
|
38
|
+
:job_id => @job_id,
|
39
|
+
:job_name => "customer"
|
40
|
+
).complete
|
41
|
+
result = PG.connect(DEST_URL).query("SELECT COUNT(*) AS num FROM customers")
|
42
|
+
puts "Insert total: #{result.first['num']}"
|
43
|
+
end
|
44
|
+
|
45
|
+
# Output:
|
46
|
+
# I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
|
47
|
+
# I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
|
48
|
+
# Insert total: 428972
|
data/lib/kiba/plus/destination/pg_bulk.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
require_relative 'pg_bulk_utils'
|
1
2
|
module Kiba::Plus::Destination
|
2
3
|
class PgBulk
|
4
|
+
include PgBulkUtils
|
3
5
|
attr_reader :options
|
4
6
|
|
5
7
|
def initialize(options = {})
|
@@ -48,32 +50,6 @@ module Kiba::Plus::Destination
|
|
48
50
|
options.fetch(:unique_by, :id)
|
49
51
|
end
|
50
52
|
|
51
|
-
def write(row)
|
52
|
-
# blank!
|
53
|
-
end
|
54
|
-
|
55
|
-
def staging_table_name
|
56
|
-
table_name + "_staging"
|
57
|
-
end
|
58
|
-
|
59
|
-
def create_staging_table
|
60
|
-
sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
|
61
|
-
Kiba::Plus.logger.info sql
|
62
|
-
@conn.exec(sql)
|
63
|
-
end
|
64
|
-
|
65
|
-
def truncate_staging_table
|
66
|
-
truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
|
67
|
-
Kiba::Plus.logger.info truncate_sql
|
68
|
-
@conn.exec(truncate_sql) rescue nil
|
69
|
-
end
|
70
|
-
|
71
|
-
def truncate_target_table
|
72
|
-
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
73
|
-
Kiba::Plus.logger.info truncate_sql
|
74
|
-
@conn.exec(truncate_sql)
|
75
|
-
end
|
76
|
-
|
77
53
|
def copy_to_target_table
|
78
54
|
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
|
79
55
|
Kiba::Plus.logger.info sql
|
@@ -86,18 +62,8 @@ module Kiba::Plus::Destination
|
|
86
62
|
@conn.exec(sql)
|
87
63
|
end
|
88
64
|
|
89
|
-
|
90
|
-
|
91
|
-
where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
|
92
|
-
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
93
|
-
Kiba::Plus.logger.info sql
|
94
|
-
@conn.exec(sql)
|
95
|
-
end
|
96
|
-
|
97
|
-
def merge_to_target_table
|
98
|
-
sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
|
99
|
-
Kiba::Plus.logger.info sql
|
100
|
-
@conn.exec(sql)
|
65
|
+
def write(row)
|
66
|
+
# blank!
|
101
67
|
end
|
102
68
|
|
103
69
|
def close
|
data/lib/kiba/plus/destination/pg_bulk2.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'pg'
|
2
2
|
require 'csv'
|
3
|
+
require_relative 'pg_bulk_utils'
|
3
4
|
module Kiba::Plus::Destination
|
4
5
|
class PgBulk2
|
6
|
+
include PgBulkUtils
|
5
7
|
attr_reader :options
|
6
8
|
|
7
9
|
def initialize(options = {})
|
@@ -10,16 +12,22 @@ module Kiba::Plus::Destination
|
|
10
12
|
:columns,
|
11
13
|
:connect_url,
|
12
14
|
:truncate,
|
13
|
-
:incremental
|
15
|
+
:incremental,
|
16
|
+
:unique_by
|
14
17
|
)
|
15
18
|
|
16
19
|
@conn = PG.connect(connect_url)
|
17
|
-
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
18
20
|
if truncate
|
19
|
-
|
20
|
-
|
21
|
+
truncate_staging_table
|
22
|
+
truncate_target_table
|
23
|
+
end
|
24
|
+
if incremental
|
25
|
+
truncate_staging_table
|
26
|
+
create_staging_table
|
27
|
+
sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
28
|
+
else
|
29
|
+
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
21
30
|
end
|
22
|
-
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
23
31
|
Kiba::Plus.logger.info sql
|
24
32
|
@res = @conn.exec(sql)
|
25
33
|
end
|
@@ -55,9 +63,18 @@ module Kiba::Plus::Destination
|
|
55
63
|
options.fetch(:incremental, true)
|
56
64
|
end
|
57
65
|
|
66
|
+
def unique_by
|
67
|
+
options.fetch(:unique_by, :id)
|
68
|
+
end
|
69
|
+
|
58
70
|
def close
|
59
71
|
@conn.put_copy_end
|
60
72
|
@conn.get_last_result
|
73
|
+
if incremental
|
74
|
+
delete_before_insert
|
75
|
+
merge_to_target_table
|
76
|
+
truncate_staging_table
|
77
|
+
end
|
61
78
|
rescue
|
62
79
|
raise
|
63
80
|
ensure
|
data/lib/kiba/plus/destination/pg_bulk_utils.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Kiba::Plus::Destination
|
2
|
+
module PgBulkUtils
|
3
|
+
def staging_table_name
|
4
|
+
table_name + "_staging"
|
5
|
+
end
|
6
|
+
|
7
|
+
def create_staging_table
|
8
|
+
sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
|
9
|
+
Kiba::Plus.logger.info sql
|
10
|
+
@conn.exec(sql)
|
11
|
+
end
|
12
|
+
|
13
|
+
def truncate_staging_table
|
14
|
+
truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
|
15
|
+
Kiba::Plus.logger.info truncate_sql
|
16
|
+
@conn.exec(truncate_sql) rescue nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def truncate_target_table
|
20
|
+
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
21
|
+
Kiba::Plus.logger.info truncate_sql
|
22
|
+
@conn.exec(truncate_sql)
|
23
|
+
end
|
24
|
+
|
25
|
+
# TODO add where condition to speed up deleting.
|
26
|
+
def delete_before_insert
|
27
|
+
where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
|
28
|
+
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
29
|
+
Kiba::Plus.logger.info sql
|
30
|
+
@conn.exec(sql)
|
31
|
+
end
|
32
|
+
|
33
|
+
def merge_to_target_table
|
34
|
+
sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
|
35
|
+
Kiba::Plus.logger.info sql
|
36
|
+
@conn.exec(sql)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/kiba/plus/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba-plus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hooopo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kiba
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- examples/customer_mysql_to_csv.etl
|
120
120
|
- examples/customer_mysql_to_pg.etl
|
121
121
|
- examples/data/customer.csv
|
122
|
+
- examples/incremental_insert.etl
|
122
123
|
- examples/init.rb
|
123
124
|
- examples/sources/customer.rb
|
124
125
|
- kiba-plus.gemspec
|
@@ -129,6 +130,7 @@ files:
|
|
129
130
|
- lib/kiba/plus/destination/pg.rb
|
130
131
|
- lib/kiba/plus/destination/pg_bulk.rb
|
131
132
|
- lib/kiba/plus/destination/pg_bulk2.rb
|
133
|
+
- lib/kiba/plus/destination/pg_bulk_utils.rb
|
132
134
|
- lib/kiba/plus/helper.rb
|
133
135
|
- lib/kiba/plus/job.rb
|
134
136
|
- lib/kiba/plus/logger.rb
|