kiba-plus 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/examples/Gemfile.lock +1 -1
- data/examples/incremental_insert.etl +48 -0
- data/lib/kiba/plus/destination/pg_bulk.rb +4 -38
- data/lib/kiba/plus/destination/pg_bulk2.rb +22 -5
- data/lib/kiba/plus/destination/pg_bulk_utils.rb +39 -0
- data/lib/kiba/plus/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ab51f248407db0efeea72628dc83f639fa952f4
|
4
|
+
data.tar.gz: e1d93e552d093ea421c1330ee2d1dbcf3f563c0d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55cfec8dcfcad2f4224bcaf3d37d6aa2e7ca7831786cf0b099ec8800f4b3843b6d19e2ff06a8c57af11c10f8d91be3bf920c5410a3895bb3646f5228065e9471
|
7
|
+
data.tar.gz: 5468e321b8b4dbfb9d8174a41964e7b79c8d5d1c8af9129b751621d02759629bb170ef61c51999483df1b260900e096229ea7d5420e9bdbb3db421f51515c356
|
data/examples/Gemfile.lock
CHANGED
data/examples/incremental_insert.etl
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative 'init'
|
3
|
+
|
4
|
+
SOURCE_URL = 'mysql://root@localhost/shopperplus'
|
5
|
+
|
6
|
+
DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'
|
7
|
+
|
8
|
+
pre_process do
|
9
|
+
@job_id = Kiba::Plus::Job.new(
|
10
|
+
:connect_url => DEST_URL,
|
11
|
+
:start_at => Time.now,
|
12
|
+
:job_name => "customer"
|
13
|
+
).start
|
14
|
+
end
|
15
|
+
|
16
|
+
last_pull_at = Kiba::Plus::Job.new(
|
17
|
+
:connect_url => DEST_URL,
|
18
|
+
:job_name => "customer"
|
19
|
+
).last_pull_at
|
20
|
+
|
21
|
+
source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
|
22
|
+
:query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers WHERE updated_at > '#{last_pull_at.to_s}'},
|
23
|
+
:last_pull_at => last_pull_at,
|
24
|
+
:incremental => true
|
25
|
+
}
|
26
|
+
|
27
|
+
destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
|
28
|
+
:table_name => "customers",
|
29
|
+
:truncate => false,
|
30
|
+
:columns => [:id, :email, :first_name, :last_name],
|
31
|
+
:incremental => true,
|
32
|
+
:unique_by => :id
|
33
|
+
}
|
34
|
+
|
35
|
+
post_process do
|
36
|
+
Kiba::Plus::Job.new(
|
37
|
+
:connect_url => DEST_URL,
|
38
|
+
:job_id => @job_id,
|
39
|
+
:job_name => "customer"
|
40
|
+
).complete
|
41
|
+
result = PG.connect(DEST_URL).query("SELECT COUNT(*) AS num FROM customers")
|
42
|
+
puts "Insert total: #{result.first['num']}"
|
43
|
+
end
|
44
|
+
|
45
|
+
# Output:
|
46
|
+
# I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
|
47
|
+
# I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
|
48
|
+
# Insert total: 428972
|
data/lib/kiba/plus/destination/pg_bulk.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
require_relative 'pg_bulk_utils'
|
1
2
|
module Kiba::Plus::Destination
|
2
3
|
class PgBulk
|
4
|
+
include PgBulkUtils
|
3
5
|
attr_reader :options
|
4
6
|
|
5
7
|
def initialize(options = {})
|
@@ -48,32 +50,6 @@ module Kiba::Plus::Destination
|
|
48
50
|
options.fetch(:unique_by, :id)
|
49
51
|
end
|
50
52
|
|
51
|
-
def write(row)
|
52
|
-
# blank!
|
53
|
-
end
|
54
|
-
|
55
|
-
def staging_table_name
|
56
|
-
table_name + "_staging"
|
57
|
-
end
|
58
|
-
|
59
|
-
def create_staging_table
|
60
|
-
sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
|
61
|
-
Kiba::Plus.logger.info sql
|
62
|
-
@conn.exec(sql)
|
63
|
-
end
|
64
|
-
|
65
|
-
def truncate_staging_table
|
66
|
-
truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
|
67
|
-
Kiba::Plus.logger.info truncate_sql
|
68
|
-
@conn.exec(truncate_sql) rescue nil
|
69
|
-
end
|
70
|
-
|
71
|
-
def truncate_target_table
|
72
|
-
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
73
|
-
Kiba::Plus.logger.info truncate_sql
|
74
|
-
@conn.exec(truncate_sql)
|
75
|
-
end
|
76
|
-
|
77
53
|
def copy_to_target_table
|
78
54
|
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
|
79
55
|
Kiba::Plus.logger.info sql
|
@@ -86,18 +62,8 @@ module Kiba::Plus::Destination
|
|
86
62
|
@conn.exec(sql)
|
87
63
|
end
|
88
64
|
|
89
|
-
|
90
|
-
|
91
|
-
where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
|
92
|
-
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
93
|
-
Kiba::Plus.logger.info sql
|
94
|
-
@conn.exec(sql)
|
95
|
-
end
|
96
|
-
|
97
|
-
def merge_to_target_table
|
98
|
-
sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
|
99
|
-
Kiba::Plus.logger.info sql
|
100
|
-
@conn.exec(sql)
|
65
|
+
def write(row)
|
66
|
+
# blank!
|
101
67
|
end
|
102
68
|
|
103
69
|
def close
|
data/lib/kiba/plus/destination/pg_bulk2.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'pg'
|
2
2
|
require 'csv'
|
3
|
+
require_relative 'pg_bulk_utils'
|
3
4
|
module Kiba::Plus::Destination
|
4
5
|
class PgBulk2
|
6
|
+
include PgBulkUtils
|
5
7
|
attr_reader :options
|
6
8
|
|
7
9
|
def initialize(options = {})
|
@@ -10,16 +12,22 @@ module Kiba::Plus::Destination
|
|
10
12
|
:columns,
|
11
13
|
:connect_url,
|
12
14
|
:truncate,
|
13
|
-
:incremental
|
15
|
+
:incremental,
|
16
|
+
:unique_by
|
14
17
|
)
|
15
18
|
|
16
19
|
@conn = PG.connect(connect_url)
|
17
|
-
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
18
20
|
if truncate
|
19
|
-
|
20
|
-
|
21
|
+
truncate_staging_table
|
22
|
+
truncate_target_table
|
23
|
+
end
|
24
|
+
if incremental
|
25
|
+
truncate_staging_table
|
26
|
+
create_staging_table
|
27
|
+
sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
28
|
+
else
|
29
|
+
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
21
30
|
end
|
22
|
-
sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
|
23
31
|
Kiba::Plus.logger.info sql
|
24
32
|
@res = @conn.exec(sql)
|
25
33
|
end
|
@@ -55,9 +63,18 @@ module Kiba::Plus::Destination
|
|
55
63
|
options.fetch(:incremental, true)
|
56
64
|
end
|
57
65
|
|
66
|
+
def unique_by
|
67
|
+
options.fetch(:unique_by, :id)
|
68
|
+
end
|
69
|
+
|
58
70
|
def close
|
59
71
|
@conn.put_copy_end
|
60
72
|
@conn.get_last_result
|
73
|
+
if incremental
|
74
|
+
delete_before_insert
|
75
|
+
merge_to_target_table
|
76
|
+
truncate_staging_table
|
77
|
+
end
|
61
78
|
rescue
|
62
79
|
raise
|
63
80
|
ensure
|
data/lib/kiba/plus/destination/pg_bulk_utils.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Kiba::Plus::Destination
|
2
|
+
module PgBulkUtils
|
3
|
+
def staging_table_name
|
4
|
+
table_name + "_staging"
|
5
|
+
end
|
6
|
+
|
7
|
+
def create_staging_table
|
8
|
+
sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
|
9
|
+
Kiba::Plus.logger.info sql
|
10
|
+
@conn.exec(sql)
|
11
|
+
end
|
12
|
+
|
13
|
+
def truncate_staging_table
|
14
|
+
truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
|
15
|
+
Kiba::Plus.logger.info truncate_sql
|
16
|
+
@conn.exec(truncate_sql) rescue nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def truncate_target_table
|
20
|
+
truncate_sql = "TRUNCATE TABLE #{table_name};"
|
21
|
+
Kiba::Plus.logger.info truncate_sql
|
22
|
+
@conn.exec(truncate_sql)
|
23
|
+
end
|
24
|
+
|
25
|
+
# TODO add where condition to speed up deleting.
|
26
|
+
def delete_before_insert
|
27
|
+
where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
|
28
|
+
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
29
|
+
Kiba::Plus.logger.info sql
|
30
|
+
@conn.exec(sql)
|
31
|
+
end
|
32
|
+
|
33
|
+
def merge_to_target_table
|
34
|
+
sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
|
35
|
+
Kiba::Plus.logger.info sql
|
36
|
+
@conn.exec(sql)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/kiba/plus/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba-plus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hooopo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kiba
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- examples/customer_mysql_to_csv.etl
|
120
120
|
- examples/customer_mysql_to_pg.etl
|
121
121
|
- examples/data/customer.csv
|
122
|
+
- examples/incremental_insert.etl
|
122
123
|
- examples/init.rb
|
123
124
|
- examples/sources/customer.rb
|
124
125
|
- kiba-plus.gemspec
|
@@ -129,6 +130,7 @@ files:
|
|
129
130
|
- lib/kiba/plus/destination/pg.rb
|
130
131
|
- lib/kiba/plus/destination/pg_bulk.rb
|
131
132
|
- lib/kiba/plus/destination/pg_bulk2.rb
|
133
|
+
- lib/kiba/plus/destination/pg_bulk_utils.rb
|
132
134
|
- lib/kiba/plus/helper.rb
|
133
135
|
- lib/kiba/plus/job.rb
|
134
136
|
- lib/kiba/plus/logger.rb
|