kiba-plus 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +22 -1
- data/README.md +34 -9
- data/examples/customer_csv_to_mysql.etl +1 -1
- data/examples/customer_mysql_to_csv.etl +1 -1
- data/kiba-plus.gemspec +5 -0
- data/lib/kiba/plus/destination/csv.rb +21 -18
- data/lib/kiba/plus/destination/mysql.rb +8 -3
- data/lib/kiba/plus/destination/mysql_bulk.rb +70 -43
- data/lib/kiba/plus/destination/pg.rb +9 -2
- data/lib/kiba/plus/destination/pg_bulk.rb +72 -30
- data/lib/kiba/plus/destination/pg_bulk2.rb +69 -35
- data/lib/kiba/plus/destination/pg_bulk_utils.rb +44 -11
- data/lib/kiba/plus/helper.rb +9 -2
- data/lib/kiba/plus/job.rb +1 -1
- data/lib/kiba/plus/source/mysql.rb +10 -22
- data/lib/kiba/plus/source/pg.rb +40 -0
- data/lib/kiba/plus/version.rb +1 -1
- metadata +46 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b37f1fbad737a5141060feae03a0d99b98b20d42
|
4
|
+
data.tar.gz: f72a505eaf75bf1f24be63de334b63ecd955c881
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f22ebbaa54bcf735a58235c9485342763aa8bcff593d292b6d055897db79a9963ffae44d71bdd9612f20cce76de77d2f6dbcdb04c0fc2b8369fd33734cf2f98
|
7
|
+
data.tar.gz: e7088d77e6e03bb90445f0424a72532fc264ef87503f17fb7e4468bd7d481de3b17fc9994c06b4aa042d66dcbde00c40cf6d68497748028e3c2102b7e61ba1dc
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,4 +1,25 @@
|
|
1
1
|
language: ruby
|
2
|
+
|
2
3
|
rvm:
|
3
4
|
- 2.2.4
|
4
|
-
|
5
|
+
|
6
|
+
services:
|
7
|
+
- mysql
|
8
|
+
- postgresql
|
9
|
+
|
10
|
+
env:
|
11
|
+
- MYSQL2_SRC_CONNECT_URL=mysql2://travis@localhost/kiba_plus_src_test
|
12
|
+
- MYSQL2_DEST_CONNECT_URL=mysql2://travis@localhost/kiba_plus_dest_test
|
13
|
+
- PG_SRC_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_src_test
|
14
|
+
- PG_DEST_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_dest_test
|
15
|
+
|
16
|
+
before_install:
|
17
|
+
- gem install bundler -v 1.11.2
|
18
|
+
|
19
|
+
before_script:
|
20
|
+
- mysql -e 'create database kiba_plus_src_test;'
|
21
|
+
- mysql -e 'create database kiba_plus_dest_test;'
|
22
|
+
- psql -c 'create database kiba_plus_src_test;' -U postgres
|
23
|
+
- psql -c 'create database kiba_plus_dest_test;' -U postgres
|
24
|
+
|
25
|
+
script: bundle exec rake test
|
data/README.md
CHANGED
@@ -4,6 +4,8 @@ Kiba enhancement for Ruby ETL. It connects to various data sources including rel
|
|
4
4
|
# Usage
|
5
5
|
|
6
6
|
```ruby
|
7
|
+
# /tmp/customer_mysql_to_pg.etl
|
8
|
+
|
7
9
|
require 'kiba/plus'
|
8
10
|
|
9
11
|
SOURCE_URL = 'mysql://root@localhost/shopperplus'
|
@@ -27,22 +29,33 @@ post_process do
|
|
27
29
|
end
|
28
30
|
```
|
29
31
|
|
30
|
-
Execute:
|
32
|
+
Execute in shell:
|
31
33
|
|
32
34
|
```shell
|
33
|
-
bundle exec kiba customer_mysql_to_pg.etl
|
34
|
-
```
|
35
|
-
|
36
|
-
Output:
|
35
|
+
$ bundle exec kiba /tmp/customer_mysql_to_pg.etl
|
37
36
|
|
38
|
-
```
|
39
37
|
# Output:
|
40
38
|
# I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
|
41
39
|
# I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
|
42
40
|
# Insert total: 428972
|
43
41
|
```
|
44
42
|
|
45
|
-
|
43
|
+
Execute in a Ruby script:
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
require 'kiba'
|
47
|
+
|
48
|
+
job_definition = Kiba.parse(IO.read('/tmp/customer_mysql_to_pg.etl'), '/tmp/customer_mysql_to_pg.etl')
|
49
|
+
Kiba.run(job_definition)
|
50
|
+
```
|
51
|
+
|
52
|
+
# Examples
|
53
|
+
|
54
|
+
* [CSV to MySQL](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_mysql.etl)
|
55
|
+
* [CSV to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_pg.etl)
|
56
|
+
* [MySQL to CSV](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_csv.etl)
|
57
|
+
* [MySQL to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_pg.etl)
|
58
|
+
* [MySQL incremental to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/incremental_insert.etl)
|
46
59
|
|
47
60
|
# Main Feature
|
48
61
|
|
@@ -73,6 +86,7 @@ More Examples(TODO).
|
|
73
86
|
Add this line to your application's Gemfile:
|
74
87
|
|
75
88
|
```ruby
|
89
|
+
gem 'kiba'
|
76
90
|
gem 'kiba-plus'
|
77
91
|
```
|
78
92
|
|
@@ -84,10 +98,21 @@ Or install it yourself as:
|
|
84
98
|
|
85
99
|
$ gem install kiba-plus
|
86
100
|
|
87
|
-
##
|
101
|
+
## Development
|
88
102
|
|
103
|
+
First of all, please run the following commands in a shell.
|
89
104
|
|
90
|
-
|
105
|
+
```bash
|
106
|
+
|
107
|
+
$ mysql -e 'create database kiba_plus_src_test;'
|
108
|
+
|
109
|
+
$ mysql -e 'create database kiba_plus_dest_test;'
|
110
|
+
|
111
|
+
$ psql -c 'create database kiba_plus_src_test;' -U postgres
|
112
|
+
|
113
|
+
$ psql -c 'create database kiba_plus_dest_test;' -U postgres
|
114
|
+
|
115
|
+
```
|
91
116
|
|
92
117
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
93
118
|
|
@@ -11,6 +11,6 @@ destination Kiba::Plus::Destination::MysqlBulk, { :connect_url => DEST_URL,
|
|
11
11
|
:incremental => false
|
12
12
|
}
|
13
13
|
post_process do
|
14
|
-
result = Mysql2::Client.new(
|
14
|
+
result = Mysql2::Client.new(mysql2_connect_hash(DEST_URL)).query("SELECT COUNT(*) AS num FROM customers")
|
15
15
|
puts "Insert total: #{result.first['num']}"
|
16
16
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require_relative 'init'
|
3
3
|
|
4
|
-
|
4
|
+
SOURCE_URL = 'mysql://root@localhost/shopperplus'
|
5
5
|
|
6
6
|
source Kiba::Plus::Source::Mysql, :connect_url => SOURCE_URL,
|
7
7
|
:query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
|
data/kiba-plus.gemspec
CHANGED
@@ -26,10 +26,15 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.bindir = "exe"
|
27
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
28
|
spec.require_paths = ["lib"]
|
29
|
+
|
29
30
|
spec.add_runtime_dependency "kiba", "~> 0.6"
|
30
31
|
spec.add_runtime_dependency "mysql2", "~> 0.4"
|
31
32
|
spec.add_runtime_dependency "pg", "~> 0.18"
|
33
|
+
|
32
34
|
spec.add_development_dependency "bundler", "~> 1.11"
|
33
35
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
36
|
spec.add_development_dependency "minitest", "~> 5.0"
|
37
|
+
spec.add_development_dependency 'database_cleaner', '~> 1.5.3'
|
38
|
+
spec.add_development_dependency 'sequel', '~> 4.34'
|
39
|
+
spec.add_development_dependency 'pry'
|
35
40
|
end
|
@@ -2,17 +2,17 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Kiba::Plus::Destination
|
4
4
|
class Csv
|
5
|
-
attr_reader :options
|
5
|
+
attr_reader :options, :csv
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
8
|
@options = options
|
9
9
|
@options.assert_valid_keys(
|
10
10
|
:output_file,
|
11
|
+
:mode,
|
11
12
|
:row_sep,
|
12
13
|
:col_sep,
|
13
14
|
:force_quotes,
|
14
|
-
:quote_char
|
15
|
-
:mode
|
15
|
+
:quote_char
|
16
16
|
)
|
17
17
|
@csv = CSV.open(output_file, mode, {
|
18
18
|
:col_sep => col_sep,
|
@@ -22,36 +22,39 @@ module Kiba::Plus::Destination
|
|
22
22
|
})
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
26
|
-
|
25
|
+
def write(row)
|
26
|
+
@csv << row.values
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
|
29
|
+
def close
|
30
|
+
@csv.close
|
31
31
|
end
|
32
32
|
|
33
|
-
|
34
|
-
options.fetch(:col_sep, ",")
|
35
|
-
end
|
33
|
+
private
|
36
34
|
|
37
|
-
def
|
38
|
-
options.fetch(:
|
35
|
+
def output_file
|
36
|
+
options.fetch(:output_file)
|
39
37
|
end
|
40
38
|
|
41
|
-
def
|
42
|
-
options.fetch(:
|
39
|
+
def mode
|
40
|
+
options.fetch(:mode, "w")
|
43
41
|
end
|
44
42
|
|
45
43
|
def row_sep
|
46
44
|
options.fetch(:row_sep, "\n")
|
47
45
|
end
|
48
46
|
|
49
|
-
def
|
50
|
-
|
47
|
+
def col_sep
|
48
|
+
options.fetch(:col_sep, ",")
|
51
49
|
end
|
52
50
|
|
53
|
-
def
|
54
|
-
|
51
|
+
def force_quotes
|
52
|
+
options.fetch(:force_quotes, false)
|
53
|
+
end
|
54
|
+
|
55
|
+
def quote_char
|
56
|
+
options.fetch(:quote_char, '"')
|
55
57
|
end
|
58
|
+
|
56
59
|
end
|
57
60
|
end
|
@@ -3,7 +3,7 @@ require 'mysql2'
|
|
3
3
|
module Kiba::Plus::Destination
|
4
4
|
class Mysql
|
5
5
|
include Kiba::Plus::Helper
|
6
|
-
attr_reader :options
|
6
|
+
attr_reader :options, :client
|
7
7
|
|
8
8
|
def initialize(options = {})
|
9
9
|
@options = options
|
@@ -12,8 +12,8 @@ module Kiba::Plus::Destination
|
|
12
12
|
:prepare_sql,
|
13
13
|
:columns
|
14
14
|
)
|
15
|
-
@client = Mysql2::Client.new(
|
16
|
-
|
15
|
+
@client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
|
16
|
+
init
|
17
17
|
end
|
18
18
|
|
19
19
|
def write(row)
|
@@ -21,6 +21,7 @@ module Kiba::Plus::Destination
|
|
21
21
|
rescue => e
|
22
22
|
Kiba::Plus.logger.error "ERROR for #{row}"
|
23
23
|
Kiba::Plus.logger.error e.message
|
24
|
+
raise e
|
24
25
|
end
|
25
26
|
|
26
27
|
def close
|
@@ -30,6 +31,10 @@ module Kiba::Plus::Destination
|
|
30
31
|
|
31
32
|
private
|
32
33
|
|
34
|
+
def init
|
35
|
+
@pre_stmt = @client.prepare(prepare_sql)
|
36
|
+
end
|
37
|
+
|
33
38
|
def connect_url
|
34
39
|
options.fetch(:connect_url)
|
35
40
|
end
|
@@ -3,45 +3,53 @@ require 'mysql2'
|
|
3
3
|
module Kiba::Plus::Destination
|
4
4
|
class MysqlBulk
|
5
5
|
include Kiba::Plus::Helper
|
6
|
-
attr_reader :options
|
6
|
+
attr_reader :options, :client
|
7
7
|
|
8
8
|
def initialize(options = {})
|
9
9
|
@options = options
|
10
|
-
@options.assert_valid_keys(
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
10
|
+
@options.assert_valid_keys(
|
11
|
+
:connect_url,
|
12
|
+
:table_name,
|
13
|
+
:columns,
|
14
|
+
:truncate,
|
15
|
+
:incremental,
|
16
|
+
:input_file,
|
17
|
+
:ignore_input_file_header,
|
18
|
+
:delimited_by,
|
19
|
+
:enclosed_by,
|
20
|
+
:ignore_lines
|
21
|
+
)
|
22
|
+
|
23
|
+
@client = Mysql2::Client.new(mysql2_connect_hash(connect_url).merge(local_infile: true))
|
22
24
|
end
|
23
25
|
|
24
|
-
def
|
25
|
-
|
26
|
+
def write(row)
|
27
|
+
# blank!
|
26
28
|
end
|
27
29
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
30
|
+
def close
|
31
|
+
if truncate
|
32
|
+
sql = truncate_sql
|
33
|
+
Kiba::Plus.logger.info sql
|
34
|
+
client.query(sql)
|
35
|
+
end
|
31
36
|
|
32
|
-
|
33
|
-
|
34
|
-
|
37
|
+
sql = bulk_sql
|
38
|
+
Kiba::Plus.logger.info sql
|
39
|
+
client.query(sql)
|
35
40
|
|
36
|
-
|
37
|
-
|
41
|
+
client.close
|
42
|
+
@client = nil
|
38
43
|
end
|
39
44
|
|
40
|
-
|
41
|
-
|
45
|
+
private
|
46
|
+
|
47
|
+
def connect_url
|
48
|
+
options.fetch(:connect_url)
|
42
49
|
end
|
43
50
|
|
44
|
-
def
|
51
|
+
def table_name
|
52
|
+
options.fetch(:table_name)
|
45
53
|
end
|
46
54
|
|
47
55
|
def columns
|
@@ -60,27 +68,46 @@ module Kiba::Plus::Destination
|
|
60
68
|
options.fetch(:input_file)
|
61
69
|
end
|
62
70
|
|
63
|
-
def
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
71
|
+
def ignore_input_file_header
|
72
|
+
!!options.fetch(:ignore_input_file_header, false)
|
73
|
+
end
|
74
|
+
|
75
|
+
def delimited_by
|
76
|
+
options.fetch(:delimited_by, ",")
|
77
|
+
end
|
78
|
+
|
79
|
+
def enclosed_by
|
80
|
+
options.fetch(:enclosed_by, '"')
|
81
|
+
end
|
82
|
+
|
83
|
+
def ignore_lines
|
84
|
+
options.fetch(:ignore_lines, 0).to_i
|
85
|
+
end
|
86
|
+
|
87
|
+
def real_ignore_lines
|
88
|
+
lines = ignore_lines
|
89
|
+
lines += 1 if ignore_input_file_header
|
90
|
+
lines
|
91
|
+
end
|
69
92
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
93
|
+
def truncate_sql
|
94
|
+
sql = "TRUNCATE TABLE #{table_name}"
|
95
|
+
format_sql sql
|
96
|
+
end
|
97
|
+
|
98
|
+
def bulk_sql
|
99
|
+
sql = <<-SQL
|
100
|
+
LOAD DATA LOCAL INFILE '#{input_file}'
|
101
|
+
REPLACE
|
102
|
+
INTO TABLE #{table_name}
|
103
|
+
FIELDS
|
74
104
|
TERMINATED BY '#{delimited_by}'
|
75
105
|
ENCLOSED BY '#{enclosed_by}'
|
76
|
-
|
77
|
-
|
106
|
+
IGNORE #{real_ignore_lines} LINES
|
107
|
+
(#{columns.join(',')})
|
78
108
|
SQL
|
79
|
-
|
80
|
-
@client.query(bulk_sql)
|
81
|
-
|
82
|
-
@client.close
|
83
|
-
@client = nil
|
109
|
+
format_sql sql
|
84
110
|
end
|
111
|
+
|
85
112
|
end
|
86
113
|
end
|
@@ -2,18 +2,20 @@ require 'pg'
|
|
2
2
|
|
3
3
|
module Kiba::Plus::Destination
|
4
4
|
class Pg
|
5
|
-
attr_reader :options
|
5
|
+
attr_reader :options, :conn
|
6
6
|
|
7
7
|
def initialize(options = {})
|
8
8
|
@options = options
|
9
9
|
@options.assert_valid_keys(
|
10
10
|
:connect_url,
|
11
|
+
:schema,
|
11
12
|
:prepare_name,
|
12
13
|
:prepare_sql,
|
13
14
|
:columns
|
14
15
|
)
|
15
16
|
@conn = PG.connect(connect_url)
|
16
|
-
@conn.
|
17
|
+
@conn.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
|
18
|
+
init
|
17
19
|
end
|
18
20
|
|
19
21
|
def write(row)
|
@@ -23,6 +25,7 @@ module Kiba::Plus::Destination
|
|
23
25
|
Kiba::Plus.logger.error "ERROR for #{row}"
|
24
26
|
Kiba::Plus.logger.error ex.message
|
25
27
|
# Maybe, write to db table or file
|
28
|
+
raise ex
|
26
29
|
end
|
27
30
|
|
28
31
|
def close
|
@@ -32,6 +35,10 @@ module Kiba::Plus::Destination
|
|
32
35
|
|
33
36
|
private
|
34
37
|
|
38
|
+
def init
|
39
|
+
@conn.prepare(prepare_name, prepare_sql)
|
40
|
+
end
|
41
|
+
|
35
42
|
def connect_url
|
36
43
|
options.fetch(:connect_url)
|
37
44
|
end
|
@@ -2,20 +2,48 @@ require_relative 'pg_bulk_utils'
|
|
2
2
|
module Kiba::Plus::Destination
|
3
3
|
class PgBulk
|
4
4
|
include PgBulkUtils
|
5
|
-
|
5
|
+
include Kiba::Plus::Helper
|
6
|
+
attr_reader :options, :conn
|
6
7
|
|
7
8
|
def initialize(options = {})
|
8
9
|
@options = options
|
9
10
|
@options.assert_valid_keys(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
11
|
+
:connect_url,
|
12
|
+
:table_name,
|
13
|
+
:columns,
|
14
|
+
:truncate,
|
15
|
+
:incremental,
|
16
|
+
:unique_by,
|
17
|
+
:input_file,
|
18
|
+
:ignore_input_file_header
|
19
|
+
)
|
18
20
|
@conn = PG.connect(connect_url)
|
21
|
+
|
22
|
+
init
|
23
|
+
end
|
24
|
+
|
25
|
+
def write(row)
|
26
|
+
# blank!
|
27
|
+
end
|
28
|
+
|
29
|
+
def close
|
30
|
+
if incremental
|
31
|
+
truncate_staging_table
|
32
|
+
create_staging_table
|
33
|
+
copy_to_staging_table
|
34
|
+
delete_before_insert
|
35
|
+
merge_to_target_table
|
36
|
+
truncate_staging_table
|
37
|
+
else
|
38
|
+
copy_to_target_table
|
39
|
+
end
|
40
|
+
@conn.close
|
41
|
+
@conn = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def init
|
19
47
|
if truncate
|
20
48
|
truncate_staging_table
|
21
49
|
truncate_target_table
|
@@ -30,10 +58,6 @@ module Kiba::Plus::Destination
|
|
30
58
|
options.fetch(:table_name)
|
31
59
|
end
|
32
60
|
|
33
|
-
def input_file
|
34
|
-
options.fetch(:input_file)
|
35
|
-
end
|
36
|
-
|
37
61
|
def columns
|
38
62
|
options.fetch(:columns)
|
39
63
|
end
|
@@ -50,35 +74,53 @@ module Kiba::Plus::Destination
|
|
50
74
|
options.fetch(:unique_by, :id)
|
51
75
|
end
|
52
76
|
|
77
|
+
def input_file
|
78
|
+
options.fetch(:input_file)
|
79
|
+
end
|
80
|
+
|
81
|
+
def ignore_input_file_header
|
82
|
+
!!options.fetch(:ignore_input_file_header, false)
|
83
|
+
end
|
84
|
+
|
53
85
|
def copy_to_target_table
|
54
|
-
sql =
|
86
|
+
sql = copy_to_target_table_sql
|
55
87
|
Kiba::Plus.logger.info sql
|
56
88
|
@conn.exec(sql)
|
57
89
|
end
|
58
90
|
|
59
91
|
def copy_to_staging_table
|
60
|
-
sql =
|
92
|
+
sql = copy_to_staging_table_sql
|
61
93
|
Kiba::Plus.logger.info sql
|
62
94
|
@conn.exec(sql)
|
63
95
|
end
|
64
96
|
|
65
|
-
def
|
66
|
-
|
97
|
+
def copy_to_target_table_sql
|
98
|
+
sql = <<-SQL
|
99
|
+
COPY #{table_name} (#{columns.join(', ')})
|
100
|
+
FROM '#{File.expand_path(input_file)}'
|
101
|
+
WITH
|
102
|
+
#{ignore_input_file_header ? 'HEADER' : ''}
|
103
|
+
DELIMITER ','
|
104
|
+
NULL '\\N'
|
105
|
+
CSV
|
106
|
+
SQL
|
107
|
+
|
108
|
+
format_sql sql
|
67
109
|
end
|
68
110
|
|
69
|
-
def
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
@conn = nil
|
111
|
+
def copy_to_staging_table_sql
|
112
|
+
sql = <<-SQL
|
113
|
+
COPY #{staging_table_name} (#{columns.join(', ')})
|
114
|
+
FROM '#{File.expand_path(input_file)}'
|
115
|
+
WITH
|
116
|
+
#{ignore_input_file_header ? 'HEADER' : ''}
|
117
|
+
DELIMITER ','
|
118
|
+
NULL '\\N'
|
119
|
+
CSV
|
120
|
+
SQL
|
121
|
+
|
122
|
+
format_sql sql
|
82
123
|
end
|
124
|
+
|
83
125
|
end
|
84
126
|
end
|
@@ -4,19 +4,54 @@ require_relative 'pg_bulk_utils'
|
|
4
4
|
module Kiba::Plus::Destination
|
5
5
|
class PgBulk2
|
6
6
|
include PgBulkUtils
|
7
|
-
|
7
|
+
include Kiba::Plus::Helper
|
8
|
+
attr_reader :options, :conn
|
8
9
|
|
9
10
|
def initialize(options = {})
|
10
11
|
@options = options
|
11
|
-
@options.assert_valid_keys(
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
@options.assert_valid_keys(
|
13
|
+
:connect_url,
|
14
|
+
:table_name,
|
15
|
+
:columns,
|
16
|
+
:truncate,
|
17
|
+
:incremental,
|
18
|
+
:unique_by
|
19
|
+
)
|
18
20
|
|
19
21
|
@conn = PG.connect(connect_url)
|
22
|
+
|
23
|
+
init
|
24
|
+
end
|
25
|
+
|
26
|
+
def write(row)
|
27
|
+
begin
|
28
|
+
@conn.put_copy_data CSV.generate_line(row.values_at(*columns))
|
29
|
+
rescue Exception => err
|
30
|
+
errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
|
31
|
+
@conn.put_copy_end( errmsg )
|
32
|
+
Kiba::Plus.logger.error @conn.get_result
|
33
|
+
raise
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def close
|
38
|
+
@conn.put_copy_end
|
39
|
+
@conn.get_last_result
|
40
|
+
if incremental
|
41
|
+
delete_before_insert
|
42
|
+
merge_to_target_table
|
43
|
+
truncate_staging_table
|
44
|
+
end
|
45
|
+
rescue
|
46
|
+
raise
|
47
|
+
ensure
|
48
|
+
@conn.close
|
49
|
+
@conn = nil
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def init
|
20
55
|
if truncate
|
21
56
|
truncate_staging_table
|
22
57
|
truncate_target_table
|
@@ -24,12 +59,12 @@ module Kiba::Plus::Destination
|
|
24
59
|
if incremental
|
25
60
|
truncate_staging_table
|
26
61
|
create_staging_table
|
27
|
-
sql =
|
62
|
+
sql = bulk_sql_with_incremental
|
28
63
|
else
|
29
|
-
sql =
|
64
|
+
sql = bulk_sql_with_non_incremental
|
30
65
|
end
|
31
66
|
Kiba::Plus.logger.info sql
|
32
|
-
@
|
67
|
+
@conn.exec(sql)
|
33
68
|
end
|
34
69
|
|
35
70
|
def connect_url
|
@@ -40,17 +75,6 @@ module Kiba::Plus::Destination
|
|
40
75
|
options.fetch(:table_name)
|
41
76
|
end
|
42
77
|
|
43
|
-
def write(row)
|
44
|
-
begin
|
45
|
-
@conn.put_copy_data CSV.generate_line(row.values_at(*columns))
|
46
|
-
rescue Exception => err
|
47
|
-
errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
|
48
|
-
@conn.put_copy_end( errmsg )
|
49
|
-
Kiba::Plus.logger.error @conn.get_result
|
50
|
-
raise
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
78
|
def columns
|
55
79
|
options.fetch(:columns)
|
56
80
|
end
|
@@ -67,19 +91,29 @@ module Kiba::Plus::Destination
|
|
67
91
|
options.fetch(:unique_by, :id)
|
68
92
|
end
|
69
93
|
|
70
|
-
def
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
ensure
|
81
|
-
@conn.close
|
82
|
-
@conn = nil
|
94
|
+
def bulk_sql_with_incremental
|
95
|
+
sql = <<-SQL
|
96
|
+
COPY #{staging_table_name} (#{columns.join(', ')})
|
97
|
+
FROM STDIN
|
98
|
+
WITH
|
99
|
+
DELIMITER ','
|
100
|
+
NULL '\\N'
|
101
|
+
CSV
|
102
|
+
SQL
|
103
|
+
format_sql sql
|
83
104
|
end
|
105
|
+
|
106
|
+
def bulk_sql_with_non_incremental
|
107
|
+
sql = <<-SQL
|
108
|
+
COPY #{table_name} (#{columns.join(', ')})
|
109
|
+
FROM STDIN
|
110
|
+
WITH
|
111
|
+
DELIMITER ','
|
112
|
+
NULL '\\N'
|
113
|
+
CSV
|
114
|
+
SQL
|
115
|
+
format_sql sql
|
116
|
+
end
|
117
|
+
|
84
118
|
end
|
85
119
|
end
|
@@ -1,39 +1,72 @@
|
|
1
1
|
module Kiba::Plus::Destination
|
2
2
|
module PgBulkUtils
|
3
|
+
|
4
|
+
private
|
5
|
+
|
3
6
|
def staging_table_name
|
4
7
|
table_name + "_staging"
|
5
8
|
end
|
6
9
|
|
7
10
|
def create_staging_table
|
8
|
-
sql =
|
11
|
+
sql = create_staging_table_sql
|
9
12
|
Kiba::Plus.logger.info sql
|
10
13
|
@conn.exec(sql)
|
11
14
|
end
|
12
15
|
|
16
|
+
def create_staging_table_sql
|
17
|
+
sql = <<-SQL
|
18
|
+
CREATE TABLE IF NOT EXISTS #{staging_table_name} (
|
19
|
+
LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES
|
20
|
+
)
|
21
|
+
SQL
|
22
|
+
format_sql sql
|
23
|
+
end
|
24
|
+
|
13
25
|
def truncate_staging_table
|
14
|
-
|
15
|
-
Kiba::Plus.logger.info
|
16
|
-
@conn.exec(
|
26
|
+
sql = truncate_staging_table_sql
|
27
|
+
Kiba::Plus.logger.info sql
|
28
|
+
@conn.exec(sql) rescue nil
|
29
|
+
end
|
30
|
+
|
31
|
+
def truncate_staging_table_sql
|
32
|
+
sql = "TRUNCATE TABLE #{staging_table_name}"
|
33
|
+
format_sql sql
|
17
34
|
end
|
18
35
|
|
19
36
|
def truncate_target_table
|
20
|
-
|
21
|
-
Kiba::Plus.logger.info
|
22
|
-
@conn.exec(
|
37
|
+
sql = truncate_target_table_sql
|
38
|
+
Kiba::Plus.logger.info sql
|
39
|
+
@conn.exec(sql)
|
40
|
+
end
|
41
|
+
|
42
|
+
def truncate_target_table_sql
|
43
|
+
sql = "TRUNCATE TABLE #{table_name}"
|
44
|
+
format_sql sql
|
23
45
|
end
|
24
46
|
|
25
|
-
# TODO add where condition to speed up deleting.
|
26
47
|
def delete_before_insert
|
27
|
-
|
28
|
-
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
48
|
+
sql = delete_before_insert_sql
|
29
49
|
Kiba::Plus.logger.info sql
|
30
50
|
@conn.exec(sql)
|
31
51
|
end
|
32
52
|
|
53
|
+
# TODO add where condition to speed up deleting.
|
54
|
+
def delete_before_insert_sql
|
55
|
+
where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
|
56
|
+
sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
|
57
|
+
format_sql sql
|
58
|
+
end
|
59
|
+
|
33
60
|
def merge_to_target_table
|
34
|
-
sql =
|
61
|
+
sql = merge_to_target_table_sql
|
35
62
|
Kiba::Plus.logger.info sql
|
36
63
|
@conn.exec(sql)
|
37
64
|
end
|
65
|
+
|
66
|
+
def merge_to_target_table_sql
|
67
|
+
sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
|
68
|
+
format_sql sql
|
69
|
+
end
|
70
|
+
|
38
71
|
end
|
39
72
|
end
|
data/lib/kiba/plus/helper.rb
CHANGED
@@ -2,13 +2,15 @@ require 'uri'
|
|
2
2
|
module Kiba
|
3
3
|
module Plus
|
4
4
|
module Helper
|
5
|
-
def
|
5
|
+
def mysql2_connect_hash(url)
|
6
|
+
return url if url.is_a?(Hash)
|
7
|
+
|
6
8
|
u = URI.parse(url)
|
7
9
|
{
|
8
10
|
host: u.host,
|
11
|
+
port: u.port,
|
9
12
|
username: u.user,
|
10
13
|
password: u.password,
|
11
|
-
port: u.port,
|
12
14
|
database: u.path[1..-1]
|
13
15
|
}
|
14
16
|
end
|
@@ -17,6 +19,11 @@ module Kiba
|
|
17
19
|
u = URI.parse(url)
|
18
20
|
u.scheme
|
19
21
|
end
|
22
|
+
|
23
|
+
def format_sql(sql)
|
24
|
+
sql.to_s.gsub(/[\n][\s]*[\n]/, "\n")
|
25
|
+
end
|
26
|
+
|
20
27
|
end
|
21
28
|
end
|
22
29
|
end
|
data/lib/kiba/plus/job.rb
CHANGED
@@ -15,7 +15,7 @@ module Kiba
|
|
15
15
|
@options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
|
16
16
|
url = URI.parse(connect_url)
|
17
17
|
if url.scheme =~ /mysql/i
|
18
|
-
@client = Mysql2::Client.new(
|
18
|
+
@client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
|
19
19
|
elsif url.scheme =~ /postgres/i
|
20
20
|
@client = PG.connect(connect_url)
|
21
21
|
else
|
@@ -10,41 +10,29 @@ module Kiba
|
|
10
10
|
def initialize(options = {})
|
11
11
|
@options = options
|
12
12
|
@options.assert_valid_keys(
|
13
|
-
:
|
14
|
-
:
|
15
|
-
|
16
|
-
|
17
|
-
:connect_url
|
18
|
-
)
|
19
|
-
@client = Mysql2::Client.new(connect_hash(connect_url))
|
13
|
+
:connect_url,
|
14
|
+
:query
|
15
|
+
)
|
16
|
+
@client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
|
20
17
|
end
|
21
18
|
|
22
19
|
def each
|
20
|
+
Kiba::Plus.logger.info query
|
23
21
|
results = client.query(query, as: :hash, symbolize_keys: true, stream: true)
|
24
22
|
results.each do |row|
|
25
23
|
yield(row)
|
26
24
|
end
|
27
25
|
end
|
28
26
|
|
29
|
-
|
30
|
-
options.fetch(:query)
|
31
|
-
end
|
32
|
-
|
33
|
-
def output
|
34
|
-
options.fetch(:output)
|
35
|
-
end
|
36
|
-
|
37
|
-
def last_pull_at
|
38
|
-
options[:last_pull_at]
|
39
|
-
end
|
40
|
-
|
41
|
-
def incremental
|
42
|
-
options.fetch(:incremental, true)
|
43
|
-
end
|
27
|
+
private
|
44
28
|
|
45
29
|
def connect_url
|
46
30
|
options.fetch(:connect_url)
|
47
31
|
end
|
32
|
+
|
33
|
+
def query
|
34
|
+
options.fetch(:query)
|
35
|
+
end
|
48
36
|
end
|
49
37
|
end
|
50
38
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'pg'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Kiba
|
5
|
+
module Plus::Source
|
6
|
+
class Pg
|
7
|
+
include Kiba::Plus::Helper
|
8
|
+
attr_reader :options, :client
|
9
|
+
|
10
|
+
def initialize(options = {})
|
11
|
+
@options = options
|
12
|
+
@options.assert_valid_keys(
|
13
|
+
:connect_url,
|
14
|
+
:schema,
|
15
|
+
:query
|
16
|
+
)
|
17
|
+
@client = PG.connect(connect_url)
|
18
|
+
@client.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
|
19
|
+
end
|
20
|
+
|
21
|
+
def each
|
22
|
+
Kiba::Plus.logger.info query
|
23
|
+
results = client.query(query)
|
24
|
+
results.each do |row|
|
25
|
+
yield(row)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def connect_url
|
32
|
+
options.fetch(:connect_url)
|
33
|
+
end
|
34
|
+
|
35
|
+
def query
|
36
|
+
options.fetch(:query)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/kiba/plus/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba-plus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hooopo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kiba
|
@@ -94,6 +94,48 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '5.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: database_cleaner
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 1.5.3
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 1.5.3
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: sequel
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '4.34'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '4.34'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
97
139
|
description: It connects to various data sources including relational, non-relational,
|
98
140
|
and flat file, cloud services and HTTP resources. It has flexible load strategies
|
99
141
|
including insert, bulk load and upsert.
|
@@ -135,6 +177,7 @@ files:
|
|
135
177
|
- lib/kiba/plus/job.rb
|
136
178
|
- lib/kiba/plus/logger.rb
|
137
179
|
- lib/kiba/plus/source/mysql.rb
|
180
|
+
- lib/kiba/plus/source/pg.rb
|
138
181
|
- lib/kiba/plus/version.rb
|
139
182
|
homepage: https://github.com/hooopo/kiba-plus
|
140
183
|
licenses:
|
@@ -157,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
200
|
version: '0'
|
158
201
|
requirements: []
|
159
202
|
rubyforge_project:
|
160
|
-
rubygems_version: 2.
|
203
|
+
rubygems_version: 2.6.11
|
161
204
|
signing_key:
|
162
205
|
specification_version: 4
|
163
206
|
summary: Kiba enhancement for Ruby ETL
|