kiba-plus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +44 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +674 -0
- data/LICENSE.txt +21 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/examples/Gemfile +4 -0
- data/examples/Gemfile.lock +31 -0
- data/examples/customer_mysql_to_csv.etl +13 -0
- data/examples/customer_mysql_to_pg.etl +27 -0
- data/examples/init.rb +8 -0
- data/examples/sources/customer.rb +0 -0
- data/kiba-plus.gemspec +35 -0
- data/lib/kiba/plus.rb +30 -0
- data/lib/kiba/plus/destination/csv.rb +29 -0
- data/lib/kiba/plus/destination/mysql.rb +45 -0
- data/lib/kiba/plus/destination/mysql_bulk.rb +63 -0
- data/lib/kiba/plus/destination/pg.rb +51 -0
- data/lib/kiba/plus/destination/pg_bulk.rb +118 -0
- data/lib/kiba/plus/destination/pg_bulk2.rb +68 -0
- data/lib/kiba/plus/helper.rb +22 -0
- data/lib/kiba/plus/job.rb +149 -0
- data/lib/kiba/plus/logger.rb +12 -0
- data/lib/kiba/plus/source/mysql.rb +50 -0
- data/lib/kiba/plus/version.rb +5 -0
- metadata +157 -0
data/lib/kiba/plus/destination/pg.rb
@@ -0,0 +1,51 @@
require 'pg'

module Kiba::Plus::Destination
  class Pg
    attr_reader :options

    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(
        :connect_url,
        :prepare_name,
        :prepare_sql,
        :columns
      )
      @conn = PG.connect(connect_url)
      @conn.prepare(prepare_name, prepare_sql)
    end

    def write(row)
      @conn.exec_prepared(prepare_name,
        row.values_at(*columns))
    rescue PG::Error => ex
      Kiba::Plus.logger.error "ERROR for #{row}"
      Kiba::Plus.logger.error ex.message
      # Maybe, write to db table or file
    end

    def close
      @conn.close
      @conn = nil
    end

    private

    def connect_url
      options.fetch(:connect_url)
    end

    def prepare_name
      options.fetch(:prepare_name, self.class.to_s.downcase + "stmt")
    end

    def prepare_sql
      options.fetch(:prepare_sql)
    end

    def columns
      options.fetch(:columns)
    end
  end
end
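
For orientation, a minimal sketch of how this Pg destination could be declared inside a Kiba script (compare data/examples/customer_mysql_to_pg.etl listed above). The connection URL, table, SQL, and columns below are hypothetical placeholders, not values shipped with the gem:

    # Fragment of a Kiba .etl script; run with `kiba my_job.etl`.
    # Rows are written one by one through a prepared INSERT statement.
    destination Kiba::Plus::Destination::Pg,
      connect_url: 'postgresql://postgres@localhost:5432/example_db',
      prepare_sql: 'INSERT INTO customers (id, name) VALUES ($1, $2)',
      columns: [:id, :name]   # values_at(:id, :name) feeds $1 and $2 in order

Omitting :prepare_name falls back to the class-derived default statement name, as shown in the code above.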
data/lib/kiba/plus/destination/pg_bulk.rb
@@ -0,0 +1,118 @@
module Kiba::Plus::Destination
  class PgBulk
    attr_reader :options

    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(
        :connect_url,
        :input_file,
        :table_name,
        :columns,
        :truncate,
        :incremental,
        :unique_by
      )
      @conn = PG.connect(connect_url)
      if truncate
        truncate_staging_table
        truncate_target_table
      end
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    def input_file
      options.fetch(:input_file)
    end

    def columns
      options.fetch(:columns)
    end

    def truncate
      options.fetch(:truncate, false)
    end

    def incremental
      options.fetch(:incremental, true)
    end

    def unique_by
      options.fetch(:unique_by, :id)
    end

    def write(row)
      # blank!
    end

    def staging_table_name
      table_name + "_staging"
    end

    def create_staging_table
      sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    def truncate_staging_table
      truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
      Kiba::Plus.logger.info truncate_sql
      @conn.exec(truncate_sql) rescue nil
    end

    def truncate_target_table
      truncate_sql = "TRUNCATE TABLE #{table_name};"
      Kiba::Plus.logger.info truncate_sql
      @conn.exec(truncate_sql)
    end

    def copy_to_target_table
      sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    def copy_to_staging_table
      sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    # TODO add where condition to speed up deleting.
    def delete_before_insert
      where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
      sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    def merge_to_target_table
      sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    def close
      if incremental
        truncate_staging_table
        create_staging_table
        copy_to_staging_table
        delete_before_insert
        merge_to_target_table
        truncate_staging_table
      else
        copy_to_target_table
      end
      @conn.close
      @conn = nil
    end
  end
end
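
Note that PgBulk#write is a deliberate no-op: all the work happens in #close, which COPYs the prepared input_file into a staging table, deletes matching rows from the target by unique_by, and merges the staging rows in. A hypothetical invocation (file path, table, and columns are placeholders):

    # Assumes /tmp/customers.csv was produced by an earlier step in the pipeline.
    destination Kiba::Plus::Destination::PgBulk,
      connect_url: 'postgresql://postgres@localhost:5432/example_db',
      input_file: '/tmp/customers.csv',
      table_name: 'customers',
      columns: [:id, :name],
      incremental: true,   # staging COPY + delete-before-insert + merge
      unique_by: :id       # column(s) used to match existing target rows

With incremental: false, the file is COPYed straight into the target table instead.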
data/lib/kiba/plus/destination/pg_bulk2.rb
@@ -0,0 +1,68 @@
require 'pg'
require 'csv'
module Kiba::Plus::Destination
  class PgBulk2
    attr_reader :options

    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(:table_name,
        :columns,
        :connect_url,
        :truncate,
        :incremental
      )

      @conn = PG.connect(connect_url)
      truncate_sql = "TRUNCATE TABLE #{table_name};"
      if truncate
        Kiba::Plus.logger.info truncate_sql
        @conn.exec(truncate_sql)
      end
      sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
      Kiba::Plus.logger.info sql
      @res = @conn.exec(sql)
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    def write(row)
      begin
        @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
      rescue Exception => err
        errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
        @conn.put_copy_end( errmsg )
        Kiba::Plus.logger.error @conn.get_result
        raise
      end
    end

    def columns
      options.fetch(:columns)
    end

    def truncate
      options.fetch(:truncate, false)
    end

    def incremental
      options.fetch(:incremental, true)
    end

    def close
      @conn.put_copy_end
      @conn.get_last_result
    rescue
      raise
    ensure
      @conn.close
      @conn = nil
    end
  end
end
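
Unlike PgBulk, PgBulk2 needs no intermediate file: it opens a COPY ... FROM STDIN at construction time and streams each row through put_copy_data. A hypothetical fragment (URL, table, and columns are placeholders):

    # Full reload: truncate the target, then stream rows via COPY FROM STDIN.
    destination Kiba::Plus::Destination::PgBulk2,
      connect_url: 'postgresql://postgres@localhost:5432/example_db',
      table_name: 'customers',
      columns: [:id, :name],
      truncate: true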
data/lib/kiba/plus/helper.rb
@@ -0,0 +1,22 @@
require 'uri'
module Kiba
  module Plus
    module Helper
      def connect_hash(url)
        u = URI.parse(url)
        {
          host: u.host,
          username: u.user,
          password: u.password,
          port: u.port,
          database: u.path[1..-1]
        }
      end

      def scheme(url)
        u = URI.parse(url)
        u.scheme
      end
    end
  end
end
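
connect_hash simply splits a database URL into the keyword hash that Mysql2::Client.new accepts; scheme is used elsewhere to pick the driver. With illustrative values:

    include Kiba::Plus::Helper

    connect_hash('mysql://user:secret@localhost:3306/example_db')
    # => { host: "localhost", username: "user", password: "secret",
    #      port: 3306, database: "example_db" }

    scheme('mysql://user:secret@localhost:3306/example_db')
    # => "mysql"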
data/lib/kiba/plus/job.rb
@@ -0,0 +1,149 @@
require 'uri'

module Kiba
  require 'pg'
  require 'mysql2'
  require 'uri'

  module Plus
    class Job
      include Kiba::Plus::Helper

      attr_reader :options, :client
      def initialize(options)
        @options = options
        @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
        url = URI.parse(connect_url)
        if url.scheme =~ /mysql/i
          @client = Mysql2::Client.new(connect_hash(connect_url))
        elsif url.scheme =~ /postgres/i
          @client = PG.connect(connect_url)
        else
          raise 'No Imp!'
        end
      end

      def job_id
        options.fetch(:job_id, nil)
      end

      def connect_url
        options.fetch(:connect_url)
      end

      def job_name
        options.fetch(:job_name)
      end

      def start_at
        options.fetch(:start_at, Time.now)
      end

      def completed_at
        options.fetch(:completed_at, Time.now)
      end

      def start
        create_table
        result = create_job
        result.first["id"].to_i
      end

      def last_pull_at
        sql = "SELECT MAX(created_at) AS last_pull_at FROM jobs WHERE status = 'completed' AND job_name = '#{job_name}'"
        Kiba::Plus.logger.info sql
        client.query(sql).first["last_pull_at"]
      end

      def complete
        complete_job
      end

      private

      def create_table
        url = URI.parse(connect_url)
        if url.scheme =~ /mysql/i
          create_table_mysql
        elsif url.scheme =~ /postgres/i
          create_table_pg
        else
          raise 'No Imp!'
        end
      end

      def create_job
        if @client.is_a?(Mysql2::Client)
          create_job_mysql
        else
          create_job_pg
        end
      end

      def create_job_mysql
        sql = <<-SQL
          INSERT INTO jobs (
            completed_at,
            job_name,
            created_at,
            status) VALUES
            (NULL, '#{job_name}', '#{start_at.to_s}', 'executing')
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
        returning_id_sql = "SELECT LAST_INSERT_ID() AS id"
        Kiba::Plus.logger.info returning_id_sql
        @client.query(returning_id_sql)
      end

      def create_job_pg
        sql = <<-SQL
          INSERT INTO jobs (
            completed_at,
            job_name,
            created_at,
            status) VALUES
            (NULL, '#{job_name}', '#{start_at.to_s}', 'executing') RETURNING id
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def create_table_pg
        sql = <<-SQL
          CREATE TABLE IF NOT EXISTS jobs (
            id SERIAL,
            job_name varchar(255) NOT NULL,
            created_at TIMESTAMP without time zone,
            completed_at TIMESTAMP without time zone,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          )
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def create_table_mysql
        sql = <<-SQL
          CREATE TABLE IF NOT EXISTS jobs (
            id integer(11) NOT NULL AUTO_INCREMENT,
            job_name varchar(255) NOT NULL,
            created_at datetime NOT NULL,
            completed_at datetime DEFAULT NULL,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          ) AUTO_INCREMENT=1
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def complete_job
        sql = "UPDATE jobs SET status = 'completed', completed_at = '#{completed_at.to_s}' WHERE id = #{job_id} AND job_name = '#{job_name}'"
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end
    end
  end
end
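
A sketch of the intended lifecycle, based on the methods above: #start creates the jobs table if needed and inserts an 'executing' row, #last_pull_at can seed incremental extraction, and #complete marks a row 'completed'. The job name and URL are placeholders, and note that #complete interpolates job_id, so the id from #start has to be passed back in:

    job = Kiba::Plus::Job.new(
      connect_url: 'mysql://user:secret@localhost:3306/example_db',
      job_name: 'customer_sync'
    )
    job_id = job.start          # ensures the jobs table exists, returns the new row id
    since  = job.last_pull_at   # timestamp of the last completed run, or nil on first run

    # ... run the ETL, filtering the source query on `since` for incremental pulls ...

    Kiba::Plus::Job.new(
      connect_url: 'mysql://user:secret@localhost:3306/example_db',
      job_name: 'customer_sync',
      job_id: job_id
    ).complete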
data/lib/kiba/plus/source/mysql.rb
@@ -0,0 +1,50 @@
require 'mysql2'
require 'uri'

module Kiba
  module Plus::Source
    class Mysql
      include Kiba::Plus::Helper
      attr_reader :options, :client

      def initialize(options = {})
        @options = options
        @options.assert_valid_keys(
          :query,
          :output,
          :last_pull_at,
          :incremental,
          :connect_url
        )
        @client = Mysql2::Client.new(connect_hash(connect_url))
      end

      def each
        results = client.query(query, as: :hash, symbolize_keys: true, stream: true)
        results.each do |row|
          yield(row)
        end
      end

      def query
        options.fetch(:query)
      end

      def output
        options.fetch(:output)
      end

      def last_pull_at
        options[:last_pull_at]
      end

      def incremental
        options.fetch(:incremental, true)
      end

      def connect_url
        options.fetch(:connect_url)
      end
    end
  end
end
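
Putting the pieces together, a hypothetical end-to-end script in the spirit of data/examples/customer_mysql_to_pg.etl (table names, columns, and URLs are placeholders, and it assumes lib/kiba/plus.rb loads the source and destination classes):

    # customer_mysql_to_pg.etl -- run with `kiba customer_mysql_to_pg.etl`
    require 'kiba'
    require 'kiba/plus'

    # Stream rows out of MySQL; rows arrive as hashes with symbol keys.
    source Kiba::Plus::Source::Mysql,
      connect_url: 'mysql://user:secret@localhost:3306/source_db',
      query: 'SELECT id, name, email FROM customers'

    # Light row-level cleanup.
    transform do |row|
      row[:email] = row[:email].to_s.downcase
      row
    end

    # Insert into PostgreSQL via a prepared statement.
    destination Kiba::Plus::Destination::Pg,
      connect_url: 'postgresql://postgres@localhost:5432/target_db',
      prepare_sql: 'INSERT INTO customers (id, name, email) VALUES ($1, $2, $3)',
      columns: [:id, :name, :email]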