kiba-plus 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +44 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +674 -0
- data/LICENSE.txt +21 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/examples/Gemfile +4 -0
- data/examples/Gemfile.lock +31 -0
- data/examples/customer_mysql_to_csv.etl +13 -0
- data/examples/customer_mysql_to_pg.etl +27 -0
- data/examples/init.rb +8 -0
- data/examples/sources/customer.rb +0 -0
- data/kiba-plus.gemspec +35 -0
- data/lib/kiba/plus.rb +30 -0
- data/lib/kiba/plus/destination/csv.rb +29 -0
- data/lib/kiba/plus/destination/mysql.rb +45 -0
- data/lib/kiba/plus/destination/mysql_bulk.rb +63 -0
- data/lib/kiba/plus/destination/pg.rb +51 -0
- data/lib/kiba/plus/destination/pg_bulk.rb +118 -0
- data/lib/kiba/plus/destination/pg_bulk2.rb +68 -0
- data/lib/kiba/plus/helper.rb +22 -0
- data/lib/kiba/plus/job.rb +149 -0
- data/lib/kiba/plus/logger.rb +12 -0
- data/lib/kiba/plus/source/mysql.rb +50 -0
- data/lib/kiba/plus/version.rb +5 -0
- metadata +157 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'pg'
|
2
|
+
|
3
|
+
module Kiba::Plus::Destination
  # Kiba destination that sends every row to PostgreSQL through a prepared
  # statement.
  #
  # Accepted options (checked with +assert_valid_keys+):
  # * +:connect_url+  - passed as-is to +PG.connect+ (required)
  # * +:prepare_name+ - name of the prepared statement; defaults to the
  #   downcased class name followed by "stmt"
  # * +:prepare_sql+  - SQL text of the prepared statement (required)
  # * +:columns+      - row keys whose values become the bind parameters,
  #   in that order (required)
  class Pg
    attr_reader :options

    # Opens the connection and prepares the statement once up front.
    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(:connect_url, :prepare_name, :prepare_sql, :columns)
      @conn = PG.connect(connect_url)
      @conn.prepare(prepare_name, prepare_sql)
    end

    # Executes the prepared statement with the row's values for +columns+.
    # A PG::Error is logged together with the offending row and swallowed.
    def write(row)
      begin
        bind_values = row.values_at(*columns)
        @conn.exec_prepared(prepare_name, bind_values)
      rescue PG::Error => ex
        Kiba::Plus.logger.error "ERROR for #{row}"
        Kiba::Plus.logger.error ex.message
        # Maybe, write to db table or file
      end
    end

    # Closes the connection and drops the reference to it.
    def close
      @conn.close
      @conn = nil
    end

    private

    def connect_url
      options.fetch(:connect_url)
    end

    def prepare_name
      fallback_name = "#{self.class.to_s.downcase}stmt"
      options.fetch(:prepare_name, fallback_name)
    end

    def prepare_sql
      options.fetch(:prepare_sql)
    end

    def columns
      options.fetch(:columns)
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# This file references PG directly, so load it here like the sibling
# destinations do.
require 'pg'

module Kiba::Plus::Destination
  # Kiba destination that bulk-loads a CSV file into PostgreSQL with COPY.
  #
  # Rows passed to #write are ignored; the data is read from +:input_file+
  # when the destination is closed.
  #
  # Accepted options (checked with +assert_valid_keys+):
  # * +:connect_url+ - passed as-is to +PG.connect+ (required)
  # * +:input_file+  - path of the CSV file to COPY from (required)
  # * +:table_name+  - target table (required)
  # * +:columns+     - column list used in the COPY statement (required)
  # * +:truncate+    - truncate staging and target tables on init (default false)
  # * +:incremental+ - load through a staging table and replace matching
  #   rows (default true); otherwise COPY straight into the target
  # * +:unique_by+   - column(s) used to match staging and target rows
  #   (default :id)
  class PgBulk
    attr_reader :options

    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(
        :connect_url,
        :input_file,
        :table_name,
        :columns,
        :truncate,
        :incremental,
        :unique_by
      )
      @conn = PG.connect(connect_url)
      if truncate
        truncate_staging_table
        truncate_target_table
      end
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    def input_file
      options.fetch(:input_file)
    end

    def columns
      options.fetch(:columns)
    end

    def truncate
      options.fetch(:truncate, false)
    end

    def incremental
      options.fetch(:incremental, true)
    end

    def unique_by
      options.fetch(:unique_by, :id)
    end

    # Intentionally a no-op: the data is loaded from +input_file+ in #close.
    def write(row)
      # blank!
    end

    def staging_table_name
      table_name + "_staging"
    end

    def create_staging_table
      sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    # Best effort: the staging table may not exist yet, so a database error
    # is logged and ignored. Only PG::Error is rescued, so programming
    # errors are no longer hidden.
    def truncate_staging_table
      truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
      Kiba::Plus.logger.info truncate_sql
      @conn.exec(truncate_sql)
    rescue PG::Error => ex
      Kiba::Plus.logger.info "Skipped truncating #{staging_table_name}: #{ex.message}"
      nil
    end

    def truncate_target_table
      truncate_sql = "TRUNCATE TABLE #{table_name};"
      Kiba::Plus.logger.info truncate_sql
      @conn.exec(truncate_sql)
    end

    def copy_to_target_table
      run_copy(table_name)
    end

    def copy_to_staging_table
      run_copy(staging_table_name)
    end

    # TODO add where condition to speed up deleting.
    def delete_before_insert
      where = Array(unique_by).map { |x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
      sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    def merge_to_target_table
      sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    # Loads the input file and releases the connection. The connection is
    # closed even when a load step raises, and calling #close again after
    # that is a no-op.
    def close
      return if @conn.nil?

      begin
        if incremental
          load_incrementally
        else
          copy_to_target_table
        end
        nil
      ensure
        @conn.close
        @conn = nil
      end
    end

    private

    # Runs the COPY of +input_file+ into +target+.
    def run_copy(target)
      # Escape the path so a quote in it cannot terminate the SQL literal.
      path = @conn.escape_string(File.expand_path(input_file))
      sql = "COPY #{target} (#{columns.join(', ')}) FROM '#{path}' WITH DELIMITER ',' NULL '\\N' CSV"
      Kiba::Plus.logger.info sql
      @conn.exec(sql)
    end

    # Stages the file, then replaces matching target rows.
    def load_incrementally
      # Create first so the truncate below always has a table to act on.
      create_staging_table
      truncate_staging_table
      copy_to_staging_table
      # Delete and re-insert atomically: if the insert fails, the rows
      # removed by the delete are restored by the rollback.
      @conn.transaction do
        delete_before_insert
        merge_to_target_table
      end
      truncate_staging_table
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'pg'
|
2
|
+
require 'csv'
|
3
|
+
module Kiba::Plus::Destination
  # Kiba destination that streams rows into PostgreSQL with
  # COPY ... FROM STDIN.
  #
  # The COPY statement is started in the constructor, each #write sends one
  # CSV line, and #close ends the COPY and closes the connection.
  #
  # Accepted options (checked with +assert_valid_keys+):
  # * +:connect_url+ - passed as-is to +PG.connect+ (required)
  # * +:table_name+  - target table (required)
  # * +:columns+     - row keys / column list, in COPY order (required)
  # * +:truncate+    - truncate the target table first (default false)
  # * +:incremental+ - accepted and exposed through #incremental; not used
  #   by this class itself
  class PgBulk2
    attr_reader :options

    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(:table_name,
                                 :columns,
                                 :connect_url,
                                 :truncate,
                                 :incremental)

      @conn = PG.connect(connect_url)
      if truncate
        truncate_sql = "TRUNCATE TABLE #{table_name};"
        Kiba::Plus.logger.info truncate_sql
        @conn.exec(truncate_sql)
      end
      sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
      Kiba::Plus.logger.info sql
      @res = @conn.exec(sql)
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    # Sends one row to the running COPY as a CSV line.
    #
    # On failure the COPY is ended with an error message, the server's
    # response is logged, and the original error is re-raised. Only
    # StandardError is handled, so signals and SystemExit pass through
    # untouched.
    def write(row)
      @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
    rescue StandardError => err
      errmsg = "%s while copy data: %s" % [err.class.name, err.message]
      @conn.put_copy_end(errmsg)
      result = @conn.get_result
      # Log the server's message rather than the inspect form of the result.
      Kiba::Plus.logger.error(result ? result.error_message : errmsg)
      raise
    end

    def columns
      options.fetch(:columns)
    end

    def truncate
      options.fetch(:truncate, false)
    end

    def incremental
      options.fetch(:incremental, true)
    end

    # Ends the COPY, collects its result and closes the connection. The
    # connection is closed even when finishing the COPY raises. Calling
    # #close again afterwards is a no-op.
    def close
      return if @conn.nil?

      begin
        @conn.put_copy_end
        @conn.get_last_result
      ensure
        @conn.close
        @conn = nil
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'uri'
|
2
|
+
module Kiba
  module Plus
    # Helpers for working with database connection URLs.
    module Helper
      # Splits a connection URL into a hash of connection settings.
      #
      # url - connection URL string, e.g. "mysql://user:secret@host:3306/db"
      #
      # Returns a Hash with the keys :host, :username, :password, :port and
      # :database. Parts missing from the URL are nil. User name and password
      # are percent-decoded, so credentials with reserved characters (written
      # as "%40" etc. in the URL) come out in plain form.
      def connect_hash(url)
        u = URI.parse(url)
        {
          host: u.host,
          username: decode_userinfo(u.user),
          password: decode_userinfo(u.password),
          port: u.port,
          # path is nil for URIs without a path component; to_s avoids a
          # NoMethodError and yields nil for the database in that case.
          database: u.path.to_s[1..-1]
        }
      end

      # Returns the scheme of +url+ (e.g. "mysql"), or nil when it has none.
      def scheme(url)
        URI.parse(url).scheme
      end

      private

      # Percent-decodes one userinfo component; nil stays nil.
      def decode_userinfo(value)
        return value if value.nil?
        # decode_www_form_component turns "+" into a space, but "+" is a
        # literal character in userinfo, so protect it before decoding.
        URI.decode_www_form_component(value.gsub('+', '%2B'))
      end
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Kiba
  require 'pg'
  require 'mysql2'
  require 'uri'

  module Plus
    # Records ETL job runs in a +jobs+ table on MySQL or PostgreSQL.
    #
    # Accepted options (checked with +assert_valid_keys+):
    # * +:connect_url+  - database URL; the scheme selects the client (required)
    # * +:job_name+     - name stored with the job row (required)
    # * +:job_id+       - id of the row to complete (needed by #complete)
    # * +:start_at+     - creation time of the job (default Time.now)
    # * +:completed_at+ - completion time of the job (default Time.now)
    class Job
      include Kiba::Plus::Helper

      # Timestamp layout written into SQL. It carries no UTC offset, unlike
      # Time#to_s.
      TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'.freeze

      attr_reader :options, :client

      # Raises RuntimeError ('No Imp!') when the URL scheme matches neither
      # mysql nor postgres.
      def initialize(options)
        @options = options
        @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
        @client =
          case scheme(connect_url)
          when /mysql/i
            Mysql2::Client.new(connect_hash(connect_url))
          when /postgres/i
            PG.connect(connect_url)
          else
            raise 'No Imp!'
          end
      end

      def job_id
        options.fetch(:job_id, nil)
      end

      def connect_url
        options.fetch(:connect_url)
      end

      def job_name
        options.fetch(:job_name)
      end

      def start_at
        options.fetch(:start_at, Time.now)
      end

      def completed_at
        options.fetch(:completed_at, Time.now)
      end

      # Creates the jobs table if needed and inserts an 'executing' row.
      #
      # Returns the Integer id of the new row.
      def start
        create_table
        create_job.first["id"].to_i
      end

      # Returns the latest created_at among completed jobs with this name,
      # as delivered by the database client.
      def last_pull_at
        sql = "SELECT MAX(created_at) AS last_pull_at FROM jobs WHERE status = 'completed' AND job_name = '#{escape(job_name)}'"
        Kiba::Plus.logger.info sql
        client.query(sql).first["last_pull_at"]
      end

      # Marks the job identified by +:job_id+ and +:job_name+ as completed.
      #
      # Raises ArgumentError when +:job_id+ was not supplied.
      def complete
        complete_job
      end

      private

      def mysql?
        @client.is_a?(Mysql2::Client)
      end

      # Escapes +value+ for use inside a single-quoted SQL literal, using the
      # escaping routine of whichever client is connected.
      def escape(value)
        text = value.to_s
        mysql? ? @client.escape(text) : @client.escape_string(text)
      end

      # Renders time-like values without a UTC offset; anything else (for
      # example an already formatted String) is passed through via to_s.
      def format_time(value)
        value.respond_to?(:strftime) ? value.strftime(TIMESTAMP_FORMAT) : value.to_s
      end

      def create_table
        mysql? ? create_table_mysql : create_table_pg
      end

      def create_job
        mysql? ? create_job_mysql : create_job_pg
      end

      # INSERT statement shared by both back ends.
      def insert_job_sql
        <<-SQL
          INSERT INTO jobs (
            completed_at,
            job_name,
            created_at,
            status) VALUES
            (NULL, '#{escape(job_name)}', '#{escape(format_time(start_at))}', 'executing')
        SQL
      end

      def create_job_mysql
        sql = insert_job_sql
        Kiba::Plus.logger.info sql
        @client.query(sql)
        returning_id_sql = "SELECT LAST_INSERT_ID() AS id"
        Kiba::Plus.logger.info returning_id_sql
        @client.query(returning_id_sql)
      end

      def create_job_pg
        sql = "#{insert_job_sql.rstrip} RETURNING id"
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def create_table_pg
        sql = <<-SQL
          CREATE TABLE IF NOT EXISTS jobs (
            id SERIAL,
            job_name varchar(255) NOT NULL,
            created_at TIMESTAMP without time zone,
            completed_at TIMESTAMP without time zone,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          )
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def create_table_mysql
        sql = <<-SQL
          CREATE TABLE IF NOT EXISTS jobs (
            id integer(11) NOT NULL AUTO_INCREMENT,
            job_name varchar(255) NOT NULL,
            created_at datetime NOT NULL,
            completed_at datetime DEFAULT NULL,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          ) AUTO_INCREMENT=1
        SQL
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      def complete_job
        # Without an id the WHERE clause would be malformed SQL; fail early
        # with a clear message instead.
        raise ArgumentError, ':job_id is required to complete a job' if job_id.nil?

        # Integer() guarantees that only a number reaches the statement.
        sql = "UPDATE jobs SET status = 'completed', completed_at = '#{escape(format_time(completed_at))}' WHERE id = #{Integer(job_id)} AND job_name = '#{escape(job_name)}'"
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Kiba
  module Plus::Source
    # Kiba source that yields the rows of a MySQL query.
    #
    # Accepted options (checked with +assert_valid_keys+):
    # * +:connect_url+  - database URL, split by Helper#connect_hash (required)
    # * +:query+        - SQL text to run (required)
    # * +:output+       - exposed through #output (required when read)
    # * +:last_pull_at+ - exposed through #last_pull_at (nil when absent)
    # * +:incremental+  - exposed through #incremental (default true)
    class Mysql
      include Kiba::Plus::Helper
      attr_reader :options, :client

      def initialize(options = {})
        @options = options
        @options.assert_valid_keys(:query, :output, :last_pull_at, :incremental, :connect_url)
        settings = connect_hash(connect_url)
        @client = Mysql2::Client.new(settings)
      end

      # Runs the query in streaming mode and yields each row as a Hash with
      # symbol keys.
      def each
        client.query(query, as: :hash, symbolize_keys: true, stream: true).each do |record|
          yield(record)
        end
      end

      def query
        options.fetch(:query)
      end

      def output
        options.fetch(:output)
      end

      def last_pull_at
        options[:last_pull_at]
      end

      def incremental
        options.fetch(:incremental, true)
      end

      def connect_url
        options.fetch(:connect_url)
      end
    end
  end
end
|