kiba-plus 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ require 'pg'
2
+
3
module Kiba
  module Plus
    module Destination
      # Kiba destination that writes every row to PostgreSQL through a
      # prepared statement.
      #
      # Options:
      #   :connect_url  - (required) connection string handed to PG.connect
      #   :prepare_sql  - (required) SQL text of the statement to prepare
      #   :columns      - (required) row keys, in the order the statement
      #                   expects its parameters
      #   :prepare_name - (optional) name of the prepared statement; defaults
      #                   to the downcased class name followed by "stmt"
      class Pg
        attr_reader :options

        # Opens the connection and prepares the statement.
        #
        # Raises whatever PG.connect / prepare raise, and KeyError when a
        # required option is missing. The connection is closed again before
        # the error propagates so a failed setup does not leak it.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :connect_url,
            :prepare_name,
            :prepare_sql,
            :columns
          )
          @conn = PG.connect(connect_url)
          begin
            @conn.prepare(prepare_name, prepare_sql)
          rescue StandardError
            close
            raise
          end
        end

        # Executes the prepared statement with the row's values for
        # +columns+. Database errors are logged and swallowed so that one
        # bad row does not abort the whole run.
        def write(row)
          @conn.exec_prepared(prepare_name, row.values_at(*columns))
        rescue PG::Error => ex
          Kiba::Plus.logger.error "ERROR for #{row}"
          Kiba::Plus.logger.error ex.message
          # Maybe, write to db table or file
        end

        # Closes the connection. Calling it more than once is harmless.
        def close
          conn, @conn = @conn, nil
          conn.close if conn
          nil
        end

        private

        def connect_url
          options.fetch(:connect_url)
        end

        def prepare_name
          options.fetch(:prepare_name, self.class.to_s.downcase + "stmt")
        end

        def prepare_sql
          options.fetch(:prepare_sql)
        end

        def columns
          options.fetch(:columns)
        end
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
module Kiba
  module Plus
    module Destination
      # Kiba destination that bulk-loads a CSV file into a PostgreSQL table
      # with COPY ... FROM '<file>' when the destination is closed.
      #
      # Options:
      #   :connect_url - (required) connection string handed to PG.connect
      #   :input_file  - (required) path of the CSV file to load
      #   :table_name  - (required) target table
      #   :columns     - (required) column list used in the COPY statement
      #   :truncate    - truncate staging and target tables on setup
      #                  (default: false)
      #   :incremental - load through "<table>_staging" and replace matching
      #                  rows instead of copying straight into the target
      #                  (default: true)
      #   :unique_by   - column or columns that identify a row when
      #                  replacing (default: :id)
      class PgBulk
        attr_reader :options

        # Opens the connection and, when :truncate is set, empties the
        # staging and target tables. The connection is closed again if that
        # setup raises.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :connect_url,
            :input_file,
            :table_name,
            :columns,
            :truncate,
            :incremental,
            :unique_by
          )
          @conn = PG.connect(connect_url)
          begin
            if truncate
              truncate_staging_table
              truncate_target_table
            end
          rescue StandardError
            disconnect
            raise
          end
        end

        def connect_url
          options.fetch(:connect_url)
        end

        def table_name
          options.fetch(:table_name)
        end

        def input_file
          options.fetch(:input_file)
        end

        def columns
          options.fetch(:columns)
        end

        def truncate
          options.fetch(:truncate, false)
        end

        def incremental
          options.fetch(:incremental, true)
        end

        def unique_by
          options.fetch(:unique_by, :id)
        end

        # Rows are not written one by one; all loading happens in #close.
        def write(row)
          # blank!
        end

        def staging_table_name
          "#{table_name}_staging"
        end

        def create_staging_table
          sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        # Best effort: the staging table may not exist yet, so a database
        # error is logged and ignored instead of aborting the run.
        def truncate_staging_table
          truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
          Kiba::Plus.logger.info truncate_sql
          @conn.exec(truncate_sql)
        rescue PG::Error => ex
          Kiba::Plus.logger.warn "Ignored failure of '#{truncate_sql}': #{ex.message}"
          nil
        end

        def truncate_target_table
          truncate_sql = "TRUNCATE TABLE #{table_name};"
          Kiba::Plus.logger.info truncate_sql
          @conn.exec(truncate_sql)
        end

        def copy_to_target_table
          copy_from_file(table_name)
        end

        def copy_to_staging_table
          copy_from_file(staging_table_name)
        end

        # TODO add where condition to speed up deleting.
        def delete_before_insert
          where = Array(unique_by).map { |x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
          sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        def merge_to_target_table
          sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        # Loads the file and closes the connection. The connection is closed
        # even when loading raises; calling #close again is a no-op.
        def close
          return if @conn.nil?
          begin
            incremental ? load_incrementally : copy_to_target_table
          ensure
            disconnect
          end
          nil
        end

        private

        # Builds and runs the COPY statement for +target+. The file path is
        # escaped so a quote character in it cannot break the SQL literal.
        def copy_from_file(target)
          path = @conn.escape_string(File.expand_path(input_file))
          sql = "COPY #{target} (#{columns.join(', ')}) FROM '#{path}' WITH DELIMITER ',' NULL '\\N' CSV"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        # Stages the file, then replaces matching target rows. The delete
        # and the insert share one transaction so a failed insert does not
        # leave the target table with rows removed.
        def load_incrementally
          create_staging_table
          truncate_staging_table
          copy_to_staging_table
          @conn.transaction do
            delete_before_insert
            merge_to_target_table
          end
          truncate_staging_table
        end

        def disconnect
          conn, @conn = @conn, nil
          conn.close if conn
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'pg'
2
+ require 'csv'
3
module Kiba
  module Plus
    module Destination
      # Kiba destination that streams rows into a PostgreSQL table through
      # COPY ... FROM STDIN in CSV format.
      #
      # Options:
      #   :connect_url - (required) connection string handed to PG.connect
      #   :table_name  - (required) target table
      #   :columns     - (required) row keys / column list, in COPY order
      #   :truncate    - truncate the target table first (default: false)
      #   :incremental - accepted and readable, but not used by this class
      #                  (default: true)
      class PgBulk2
        # Text the COPY statement declares as the NULL marker.
        NULL_MARKER = '\\N'.freeze

        attr_reader :options

        # Opens the connection, optionally truncates the table and puts the
        # connection into COPY mode. The connection is closed again if any
        # of that raises.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(:table_name,
                                     :columns,
                                     :connect_url,
                                     :truncate,
                                     :incremental)

          @conn = PG.connect(connect_url)
          begin
            truncate_sql = "TRUNCATE TABLE #{table_name};"
            if truncate
              Kiba::Plus.logger.info truncate_sql
              @conn.exec(truncate_sql)
            end
            sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
            Kiba::Plus.logger.info sql
            @res = @conn.exec(sql)
          rescue StandardError
            disconnect
            raise
          end
        end

        def connect_url
          options.fetch(:connect_url)
        end

        def table_name
          options.fetch(:table_name)
        end

        # Sends one row as a CSV line. nil values are written as the NULL
        # marker declared in the COPY statement; an unquoted empty field
        # would not be read back as NULL under that declaration.
        #
        # On any failure the COPY is terminated with an error message, the
        # server's answer is logged, and the original exception is re-raised.
        def write(row)
          begin
            @conn.put_copy_data CSV.generate_line(copy_values(row))
          rescue Exception => err
            # Exception (not StandardError) on purpose: the COPY must be
            # ended whatever interrupted it; the error is re-raised below.
            errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
            @conn.put_copy_end( errmsg )
            result = @conn.get_result
            Kiba::Plus.logger.error(result.respond_to?(:error_message) ? result.error_message : result)
            raise
          end
        end

        def columns
          options.fetch(:columns)
        end

        def truncate
          options.fetch(:truncate, false)
        end

        def incremental
          options.fetch(:incremental, true)
        end

        # Ends the COPY, waits for its result and closes the connection.
        # The connection is closed even when ending the COPY raises;
        # calling #close again is a no-op.
        def close
          return if @conn.nil?
          @conn.put_copy_end
          @conn.get_last_result
        ensure
          disconnect
        end

        private

        # Row values in column order, with nil replaced by the NULL marker.
        def copy_values(row)
          row.values_at(*columns).map { |value| value.nil? ? NULL_MARKER : value }
        end

        def disconnect
          conn, @conn = @conn, nil
          conn.close if conn
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'uri'
2
module Kiba
  module Plus
    # Connection-URL helpers meant to be mixed into classes that open
    # database connections.
    module Helper
      # Splits a connection URL into a hash with the keys :host, :username,
      # :password, :port and :database.
      #
      # Percent-encoded characters in the user name and password are
      # decoded, so credentials containing characters such as "@" or ":"
      # can be written in the URL and still reach the client unescaped.
      # Parts missing from the URL come back as nil.
      #
      # Raises URI::InvalidURIError when +url+ cannot be parsed.
      def connect_hash(url)
        u = URI.parse(url)
        # "+" is protected first: form decoding would otherwise turn a
        # literal plus sign in the credentials into a space.
        decode = lambda do |part|
          part && URI.decode_www_form_component(part.gsub('+', '%2B'))
        end
        {
          host: u.host,
          username: decode.call(u.user),
          password: decode.call(u.password),
          port: u.port,
          database: u.path[1..-1]
        }
      end

      # Returns the scheme of +url+ (e.g. "mysql"), or nil if it has none.
      def scheme(url)
        URI.parse(url).scheme
      end
    end
  end
end
@@ -0,0 +1,149 @@
1
+ require 'uri'
2
+
3
module Kiba
  require 'pg'
  require 'mysql2'
  require 'uri'

  module Plus
    # Records ETL job runs in a "jobs" table on a MySQL or PostgreSQL
    # database, chosen by the scheme of :connect_url.
    #
    # Options:
    #   :connect_url  - (required) database URL; scheme must match
    #                   /mysql/i or /postgres/i
    #   :job_name     - (required) name stored with the job row
    #   :job_id       - id of an existing job row; needed by #complete
    #   :start_at     - creation timestamp (default: Time.now)
    #   :completed_at - completion timestamp (default: Time.now)
    class Job
      include Kiba::Plus::Helper

      # Timestamp literal without a UTC offset; Time#to_s would append one.
      TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'.freeze

      attr_reader :options, :client

      # Connects to the database named by :connect_url.
      # Raises RuntimeError ('No Imp!') for an unsupported URL scheme.
      def initialize(options)
        @options = options
        @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
        @client = if mysql?
                    Mysql2::Client.new(connect_hash(connect_url))
                  else
                    PG.connect(connect_url)
                  end
      end

      def job_id
        options.fetch(:job_id, nil)
      end

      def connect_url
        options.fetch(:connect_url)
      end

      def job_name
        options.fetch(:job_name)
      end

      def start_at
        options.fetch(:start_at, Time.now)
      end

      def completed_at
        options.fetch(:completed_at, Time.now)
      end

      # Creates the jobs table if needed, inserts an 'executing' row and
      # returns its id as an Integer.
      def start
        create_table
        result = create_job
        result.first["id"].to_i
      end

      # Returns MAX(created_at) over this job's 'completed' rows.
      def last_pull_at
        sql = "SELECT MAX(created_at) AS last_pull_at FROM jobs WHERE status = 'completed' AND job_name = #{quote(job_name)}"
        execute(sql).first["last_pull_at"]
      end

      # Marks the row identified by :job_id and :job_name as completed.
      # Raises ArgumentError when :job_id is missing or not an integer.
      def complete
        complete_job
      end

      private

      # :mysql or :postgres, derived once from the URL scheme.
      def adapter
        @adapter ||= case scheme(connect_url)
                     when /mysql/i then :mysql
                     when /postgres/i then :postgres
                     else raise 'No Imp!'
                     end
      end

      def mysql?
        adapter == :mysql
      end

      # Logs +sql+ and runs it on the client.
      def execute(sql)
        Kiba::Plus.logger.info sql
        @client.query(sql)
      end

      # Returns +value+ as a quoted SQL string literal, escaped by the
      # client so quote characters in it cannot break the statement.
      def quote(value)
        text = value.to_s
        escaped = mysql? ? @client.escape(text) : @client.escape_string(text)
        "'#{escaped}'"
      end

      # Formats time-like values without a UTC offset; anything else is
      # passed through #to_s unchanged.
      def format_time(value)
        value.respond_to?(:strftime) ? value.strftime(TIMESTAMP_FORMAT) : value.to_s
      end

      def create_table
        mysql? ? create_table_mysql : create_table_pg
      end

      def create_job
        mysql? ? create_job_mysql : create_job_pg
      end

      def create_job_mysql
        sql = <<-SQL
        INSERT INTO jobs (
        completed_at,
        job_name,
        created_at,
        status) VALUES
        (NULL, #{quote(job_name)}, #{quote(format_time(start_at))}, 'executing')
        SQL
        execute(sql)
        execute("SELECT LAST_INSERT_ID() AS id")
      end

      def create_job_pg
        sql = <<-SQL
        INSERT INTO jobs (
        completed_at,
        job_name,
        created_at,
        status) VALUES
        (NULL, #{quote(job_name)}, #{quote(format_time(start_at))}, 'executing') RETURNING id
        SQL
        execute(sql)
      end

      def create_table_pg
        sql = <<-SQL
        CREATE TABLE IF NOT EXISTS jobs (
        id SERIAL,
        job_name varchar(255) NOT NULL,
        created_at TIMESTAMP without time zone,
        completed_at TIMESTAMP without time zone,
        status varchar(255) DEFAULT NULL,
        PRIMARY KEY (id)
        )
        SQL
        execute(sql)
      end

      def create_table_mysql
        sql = <<-SQL
        CREATE TABLE IF NOT EXISTS jobs (
        id integer(11) NOT NULL AUTO_INCREMENT,
        job_name varchar(255) NOT NULL,
        created_at datetime NOT NULL,
        completed_at datetime DEFAULT NULL,
        status varchar(255) DEFAULT NULL,
        PRIMARY KEY (id)
        ) AUTO_INCREMENT=1
        SQL
        execute(sql)
      end

      def complete_job
        # Without an id the WHERE clause would be malformed SQL; fail early
        # with a clear message instead.
        raise ArgumentError, ':job_id is required to complete a job' if job_id.nil?
        id = Integer(job_id)
        sql = "UPDATE jobs SET status = 'completed', completed_at = #{quote(format_time(completed_at))} WHERE id = #{id} AND job_name = #{quote(job_name)}"
        execute(sql)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'logger'
2
module Kiba
  module Plus
    class << self
      # Replaces the logger returned by Kiba::Plus.logger.
      attr_writer :logger

      # Returns the current logger, creating one that writes to $stdout the
      # first time it is asked for and none has been assigned.
      def logger
        @logger ||= Logger.new($stdout)
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'mysql2'
2
+ require 'uri'
3
+
4
module Kiba
  module Plus::Source
    # Kiba source that runs a query against MySQL and yields each result
    # row as a hash with symbol keys.
    #
    # Options: :query and :connect_url are fetched without a default;
    # :output is fetched without a default when read; :last_pull_at
    # defaults to nil and :incremental to true.
    class Mysql
      include Kiba::Plus::Helper
      attr_reader :options, :client

      def initialize(options = {})
        @options = options
        @options.assert_valid_keys(
          :query,
          :output,
          :last_pull_at,
          :incremental,
          :connect_url
        )
        settings = connect_hash(connect_url)
        @client = Mysql2::Client.new(settings)
      end

      # Runs the query in streaming mode and yields every row to the block.
      def each
        client
          .query(query, as: :hash, symbolize_keys: true, stream: true)
          .each { |record| yield(record) }
      end

      def connect_url
        options.fetch(:connect_url)
      end

      def query
        options.fetch(:query)
      end

      def output
        options.fetch(:output)
      end

      def incremental
        options.fetch(:incremental, true)
      end

      def last_pull_at
        options[:last_pull_at]
      end
    end
  end
end