kiba-plus 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ require 'pg'
2
+
3
module Kiba
  module Plus
    module Destination
      # Destination that sends every row to PostgreSQL through a single
      # prepared statement.
      #
      # Options (checked with Hash#assert_valid_keys, which is not defined in
      # this file -- presumably ActiveSupport's core extension; confirm it is
      # loaded before this class is used):
      #   :connect_url  - required; handed to PG.connect
      #   :prepare_sql  - required; text of the statement to prepare
      #   :columns      - required; row keys, in bind-parameter order
      #   :prepare_name - optional; defaults to the downcased class name
      #                   followed by "stmt"
      class Pg
        attr_reader :options

        # Opens the connection and prepares the statement.
        #
        # If preparing fails (or a required option is missing) the connection
        # that was just opened is closed again before the error propagates.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :connect_url,
            :prepare_name,
            :prepare_sql,
            :columns
          )
          @conn = ::PG.connect(connect_url)
          begin
            @conn.prepare(prepare_name, prepare_sql)
          rescue StandardError
            close
            raise
          end
        end

        # Executes the prepared statement with the row's values for +columns+.
        #
        # A PG::Error is logged and swallowed on purpose so that one bad row
        # does not abort the whole run.
        def write(row)
          @conn.exec_prepared(prepare_name, row.values_at(*columns))
        rescue ::PG::Error => ex
          Kiba::Plus.logger.error "ERROR for #{row}"
          Kiba::Plus.logger.error ex.message
          # Maybe, write to db table or file
        end

        # Closes the connection. Calling it again is a no-op.
        def close
          return unless @conn

          @conn.close
          @conn = nil
        end

        private

        def connect_url
          options.fetch(:connect_url)
        end

        def prepare_name
          options.fetch(:prepare_name, "#{self.class.to_s.downcase}stmt")
        end

        def prepare_sql
          options.fetch(:prepare_sql)
        end

        def columns
          options.fetch(:columns)
        end
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
module Kiba
  module Plus
    module Destination
      # Destination that bulk-loads a CSV file into PostgreSQL with COPY when
      # the pipeline is closed. #write is intentionally a no-op.
      #
      # Options (checked with Hash#assert_valid_keys, which is not defined in
      # this file -- presumably ActiveSupport's core extension):
      #   :connect_url - required; handed to PG.connect
      #   :input_file  - required; path of the CSV file read by COPY
      #   :table_name  - required; target table
      #   :columns     - required; column list for COPY
      #   :truncate    - optional (default false); empty staging and target
      #                  tables when the destination is created
      #   :incremental - optional (default true); load through a staging table
      #                  and replace matching rows instead of copying directly
      #   :unique_by   - optional (default :id); column(s) used to match
      #                  staging rows against target rows
      #
      # NOTE(review): table and column names are interpolated into SQL as-is,
      # so they must come from trusted configuration.
      class PgBulk
        attr_reader :options

        # Opens the connection and, when :truncate is set, empties the staging
        # and target tables. The connection is closed again if that fails.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :connect_url,
            :input_file,
            :table_name,
            :columns,
            :truncate,
            :incremental,
            :unique_by
          )
          @conn = ::PG.connect(connect_url)
          begin
            if truncate
              truncate_staging_table
              truncate_target_table
            end
          rescue StandardError
            @conn.close
            @conn = nil
            raise
          end
        end

        def connect_url
          options.fetch(:connect_url)
        end

        def table_name
          options.fetch(:table_name)
        end

        def input_file
          options.fetch(:input_file)
        end

        def columns
          options.fetch(:columns)
        end

        def truncate
          options.fetch(:truncate, false)
        end

        def incremental
          options.fetch(:incremental, true)
        end

        def unique_by
          options.fetch(:unique_by, :id)
        end

        # Rows are not written one by one; the load happens in #close.
        def write(row)
          # blank!
        end

        # Interpolation (rather than +) so Symbol table names work too.
        def staging_table_name
          "#{table_name}_staging"
        end

        def create_staging_table
          sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        # Best effort: the staging table may not exist yet, so a database
        # error is logged and nil is returned instead of raising.
        def truncate_staging_table
          truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
          Kiba::Plus.logger.info truncate_sql
          @conn.exec(truncate_sql)
        rescue ::PG::Error => e
          Kiba::Plus.logger.warn "Could not truncate #{staging_table_name}: #{e.message}"
          nil
        end

        def truncate_target_table
          truncate_sql = "TRUNCATE TABLE #{table_name};"
          Kiba::Plus.logger.info truncate_sql
          @conn.exec(truncate_sql)
        end

        def copy_to_target_table
          copy_from_file(table_name)
        end

        def copy_to_staging_table
          copy_from_file(staging_table_name)
        end

        # TODO add where condition to speed up deleting.
        # Removes target rows that have a counterpart (by :unique_by) in the
        # staging table.
        def delete_before_insert
          where = Array(unique_by).map { |col| "#{staging_table_name}.#{col} = #{table_name}.#{col}" }.join(" AND ")
          sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        def merge_to_target_table
          sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end

        # Performs the load and closes the connection.
        #
        # Incremental mode: fill the staging table, then delete matching rows
        # from the target and insert the staged rows inside one transaction so
        # a failed insert cannot leave the target with rows deleted.
        # The connection is closed even when a step raises; calling #close
        # again afterwards is a no-op.
        def close
          return unless @conn

          begin
            if incremental
              truncate_staging_table
              create_staging_table
              copy_to_staging_table
              @conn.transaction do
                delete_before_insert
                merge_to_target_table
              end
              truncate_staging_table
            else
              copy_to_target_table
            end
          ensure
            @conn.close
            @conn = nil
          end
        end

        private

        # Runs COPY <target> FROM '<input_file>'. The path is escaped so a
        # single quote in it cannot terminate the SQL string literal.
        def copy_from_file(target)
          path = @conn.escape_string(File.expand_path(input_file))
          sql = "COPY #{target} (#{columns.join(', ')}) FROM '#{path}' WITH DELIMITER ',' NULL '\\N' CSV"
          Kiba::Plus.logger.info sql
          @conn.exec(sql)
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'pg'
2
+ require 'csv'
3
module Kiba
  module Plus
    module Destination
      # Destination that streams rows to PostgreSQL over COPY ... FROM STDIN.
      #
      # The COPY is started when the destination is created, every #write
      # sends one CSV line, and #close finishes the COPY and closes the
      # connection.
      #
      # Options (checked with Hash#assert_valid_keys, which is not defined in
      # this file -- presumably ActiveSupport's core extension):
      #   :connect_url - required; handed to PG.connect
      #   :table_name  - required; target table
      #   :columns     - required; column list for COPY and row keys to send
      #   :truncate    - optional (default false); TRUNCATE the table first
      #   :incremental - optional (default true); accepted but not used by
      #                  this class
      class PgBulk2
        attr_reader :options

        # Connects, optionally truncates the target table and starts the COPY.
        # The connection is closed again if any of these steps fails.
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :table_name,
            :columns,
            :connect_url,
            :truncate,
            :incremental
          )

          @conn = ::PG.connect(connect_url)
          begin
            if truncate
              truncate_sql = "TRUNCATE TABLE #{table_name};"
              Kiba::Plus.logger.info truncate_sql
              @conn.exec(truncate_sql)
            end
            sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
            Kiba::Plus.logger.info sql
            @conn.exec(sql)
          rescue StandardError
            @conn.close
            @conn = nil
            raise
          end
        end

        def connect_url
          options.fetch(:connect_url)
        end

        def table_name
          options.fetch(:table_name)
        end

        # Sends one row as a CSV line.
        #
        # nil values are sent as \N, the NULL marker declared in the COPY
        # statement; an empty CSV field would be loaded as an empty string.
        # On failure the COPY is ended with an error message, the server's
        # error text is logged, and the original exception is re-raised.
        def write(row)
          values = row.values_at(*columns).map { |value| value.nil? ? '\N' : value }
          @conn.put_copy_data ::CSV.generate_line(values)
        rescue StandardError => err
          errmsg = "%s while copy data: %s" % [err.class.name, err.message]
          @conn.put_copy_end(errmsg)
          result = @conn.get_result
          Kiba::Plus.logger.error(result.error_message) if result
          raise
        end

        def columns
          options.fetch(:columns)
        end

        def truncate
          options.fetch(:truncate, false)
        end

        def incremental
          options.fetch(:incremental, true)
        end

        # Finishes the COPY and closes the connection, even when finishing
        # raises. Returns the result of get_last_result; a second call is a
        # no-op.
        def close
          return unless @conn

          begin
            @conn.put_copy_end
            @conn.get_last_result
          ensure
            @conn.close
            @conn = nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'uri'
2
module Kiba
  module Plus
    # Mixin with helpers for taking database connection URLs apart.
    module Helper
      # Splits a connection URL into a hash with the keys :host, :username,
      # :password, :port and :database.
      #
      # Username, password and database name are percent-decoded, because
      # special characters in a URL have to be written as %XX and URI.parse
      # returns them still encoded. Parts missing from the URL come back as
      # nil.
      #
      # @param url [String] e.g. "mysql://user:p%40ss@localhost:3306/mydb"
      # @return [Hash]
      def connect_hash(url)
        u = URI.parse(url)
        {
          host: u.host,
          username: unescape_url_component(u.user),
          password: unescape_url_component(u.password),
          port: u.port,
          # to_s guards against a nil path; [1..-1] drops the leading "/"
          database: unescape_url_component(u.path.to_s[1..-1])
        }
      end

      # Returns the scheme part of the URL (e.g. "postgres"), or nil when the
      # URL has none.
      def scheme(url)
        URI.parse(url).scheme
      end

      private

      # Replaces %XX escapes with the bytes they stand for; nil stays nil.
      # Works on a binary copy so multi-byte sequences can be reassembled,
      # then tags the result as UTF-8.
      def unescape_url_component(value)
        return nil if value.nil?

        value.b.gsub(/%([0-9a-fA-F]{2})/) { |escape| escape[1, 2].hex.chr }
             .force_encoding(Encoding::UTF_8)
      end
    end
  end
end
@@ -0,0 +1,149 @@
1
+ require 'uri'
2
+
3
module Kiba
  require 'pg'
  require 'mysql2'
  require 'uri'

  module Plus
    # Records job runs in a `jobs` table on MySQL or PostgreSQL.
    #
    # The database is chosen from the scheme of :connect_url (anything
    # matching /mysql/i or /postgres/i); other schemes raise 'No Imp!'.
    #
    # Options (checked with Hash#assert_valid_keys, which is not defined in
    # this file -- presumably ActiveSupport's core extension):
    #   :connect_url  - required; database URL
    #   :job_name     - required; name stored with the job row
    #   :job_id       - needed by #complete; id returned earlier by #start
    #   :start_at     - optional; defaults to Time.now when read
    #   :completed_at - optional; defaults to Time.now when read
    class Job
      include Kiba::Plus::Helper

      attr_reader :options, :client

      def initialize(options)
        @options = options
        @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
        @client = if adapter == :mysql
                    Mysql2::Client.new(connect_hash(connect_url))
                  else
                    PG.connect(connect_url)
                  end
      end

      def job_id
        options.fetch(:job_id, nil)
      end

      def connect_url
        options.fetch(:connect_url)
      end

      def job_name
        options.fetch(:job_name)
      end

      def start_at
        options.fetch(:start_at, Time.now)
      end

      def completed_at
        options.fetch(:completed_at, Time.now)
      end

      # Creates the jobs table if needed and inserts an 'executing' row.
      #
      # @return [Integer] id of the new job row
      def start
        create_table
        create_job.first["id"].to_i
      end

      # Latest created_at among this job's rows with status 'completed'
      # (nil comes back from the database when there is none).
      def last_pull_at
        sql = "SELECT MAX(created_at) AS last_pull_at FROM jobs WHERE status = 'completed' AND job_name = #{quote(job_name)}"
        run(sql).first["last_pull_at"]
      end

      # Marks the job identified by :job_id and :job_name as completed.
      #
      # @raise [ArgumentError] when :job_id is missing or not an integer
      def complete
        complete_job
      end

      private

      # :mysql or :postgres, derived once from the URL scheme.
      def adapter
        @adapter ||= case scheme(connect_url)
                     when /mysql/i then :mysql
                     when /postgres/i then :postgres
                     else raise 'No Imp!'
                     end
      end

      # Single-quoted SQL literal with the value escaped by the client, so a
      # quote inside a job name cannot break or alter the statement.
      # NOTE(review): relies on both Mysql2::Client and PG::Connection
      # responding to #escape -- confirm for the gem versions in use.
      def quote(value)
        "'#{client.escape(value.to_s)}'"
      end

      # Logs the statement, then executes it on the client.
      def run(sql)
        Kiba::Plus.logger.info sql
        client.query(sql)
      end

      def create_table
        adapter == :mysql ? create_table_mysql : create_table_pg
      end

      def create_job
        adapter == :mysql ? create_job_mysql : create_job_pg
      end

      # MySQL has no RETURNING, so the id is read back with LAST_INSERT_ID().
      def create_job_mysql
        run(<<-SQL)
          INSERT INTO jobs (
            completed_at,
            job_name,
            created_at,
            status) VALUES
            (NULL, #{quote(job_name)}, #{quote(start_at)}, 'executing')
        SQL
        run("SELECT LAST_INSERT_ID() AS id")
      end

      def create_job_pg
        run(<<-SQL)
          INSERT INTO jobs (
            completed_at,
            job_name,
            created_at,
            status) VALUES
            (NULL, #{quote(job_name)}, #{quote(start_at)}, 'executing') RETURNING id
        SQL
      end

      def create_table_pg
        run(<<-SQL)
          CREATE TABLE IF NOT EXISTS jobs (
            id SERIAL,
            job_name varchar(255) NOT NULL,
            created_at TIMESTAMP without time zone,
            completed_at TIMESTAMP without time zone,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          )
        SQL
      end

      def create_table_mysql
        run(<<-SQL)
          CREATE TABLE IF NOT EXISTS jobs (
            id integer(11) NOT NULL AUTO_INCREMENT,
            job_name varchar(255) NOT NULL,
            created_at datetime NOT NULL,
            completed_at datetime DEFAULT NULL,
            status varchar(255) DEFAULT NULL,
            PRIMARY KEY (id)
          ) AUTO_INCREMENT=1
        SQL
      end

      def complete_job
        raise ArgumentError, ':job_id is required to complete a job' if job_id.nil?

        # Integer() rejects anything that is not a whole number, so the id
        # can be interpolated safely.
        sql = "UPDATE jobs SET status = 'completed', completed_at = #{quote(completed_at)} WHERE id = #{Integer(job_id)} AND job_name = #{quote(job_name)}"
        run(sql)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'logger'
2
module Kiba
  module Plus
    class << self
      # Assigns the logger used by the Kiba::Plus classes.
      attr_writer :logger

      # Returns the current logger; when none is set, a Logger writing to
      # $stdout is created and kept for later calls.
      def logger
        @logger ||= Logger.new($stdout)
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'mysql2'
2
+ require 'uri'
3
+
4
module Kiba
  module Plus
    module Source
      # Source that runs a MySQL query and yields each result row.
      #
      # Options (checked with Hash#assert_valid_keys, which is not defined in
      # this file -- presumably ActiveSupport's core extension):
      #   :connect_url  - required; URL turned into Mysql2 connection
      #                   settings by Helper#connect_hash
      #   :query        - required by #each; SQL text to run
      #   :output, :last_pull_at, :incremental - accepted and exposed through
      #                   readers, but not used inside this class
      class Mysql
        include Kiba::Plus::Helper

        attr_reader :options, :client

        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(
            :query,
            :output,
            :last_pull_at,
            :incremental,
            :connect_url
          )
          @client = Mysql2::Client.new(connect_hash(connect_url))
        end

        # Runs the query in streaming mode and yields every row as a Hash
        # with symbol keys. Without a block an Enumerator is returned
        # instead of raising LocalJumpError.
        def each(&block)
          return to_enum(:each) unless block_given?

          rows = client.query(query, as: :hash, symbolize_keys: true, stream: true)
          rows.each(&block)
        end

        def query
          options.fetch(:query)
        end

        def output
          options.fetch(:output)
        end

        def last_pull_at
          options[:last_pull_at]
        end

        def incremental
          options.fetch(:incremental, true)
        end

        def connect_url
          options.fetch(:connect_url)
        end
      end
    end
  end
end