kiba-plus 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 39c5c1848a724492f99fb2472d5743e7c0848d35
4
- data.tar.gz: 547f4f57babb41d3cd664fe873bd55f730ff4ce0
3
+ metadata.gz: 3ab51f248407db0efeea72628dc83f639fa952f4
4
+ data.tar.gz: e1d93e552d093ea421c1330ee2d1dbcf3f563c0d
5
5
  SHA512:
6
- metadata.gz: 778e4bad4aa95d371625962431790a74fbee12077dc4406cb962ca975629dbab850e2241141656802d71196dfaed0b8598b5bcc130e16af180c058c02b8a1e38
7
- data.tar.gz: 45a9389410c08e7eedc594370b9ca70d1656662b87c13f70589cc86597ccc3470a12318499011a98521dff44f21dd154dbdc4716282bf18dafd11c524953f80a
6
+ metadata.gz: 55cfec8dcfcad2f4224bcaf3d37d6aa2e7ca7831786cf0b099ec8800f4b3843b6d19e2ff06a8c57af11c10f8d91be3bf920c5410a3895bb3646f5228065e9471
7
+ data.tar.gz: 5468e321b8b4dbfb9d8174a41964e7b79c8d5d1c8af9129b751621d02759629bb170ef61c51999483df1b260900e096229ea7d5420e9bdbb3db421f51515c356
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ../
3
3
  specs:
4
- kiba-plus (0.1.0)
4
+ kiba-plus (0.1.2)
5
5
  kiba (~> 0.6)
6
6
  mysql2 (~> 0.4)
7
7
  pg (~> 0.18)
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'init'
3
+
4
+ SOURCE_URL = 'mysql://root@localhost/shopperplus'
5
+
6
+ DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'
7
+
8
+ pre_process do
9
+ @job_id = Kiba::Plus::Job.new(
10
+ :connect_url => DEST_URL,
11
+ :start_at => Time.now,
12
+ :job_name => "customer"
13
+ ).start
14
+ end
15
+
16
+ last_pull_at = Kiba::Plus::Job.new(
17
+ :connect_url => DEST_URL,
18
+ :job_name => "customer"
19
+ ).last_pull_at
20
+
21
+ source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
22
+ :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers WHERE updated_at > '#{last_pull_at.to_s}'},
23
+ :last_pull_at => last_pull_at,
24
+ :incremental => true
25
+ }
26
+
27
+ destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
28
+ :table_name => "customers",
29
+ :truncate => false,
30
+ :columns => [:id, :email, :first_name, :last_name],
31
+ :incremental => true,
32
+ :unique_by => :id
33
+ }
34
+
35
+ post_process do
36
+ Kiba::Plus::Job.new(
37
+ :connect_url => DEST_URL,
38
+ :job_id => @job_id,
39
+ :job_name => "customer"
40
+ ).complete
41
+ result = PG.connect(DEST_URL).query("SELECT COUNT(*) AS num FROM customers")
42
+ puts "Insert total: #{result.first['num']}"
43
+ end
44
+
45
+ # Output:
46
+ # I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
47
+ # I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
48
+ # Insert total: 428972
@@ -1,5 +1,7 @@
1
+ require_relative 'pg_bulk_utils'
1
2
  module Kiba::Plus::Destination
2
3
  class PgBulk
4
+ include PgBulkUtils
3
5
  attr_reader :options
4
6
 
5
7
  def initialize(options = {})
@@ -48,32 +50,6 @@ module Kiba::Plus::Destination
48
50
  options.fetch(:unique_by, :id)
49
51
  end
50
52
 
51
- def write(row)
52
- # blank!
53
- end
54
-
55
- def staging_table_name
56
- table_name + "_staging"
57
- end
58
-
59
- def create_staging_table
60
- sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
61
- Kiba::Plus.logger.info sql
62
- @conn.exec(sql)
63
- end
64
-
65
- def truncate_staging_table
66
- truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
67
- Kiba::Plus.logger.info truncate_sql
68
- @conn.exec(truncate_sql) rescue nil
69
- end
70
-
71
- def truncate_target_table
72
- truncate_sql = "TRUNCATE TABLE #{table_name};"
73
- Kiba::Plus.logger.info truncate_sql
74
- @conn.exec(truncate_sql)
75
- end
76
-
77
53
  def copy_to_target_table
78
54
  sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
79
55
  Kiba::Plus.logger.info sql
@@ -86,18 +62,8 @@ module Kiba::Plus::Destination
86
62
  @conn.exec(sql)
87
63
  end
88
64
 
89
- # TODO add where condition to speed up deleting.
90
- def delete_before_insert
91
- where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
92
- sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
93
- Kiba::Plus.logger.info sql
94
- @conn.exec(sql)
95
- end
96
-
97
- def merge_to_target_table
98
- sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
99
- Kiba::Plus.logger.info sql
100
- @conn.exec(sql)
65
+ def write(row)
66
+ # blank!
101
67
  end
102
68
 
103
69
  def close
@@ -1,7 +1,9 @@
1
1
  require 'pg'
2
2
  require 'csv'
3
+ require_relative 'pg_bulk_utils'
3
4
  module Kiba::Plus::Destination
4
5
  class PgBulk2
6
+ include PgBulkUtils
5
7
  attr_reader :options
6
8
 
7
9
  def initialize(options = {})
@@ -10,16 +12,22 @@ module Kiba::Plus::Destination
10
12
  :columns,
11
13
  :connect_url,
12
14
  :truncate,
13
- :incremental
15
+ :incremental,
16
+ :unique_by
14
17
  )
15
18
 
16
19
  @conn = PG.connect(connect_url)
17
- truncate_sql = "TRUNCATE TABLE #{table_name};"
18
20
  if truncate
19
- Kiba::Plus.logger.info truncate_sql
20
- @conn.exec(truncate_sql)
21
+ truncate_staging_table
22
+ truncate_target_table
23
+ end
24
+ if incremental
25
+ truncate_staging_table
26
+ create_staging_table
27
+ sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
28
+ else
29
+ sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
21
30
  end
22
- sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
23
31
  Kiba::Plus.logger.info sql
24
32
  @res = @conn.exec(sql)
25
33
  end
@@ -55,9 +63,18 @@ module Kiba::Plus::Destination
55
63
  options.fetch(:incremental, true)
56
64
  end
57
65
 
66
+ def unique_by
67
+ options.fetch(:unique_by, :id)
68
+ end
69
+
58
70
  def close
59
71
  @conn.put_copy_end
60
72
  @conn.get_last_result
73
+ if incremental
74
+ delete_before_insert
75
+ merge_to_target_table
76
+ truncate_staging_table
77
+ end
61
78
  rescue
62
79
  raise
63
80
  ensure
@@ -0,0 +1,39 @@
1
+ module Kiba::Plus::Destination
2
+ module PgBulkUtils
3
+ def staging_table_name
4
+ table_name + "_staging"
5
+ end
6
+
7
+ def create_staging_table
8
+ sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
9
+ Kiba::Plus.logger.info sql
10
+ @conn.exec(sql)
11
+ end
12
+
13
+ def truncate_staging_table
14
+ truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
15
+ Kiba::Plus.logger.info truncate_sql
16
+ @conn.exec(truncate_sql) rescue nil
17
+ end
18
+
19
+ def truncate_target_table
20
+ truncate_sql = "TRUNCATE TABLE #{table_name};"
21
+ Kiba::Plus.logger.info truncate_sql
22
+ @conn.exec(truncate_sql)
23
+ end
24
+
25
+ # TODO add where condition to speed up deleting.
26
+ def delete_before_insert
27
+ where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
28
+ sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
29
+ Kiba::Plus.logger.info sql
30
+ @conn.exec(sql)
31
+ end
32
+
33
+ def merge_to_target_table
34
+ sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
35
+ Kiba::Plus.logger.info sql
36
+ @conn.exec(sql)
37
+ end
38
+ end
39
+ end
@@ -1,5 +1,5 @@
1
1
  module Kiba
2
2
  module Plus
3
- VERSION = "0.1.1"
3
+ VERSION = "0.1.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba-plus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hooopo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-17 00:00:00.000000000 Z
11
+ date: 2016-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: kiba
@@ -119,6 +119,7 @@ files:
119
119
  - examples/customer_mysql_to_csv.etl
120
120
  - examples/customer_mysql_to_pg.etl
121
121
  - examples/data/customer.csv
122
+ - examples/incremental_insert.etl
122
123
  - examples/init.rb
123
124
  - examples/sources/customer.rb
124
125
  - kiba-plus.gemspec
@@ -129,6 +130,7 @@ files:
129
130
  - lib/kiba/plus/destination/pg.rb
130
131
  - lib/kiba/plus/destination/pg_bulk.rb
131
132
  - lib/kiba/plus/destination/pg_bulk2.rb
133
+ - lib/kiba/plus/destination/pg_bulk_utils.rb
132
134
  - lib/kiba/plus/helper.rb
133
135
  - lib/kiba/plus/job.rb
134
136
  - lib/kiba/plus/logger.rb