kiba-plus 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3ab51f248407db0efeea72628dc83f639fa952f4
4
- data.tar.gz: e1d93e552d093ea421c1330ee2d1dbcf3f563c0d
3
+ metadata.gz: b37f1fbad737a5141060feae03a0d99b98b20d42
4
+ data.tar.gz: f72a505eaf75bf1f24be63de334b63ecd955c881
5
5
  SHA512:
6
- metadata.gz: 55cfec8dcfcad2f4224bcaf3d37d6aa2e7ca7831786cf0b099ec8800f4b3843b6d19e2ff06a8c57af11c10f8d91be3bf920c5410a3895bb3646f5228065e9471
7
- data.tar.gz: 5468e321b8b4dbfb9d8174a41964e7b79c8d5d1c8af9129b751621d02759629bb170ef61c51999483df1b260900e096229ea7d5420e9bdbb3db421f51515c356
6
+ metadata.gz: 6f22ebbaa54bcf735a58235c9485342763aa8bcff593d292b6d055897db79a9963ffae44d71bdd9612f20cce76de77d2f6dbcdb04c0fc2b8369fd33734cf2f98
7
+ data.tar.gz: e7088d77e6e03bb90445f0424a72532fc264ef87503f17fb7e4468bd7d481de3b17fc9994c06b4aa042d66dcbde00c40cf6d68497748028e3c2102b7e61ba1dc
data/.gitignore CHANGED
@@ -42,3 +42,6 @@ build/
42
42
 
43
43
  # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
44
44
  .rvmrc
45
+
46
+ /test/pg_copy_tmp/*
47
+ !/test/pg_copy_tmp/.keep
data/.travis.yml CHANGED
@@ -1,4 +1,25 @@
1
1
  language: ruby
2
+
2
3
  rvm:
3
4
  - 2.2.4
4
- before_install: gem install bundler -v 1.11.2
5
+
6
+ services:
7
+ - mysql
8
+ - postgresql
9
+
10
+ env:
11
+ - MYSQL2_SRC_CONNECT_URL=mysql2://travis@localhost/kiba_plus_src_test
12
+ - MYSQL2_DEST_CONNECT_URL=mysql2://travis@localhost/kiba_plus_dest_test
13
+ - PG_SRC_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_src_test
14
+ - PG_DEST_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_dest_test
15
+
16
+ before_install:
17
+ - gem install bundler -v 1.11.2
18
+
19
+ before_script:
20
+ - mysql -e 'create database kiba_plus_src_test;'
21
+ - mysql -e 'create database kiba_plus_dest_test;'
22
+ - psql -c 'create database kiba_plus_src_test;' -U postgres
23
+ - psql -c 'create database kiba_plus_dest_test;' -U postgres
24
+
25
+ script: bundle exec rake test
data/README.md CHANGED
@@ -4,6 +4,8 @@ Kiba enhancement for Ruby ETL. It connects to various data sources including rel
4
4
  # Usage
5
5
 
6
6
  ```ruby
7
+ # /tmp/customer_mysql_to_pg.etl
8
+
7
9
  require 'kiba/plus'
8
10
 
9
11
  SOURCE_URL = 'mysql://root@localhost/shopperplus'
@@ -27,22 +29,33 @@ post_process do
27
29
  end
28
30
  ```
29
31
 
30
- Execute:
32
+ Execute in shell:
31
33
 
32
34
  ```shell
33
- bundle exec kiba customer_mysql_to_pg.etl
34
- ```
35
-
36
- Output:
35
+ $ bundle exec kiba /tmp/customer_mysql_to_pg.etl
37
36
 
38
- ```
39
37
  # Output:
40
38
  # I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
41
39
  # I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
42
40
  # Insert total: 428972
43
41
  ```
44
42
 
45
- More Examples(TODO).
43
+ Execute in ruby script:
44
+
45
+ ```ruby
46
+ require 'kiba'
47
+
48
+ job_definition = Kiba.parse(IO.read('/tmp/customer_mysql_to_pg.etl'), '/tmp/customer_mysql_to_pg.etl')
49
+ Kiba.run(job_definition)
50
+ ```
51
+
52
+ # Examples
53
+
54
+ * [CSV to MySQL](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_mysql.etl)
55
+ * [CSV to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_pg.etl)
56
+ * [MySQL to CSV](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_csv.etl)
57
+ * [MySQL to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_pg.etl)
58
+ * [MySQL incremental to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/incremental_insert.etl)
46
59
 
47
60
  # Main Feature
48
61
 
@@ -73,6 +86,7 @@ More Examples(TODO).
73
86
  Add this line to your application's Gemfile:
74
87
 
75
88
  ```ruby
89
+ gem 'kiba'
76
90
  gem 'kiba-plus'
77
91
  ```
78
92
 
@@ -84,10 +98,21 @@ Or install it yourself as:
84
98
 
85
99
  $ gem install kiba-plus
86
100
 
87
- ## Usage
101
+ ## Development
88
102
 
103
+ First of all, Please run the following code in shell.
89
104
 
90
- ## Development
105
+ ```bash
106
+
107
+ $ mysql -e 'create database kiba_plus_src_test;'
108
+
109
+ $ mysql -e 'create database kiba_plus_dest_test;'
110
+
111
+ $ psql -c 'create database kiba_plus_src_test;' -U postgres
112
+
113
+ $ psql -c 'create database kiba_plus_dest_test;' -U postgres
114
+
115
+ ```
91
116
 
92
117
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
93
118
 
@@ -11,6 +11,6 @@ destination Kiba::Plus::Destination::MysqlBulk, { :connect_url => DEST_URL,
11
11
  :incremental => false
12
12
  }
13
13
  post_process do
14
- result = Mysql2::Client.new(connect_hash(DEST_URL)).query("SELECT COUNT(*) AS num FROM customers")
14
+ result = Mysql2::Client.new(mysql2_connect_hash(DEST_URL)).query("SELECT COUNT(*) AS num FROM customers")
15
15
  puts "Insert total: #{result.first['num']}"
16
16
  end
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  require_relative 'init'
3
3
 
4
- DEST_URL = 'mysql://root@localhost/crm2_dev'
4
+ SOURCE_URL = 'mysql://root@localhost/shopperplus'
5
5
 
6
6
  source Kiba::Plus::Source::Mysql, :connect_url => SOURCE_URL,
7
7
  :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
data/kiba-plus.gemspec CHANGED
@@ -26,10 +26,15 @@ Gem::Specification.new do |spec|
26
26
  spec.bindir = "exe"
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
+
29
30
  spec.add_runtime_dependency "kiba", "~> 0.6"
30
31
  spec.add_runtime_dependency "mysql2", "~> 0.4"
31
32
  spec.add_runtime_dependency "pg", "~> 0.18"
33
+
32
34
  spec.add_development_dependency "bundler", "~> 1.11"
33
35
  spec.add_development_dependency "rake", "~> 10.0"
34
36
  spec.add_development_dependency "minitest", "~> 5.0"
37
+ spec.add_development_dependency 'database_cleaner', '~> 1.5.3'
38
+ spec.add_development_dependency 'sequel', '~> 4.34'
39
+ spec.add_development_dependency 'pry'
35
40
  end
@@ -2,17 +2,17 @@ require 'csv'
2
2
 
3
3
  module Kiba::Plus::Destination
4
4
  class Csv
5
- attr_reader :options
5
+ attr_reader :options, :csv
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
9
9
  @options.assert_valid_keys(
10
10
  :output_file,
11
+ :mode,
11
12
  :row_sep,
12
13
  :col_sep,
13
14
  :force_quotes,
14
- :quote_char,
15
- :mode
15
+ :quote_char
16
16
  )
17
17
  @csv = CSV.open(output_file, mode, {
18
18
  :col_sep => col_sep,
@@ -22,36 +22,39 @@ module Kiba::Plus::Destination
22
22
  })
23
23
  end
24
24
 
25
- def mode
26
- options.fetch(:mode, "w")
25
+ def write(row)
26
+ @csv << row.values
27
27
  end
28
28
 
29
- def output_file
30
- options.fetch(:output_file)
29
+ def close
30
+ @csv.close
31
31
  end
32
32
 
33
- def col_sep
34
- options.fetch(:col_sep, ",")
35
- end
33
+ private
36
34
 
37
- def quote_char
38
- options.fetch(:quote_char, '"')
35
+ def output_file
36
+ options.fetch(:output_file)
39
37
  end
40
38
 
41
- def force_quotes
42
- options.fetch(:force_quotes, false)
39
+ def mode
40
+ options.fetch(:mode, "w")
43
41
  end
44
42
 
45
43
  def row_sep
46
44
  options.fetch(:row_sep, "\n")
47
45
  end
48
46
 
49
- def write(row)
50
- @csv << row.values
47
+ def col_sep
48
+ options.fetch(:col_sep, ",")
51
49
  end
52
50
 
53
- def close
54
- @csv.close
51
+ def force_quotes
52
+ options.fetch(:force_quotes, false)
53
+ end
54
+
55
+ def quote_char
56
+ options.fetch(:quote_char, '"')
55
57
  end
58
+
56
59
  end
57
60
  end
@@ -3,7 +3,7 @@ require 'mysql2'
3
3
  module Kiba::Plus::Destination
4
4
  class Mysql
5
5
  include Kiba::Plus::Helper
6
- attr_reader :options
6
+ attr_reader :options, :client
7
7
 
8
8
  def initialize(options = {})
9
9
  @options = options
@@ -12,8 +12,8 @@ module Kiba::Plus::Destination
12
12
  :prepare_sql,
13
13
  :columns
14
14
  )
15
- @client = Mysql2::Client.new(connect_hash(connect_url))
16
- @pre_stmt = @client.prepare(prepare_sql)
15
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
16
+ init
17
17
  end
18
18
 
19
19
  def write(row)
@@ -21,6 +21,7 @@ module Kiba::Plus::Destination
21
21
  rescue => e
22
22
  Kiba::Plus.logger.error "ERROR for #{row}"
23
23
  Kiba::Plus.logger.error e.message
24
+ raise e
24
25
  end
25
26
 
26
27
  def close
@@ -30,6 +31,10 @@ module Kiba::Plus::Destination
30
31
 
31
32
  private
32
33
 
34
+ def init
35
+ @pre_stmt = @client.prepare(prepare_sql)
36
+ end
37
+
33
38
  def connect_url
34
39
  options.fetch(:connect_url)
35
40
  end
@@ -3,45 +3,53 @@ require 'mysql2'
3
3
  module Kiba::Plus::Destination
4
4
  class MysqlBulk
5
5
  include Kiba::Plus::Helper
6
- attr_reader :options
6
+ attr_reader :options, :client
7
7
 
8
8
  def initialize(options = {})
9
9
  @options = options
10
- @options.assert_valid_keys(:table_name,
11
- :columns,
12
- :input_file,
13
- :connect_url,
14
- :truncate,
15
- :incremental,
16
- :delimited_by,
17
- :enclosed_by,
18
- :ignore_lines
19
- )
20
-
21
- @client = Mysql2::Client.new(connect_hash(connect_url).merge(local_infile: true))
10
+ @options.assert_valid_keys(
11
+ :connect_url,
12
+ :table_name,
13
+ :columns,
14
+ :truncate,
15
+ :incremental,
16
+ :input_file,
17
+ :ignore_input_file_header,
18
+ :delimited_by,
19
+ :enclosed_by,
20
+ :ignore_lines
21
+ )
22
+
23
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url).merge(local_infile: true))
22
24
  end
23
25
 
24
- def connect_url
25
- options.fetch(:connect_url)
26
+ def write(row)
27
+ # blank!
26
28
  end
27
29
 
28
- def table_name
29
- options.fetch(:table_name)
30
- end
30
+ def close
31
+ if truncate
32
+ sql = truncate_sql
33
+ Kiba::Plus.logger.info sql
34
+ client.query(sql)
35
+ end
31
36
 
32
- def delimited_by
33
- options.fetch(:delimited_by, ",")
34
- end
37
+ sql = bulk_sql
38
+ Kiba::Plus.logger.info sql
39
+ client.query(sql)
35
40
 
36
- def enclosed_by
37
- options.fetch(:enclosed_by, '"')
41
+ client.close
42
+ @client = nil
38
43
  end
39
44
 
40
- def ignore_lines
41
- options.fetch(:ignore_lines, 0)
45
+ private
46
+
47
+ def connect_url
48
+ options.fetch(:connect_url)
42
49
  end
43
50
 
44
- def write(row)
51
+ def table_name
52
+ options.fetch(:table_name)
45
53
  end
46
54
 
47
55
  def columns
@@ -60,27 +68,46 @@ module Kiba::Plus::Destination
60
68
  options.fetch(:input_file)
61
69
  end
62
70
 
63
- def close
64
- if truncate
65
- truncate_sql = "TRUNCATE TABLE #{table_name};"
66
- Kiba::Plus.logger.info truncate_sql
67
- @client.query(truncate_sql)
68
- end
71
+ def ignore_input_file_header
72
+ !!options.fetch(:ignore_input_file_header, false)
73
+ end
74
+
75
+ def delimited_by
76
+ options.fetch(:delimited_by, ",")
77
+ end
78
+
79
+ def enclosed_by
80
+ options.fetch(:enclosed_by, '"')
81
+ end
82
+
83
+ def ignore_lines
84
+ options.fetch(:ignore_lines, 0).to_i
85
+ end
86
+
87
+ def real_ignore_lines
88
+ lines = ignore_lines
89
+ lines += 1 if ignore_input_file_header
90
+ lines
91
+ end
69
92
 
70
- bulk_sql = <<-SQL
71
- LOAD DATA LOCAL INFILE '#{input_file}'
72
- REPLACE INTO TABLE #{table_name}
73
- FIELDS
93
+ def truncate_sql
94
+ sql = "TRUNCATE TABLE #{table_name}"
95
+ format_sql sql
96
+ end
97
+
98
+ def bulk_sql
99
+ sql = <<-SQL
100
+ LOAD DATA LOCAL INFILE '#{input_file}'
101
+ REPLACE
102
+ INTO TABLE #{table_name}
103
+ FIELDS
74
104
  TERMINATED BY '#{delimited_by}'
75
105
  ENCLOSED BY '#{enclosed_by}'
76
- IGNORE #{ignore_lines} LINES
77
- (#{columns.join(',')})
106
+ IGNORE #{real_ignore_lines} LINES
107
+ (#{columns.join(',')})
78
108
  SQL
79
- Kiba::Plus.logger.info bulk_sql
80
- @client.query(bulk_sql)
81
-
82
- @client.close
83
- @client = nil
109
+ format_sql sql
84
110
  end
111
+
85
112
  end
86
113
  end
@@ -2,18 +2,20 @@ require 'pg'
2
2
 
3
3
  module Kiba::Plus::Destination
4
4
  class Pg
5
- attr_reader :options
5
+ attr_reader :options, :conn
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
9
9
  @options.assert_valid_keys(
10
10
  :connect_url,
11
+ :schema,
11
12
  :prepare_name,
12
13
  :prepare_sql,
13
14
  :columns
14
15
  )
15
16
  @conn = PG.connect(connect_url)
16
- @conn.prepare(prepare_name, prepare_sql)
17
+ @conn.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
18
+ init
17
19
  end
18
20
 
19
21
  def write(row)
@@ -23,6 +25,7 @@ module Kiba::Plus::Destination
23
25
  Kiba::Plus.logger.error "ERROR for #{row}"
24
26
  Kiba::Plus.logger.error ex.message
25
27
  # Maybe, write to db table or file
28
+ raise ex
26
29
  end
27
30
 
28
31
  def close
@@ -32,6 +35,10 @@ module Kiba::Plus::Destination
32
35
 
33
36
  private
34
37
 
38
+ def init
39
+ @conn.prepare(prepare_name, prepare_sql)
40
+ end
41
+
35
42
  def connect_url
36
43
  options.fetch(:connect_url)
37
44
  end
@@ -2,20 +2,48 @@ require_relative 'pg_bulk_utils'
2
2
  module Kiba::Plus::Destination
3
3
  class PgBulk
4
4
  include PgBulkUtils
5
- attr_reader :options
5
+ include Kiba::Plus::Helper
6
+ attr_reader :options, :conn
6
7
 
7
8
  def initialize(options = {})
8
9
  @options = options
9
10
  @options.assert_valid_keys(
10
- :connect_url,
11
- :input_file,
12
- :table_name,
13
- :columns,
14
- :truncate,
15
- :incremental,
16
- :unique_by
17
- )
11
+ :connect_url,
12
+ :table_name,
13
+ :columns,
14
+ :truncate,
15
+ :incremental,
16
+ :unique_by,
17
+ :input_file,
18
+ :ignore_input_file_header
19
+ )
18
20
  @conn = PG.connect(connect_url)
21
+
22
+ init
23
+ end
24
+
25
+ def write(row)
26
+ # blank!
27
+ end
28
+
29
+ def close
30
+ if incremental
31
+ truncate_staging_table
32
+ create_staging_table
33
+ copy_to_staging_table
34
+ delete_before_insert
35
+ merge_to_target_table
36
+ truncate_staging_table
37
+ else
38
+ copy_to_target_table
39
+ end
40
+ @conn.close
41
+ @conn = nil
42
+ end
43
+
44
+ private
45
+
46
+ def init
19
47
  if truncate
20
48
  truncate_staging_table
21
49
  truncate_target_table
@@ -30,10 +58,6 @@ module Kiba::Plus::Destination
30
58
  options.fetch(:table_name)
31
59
  end
32
60
 
33
- def input_file
34
- options.fetch(:input_file)
35
- end
36
-
37
61
  def columns
38
62
  options.fetch(:columns)
39
63
  end
@@ -50,35 +74,53 @@ module Kiba::Plus::Destination
50
74
  options.fetch(:unique_by, :id)
51
75
  end
52
76
 
77
+ def input_file
78
+ options.fetch(:input_file)
79
+ end
80
+
81
+ def ignore_input_file_header
82
+ !!options.fetch(:ignore_input_file_header, false)
83
+ end
84
+
53
85
  def copy_to_target_table
54
- sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
86
+ sql = copy_to_target_table_sql
55
87
  Kiba::Plus.logger.info sql
56
88
  @conn.exec(sql)
57
89
  end
58
90
 
59
91
  def copy_to_staging_table
60
- sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
92
+ sql = copy_to_staging_table_sql
61
93
  Kiba::Plus.logger.info sql
62
94
  @conn.exec(sql)
63
95
  end
64
96
 
65
- def write(row)
66
- # blank!
97
+ def copy_to_target_table_sql
98
+ sql = <<-SQL
99
+ COPY #{table_name} (#{columns.join(', ')})
100
+ FROM '#{File.expand_path(input_file)}'
101
+ WITH
102
+ #{ignore_input_file_header ? 'HEADER' : ''}
103
+ DELIMITER ','
104
+ NULL '\\N'
105
+ CSV
106
+ SQL
107
+
108
+ format_sql sql
67
109
  end
68
110
 
69
- def close
70
- if incremental
71
- truncate_staging_table
72
- create_staging_table
73
- copy_to_staging_table
74
- delete_before_insert
75
- merge_to_target_table
76
- truncate_staging_table
77
- else
78
- copy_to_target_table
79
- end
80
- @conn.close
81
- @conn = nil
111
+ def copy_to_staging_table_sql
112
+ sql = <<-SQL
113
+ COPY #{staging_table_name} (#{columns.join(', ')})
114
+ FROM '#{File.expand_path(input_file)}'
115
+ WITH
116
+ #{ignore_input_file_header ? 'HEADER' : ''}
117
+ DELIMITER ','
118
+ NULL '\\N'
119
+ CSV
120
+ SQL
121
+
122
+ format_sql sql
82
123
  end
124
+
83
125
  end
84
126
  end
@@ -4,19 +4,54 @@ require_relative 'pg_bulk_utils'
4
4
  module Kiba::Plus::Destination
5
5
  class PgBulk2
6
6
  include PgBulkUtils
7
- attr_reader :options
7
+ include Kiba::Plus::Helper
8
+ attr_reader :options, :conn
8
9
 
9
10
  def initialize(options = {})
10
11
  @options = options
11
- @options.assert_valid_keys(:table_name,
12
- :columns,
13
- :connect_url,
14
- :truncate,
15
- :incremental,
16
- :unique_by
17
- )
12
+ @options.assert_valid_keys(
13
+ :connect_url,
14
+ :table_name,
15
+ :columns,
16
+ :truncate,
17
+ :incremental,
18
+ :unique_by
19
+ )
18
20
 
19
21
  @conn = PG.connect(connect_url)
22
+
23
+ init
24
+ end
25
+
26
+ def write(row)
27
+ begin
28
+ @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
29
+ rescue Exception => err
30
+ errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
31
+ @conn.put_copy_end( errmsg )
32
+ Kiba::Plus.logger.error @conn.get_result
33
+ raise
34
+ end
35
+ end
36
+
37
+ def close
38
+ @conn.put_copy_end
39
+ @conn.get_last_result
40
+ if incremental
41
+ delete_before_insert
42
+ merge_to_target_table
43
+ truncate_staging_table
44
+ end
45
+ rescue
46
+ raise
47
+ ensure
48
+ @conn.close
49
+ @conn = nil
50
+ end
51
+
52
+ private
53
+
54
+ def init
20
55
  if truncate
21
56
  truncate_staging_table
22
57
  truncate_target_table
@@ -24,12 +59,12 @@ module Kiba::Plus::Destination
24
59
  if incremental
25
60
  truncate_staging_table
26
61
  create_staging_table
27
- sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
62
+ sql = bulk_sql_with_incremental
28
63
  else
29
- sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
64
+ sql = bulk_sql_with_non_incremental
30
65
  end
31
66
  Kiba::Plus.logger.info sql
32
- @res = @conn.exec(sql)
67
+ @conn.exec(sql)
33
68
  end
34
69
 
35
70
  def connect_url
@@ -40,17 +75,6 @@ module Kiba::Plus::Destination
40
75
  options.fetch(:table_name)
41
76
  end
42
77
 
43
- def write(row)
44
- begin
45
- @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
46
- rescue Exception => err
47
- errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
48
- @conn.put_copy_end( errmsg )
49
- Kiba::Plus.logger.error @conn.get_result
50
- raise
51
- end
52
- end
53
-
54
78
  def columns
55
79
  options.fetch(:columns)
56
80
  end
@@ -67,19 +91,29 @@ module Kiba::Plus::Destination
67
91
  options.fetch(:unique_by, :id)
68
92
  end
69
93
 
70
- def close
71
- @conn.put_copy_end
72
- @conn.get_last_result
73
- if incremental
74
- delete_before_insert
75
- merge_to_target_table
76
- truncate_staging_table
77
- end
78
- rescue
79
- raise
80
- ensure
81
- @conn.close
82
- @conn = nil
94
+ def bulk_sql_with_incremental
95
+ sql = <<-SQL
96
+ COPY #{staging_table_name} (#{columns.join(', ')})
97
+ FROM STDIN
98
+ WITH
99
+ DELIMITER ','
100
+ NULL '\\N'
101
+ CSV
102
+ SQL
103
+ format_sql sql
83
104
  end
105
+
106
+ def bulk_sql_with_non_incremental
107
+ sql = <<-SQL
108
+ COPY #{table_name} (#{columns.join(', ')})
109
+ FROM STDIN
110
+ WITH
111
+ DELIMITER ','
112
+ NULL '\\N'
113
+ CSV
114
+ SQL
115
+ format_sql sql
116
+ end
117
+
84
118
  end
85
119
  end
@@ -1,39 +1,72 @@
1
1
  module Kiba::Plus::Destination
2
2
  module PgBulkUtils
3
+
4
+ private
5
+
3
6
  def staging_table_name
4
7
  table_name + "_staging"
5
8
  end
6
9
 
7
10
  def create_staging_table
8
- sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
11
+ sql = create_staging_table_sql
9
12
  Kiba::Plus.logger.info sql
10
13
  @conn.exec(sql)
11
14
  end
12
15
 
16
+ def create_staging_table_sql
17
+ sql = <<-SQL
18
+ CREATE TABLE IF NOT EXISTS #{staging_table_name} (
19
+ LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES
20
+ )
21
+ SQL
22
+ format_sql sql
23
+ end
24
+
13
25
  def truncate_staging_table
14
- truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
15
- Kiba::Plus.logger.info truncate_sql
16
- @conn.exec(truncate_sql) rescue nil
26
+ sql = truncate_staging_table_sql
27
+ Kiba::Plus.logger.info sql
28
+ @conn.exec(sql) rescue nil
29
+ end
30
+
31
+ def truncate_staging_table_sql
32
+ sql = "TRUNCATE TABLE #{staging_table_name}"
33
+ format_sql sql
17
34
  end
18
35
 
19
36
  def truncate_target_table
20
- truncate_sql = "TRUNCATE TABLE #{table_name};"
21
- Kiba::Plus.logger.info truncate_sql
22
- @conn.exec(truncate_sql)
37
+ sql = truncate_target_table_sql
38
+ Kiba::Plus.logger.info sql
39
+ @conn.exec(sql)
40
+ end
41
+
42
+ def truncate_target_table_sql
43
+ sql = "TRUNCATE TABLE #{table_name}"
44
+ format_sql sql
23
45
  end
24
46
 
25
- # TODO add where condition to speed up deleting.
26
47
  def delete_before_insert
27
- where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
28
- sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
48
+ sql = delete_before_insert_sql
29
49
  Kiba::Plus.logger.info sql
30
50
  @conn.exec(sql)
31
51
  end
32
52
 
53
+ # TODO add where condition to speed up deleting.
54
+ def delete_before_insert_sql
55
+ where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
56
+ sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
57
+ format_sql sql
58
+ end
59
+
33
60
  def merge_to_target_table
34
- sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
61
+ sql = merge_to_target_table_sql
35
62
  Kiba::Plus.logger.info sql
36
63
  @conn.exec(sql)
37
64
  end
65
+
66
+ def merge_to_target_table_sql
67
+ sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
68
+ format_sql sql
69
+ end
70
+
38
71
  end
39
72
  end
@@ -2,13 +2,15 @@ require 'uri'
2
2
  module Kiba
3
3
  module Plus
4
4
  module Helper
5
- def connect_hash(url)
5
+ def mysql2_connect_hash(url)
6
+ return url if url.is_a?(Hash)
7
+
6
8
  u = URI.parse(url)
7
9
  {
8
10
  host: u.host,
11
+ port: u.port,
9
12
  username: u.user,
10
13
  password: u.password,
11
- port: u.port,
12
14
  database: u.path[1..-1]
13
15
  }
14
16
  end
@@ -17,6 +19,11 @@ module Kiba
17
19
  u = URI.parse(url)
18
20
  u.scheme
19
21
  end
22
+
23
+ def format_sql(sql)
24
+ sql.to_s.gsub(/[\n][\s]*[\n]/, "\n")
25
+ end
26
+
20
27
  end
21
28
  end
22
29
  end
data/lib/kiba/plus/job.rb CHANGED
@@ -15,7 +15,7 @@ module Kiba
15
15
  @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
16
16
  url = URI.parse(connect_url)
17
17
  if url.scheme =~ /mysql/i
18
- @client = Mysql2::Client.new(connect_hash(connect_url))
18
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
19
19
  elsif url.scheme =~ /postgres/i
20
20
  @client = PG.connect(connect_url)
21
21
  else
@@ -10,41 +10,29 @@ module Kiba
10
10
  def initialize(options = {})
11
11
  @options = options
12
12
  @options.assert_valid_keys(
13
- :query,
14
- :output,
15
- :last_pull_at,
16
- :incremental,
17
- :connect_url
18
- )
19
- @client = Mysql2::Client.new(connect_hash(connect_url))
13
+ :connect_url,
14
+ :query
15
+ )
16
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
20
17
  end
21
18
 
22
19
  def each
20
+ Kiba::Plus.logger.info query
23
21
  results = client.query(query, as: :hash, symbolize_keys: true, stream: true)
24
22
  results.each do |row|
25
23
  yield(row)
26
24
  end
27
25
  end
28
26
 
29
- def query
30
- options.fetch(:query)
31
- end
32
-
33
- def output
34
- options.fetch(:output)
35
- end
36
-
37
- def last_pull_at
38
- options[:last_pull_at]
39
- end
40
-
41
- def incremental
42
- options.fetch(:incremental, true)
43
- end
27
+ private
44
28
 
45
29
  def connect_url
46
30
  options.fetch(:connect_url)
47
31
  end
32
+
33
+ def query
34
+ options.fetch(:query)
35
+ end
48
36
  end
49
37
  end
50
38
  end
@@ -0,0 +1,40 @@
1
+ require 'pg'
2
+ require 'uri'
3
+
4
+ module Kiba
5
+ module Plus::Source
6
+ class Pg
7
+ include Kiba::Plus::Helper
8
+ attr_reader :options, :client
9
+
10
+ def initialize(options = {})
11
+ @options = options
12
+ @options.assert_valid_keys(
13
+ :connect_url,
14
+ :schema,
15
+ :query
16
+ )
17
+ @client = PG.connect(connect_url)
18
+ @client.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
19
+ end
20
+
21
+ def each
22
+ Kiba::Plus.logger.info query
23
+ results = client.query(query)
24
+ results.each do |row|
25
+ yield(row)
26
+ end
27
+ end
28
+
29
+ private
30
+
31
+ def connect_url
32
+ options.fetch(:connect_url)
33
+ end
34
+
35
+ def query
36
+ options.fetch(:query)
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,5 +1,5 @@
1
1
  module Kiba
2
2
  module Plus
3
- VERSION = "0.1.2"
3
+ VERSION = "0.1.3"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba-plus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hooopo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-21 00:00:00.000000000 Z
11
+ date: 2017-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: kiba
@@ -94,6 +94,48 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '5.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: database_cleaner
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.5.3
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.5.3
111
+ - !ruby/object:Gem::Dependency
112
+ name: sequel
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '4.34'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '4.34'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
97
139
  description: It connects to various data sources including relational, non-relational,
98
140
  and flat file, cloud services and HTTP resources. It has flexible load strategies
99
141
  including insert, bulk load and upsert.
@@ -135,6 +177,7 @@ files:
135
177
  - lib/kiba/plus/job.rb
136
178
  - lib/kiba/plus/logger.rb
137
179
  - lib/kiba/plus/source/mysql.rb
180
+ - lib/kiba/plus/source/pg.rb
138
181
  - lib/kiba/plus/version.rb
139
182
  homepage: https://github.com/hooopo/kiba-plus
140
183
  licenses:
@@ -157,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
157
200
  version: '0'
158
201
  requirements: []
159
202
  rubyforge_project:
160
- rubygems_version: 2.4.5.1
203
+ rubygems_version: 2.6.11
161
204
  signing_key:
162
205
  specification_version: 4
163
206
  summary: Kiba enhancement for Ruby ETL