kiba-plus 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3ab51f248407db0efeea72628dc83f639fa952f4
4
- data.tar.gz: e1d93e552d093ea421c1330ee2d1dbcf3f563c0d
3
+ metadata.gz: b37f1fbad737a5141060feae03a0d99b98b20d42
4
+ data.tar.gz: f72a505eaf75bf1f24be63de334b63ecd955c881
5
5
  SHA512:
6
- metadata.gz: 55cfec8dcfcad2f4224bcaf3d37d6aa2e7ca7831786cf0b099ec8800f4b3843b6d19e2ff06a8c57af11c10f8d91be3bf920c5410a3895bb3646f5228065e9471
7
- data.tar.gz: 5468e321b8b4dbfb9d8174a41964e7b79c8d5d1c8af9129b751621d02759629bb170ef61c51999483df1b260900e096229ea7d5420e9bdbb3db421f51515c356
6
+ metadata.gz: 6f22ebbaa54bcf735a58235c9485342763aa8bcff593d292b6d055897db79a9963ffae44d71bdd9612f20cce76de77d2f6dbcdb04c0fc2b8369fd33734cf2f98
7
+ data.tar.gz: e7088d77e6e03bb90445f0424a72532fc264ef87503f17fb7e4468bd7d481de3b17fc9994c06b4aa042d66dcbde00c40cf6d68497748028e3c2102b7e61ba1dc
data/.gitignore CHANGED
@@ -42,3 +42,6 @@ build/
42
42
 
43
43
  # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
44
44
  .rvmrc
45
+
46
+ /test/pg_copy_tmp/*
47
+ !/test/pg_copy_tmp/.keep
data/.travis.yml CHANGED
@@ -1,4 +1,25 @@
1
1
  language: ruby
2
+
2
3
  rvm:
3
4
  - 2.2.4
4
- before_install: gem install bundler -v 1.11.2
5
+
6
+ services:
7
+ - mysql
8
+ - postgresql
9
+
10
+ env:
11
+ - MYSQL2_SRC_CONNECT_URL=mysql2://travis@localhost/kiba_plus_src_test
12
+ - MYSQL2_DEST_CONNECT_URL=mysql2://travis@localhost/kiba_plus_dest_test
13
+ - PG_SRC_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_src_test
14
+ - PG_DEST_CONNECT_URL=postgresql://postgres@localhost/kiba_plus_dest_test
15
+
16
+ before_install:
17
+ - gem install bundler -v 1.11.2
18
+
19
+ before_script:
20
+ - mysql -e 'create database kiba_plus_src_test;'
21
+ - mysql -e 'create database kiba_plus_dest_test;'
22
+ - psql -c 'create database kiba_plus_src_test;' -U postgres
23
+ - psql -c 'create database kiba_plus_dest_test;' -U postgres
24
+
25
+ script: bundle exec rake test
data/README.md CHANGED
@@ -4,6 +4,8 @@ Kiba enhancement for Ruby ETL. It connects to various data sources including rel
4
4
  # Usage
5
5
 
6
6
  ```ruby
7
+ # /tmp/customer_mysql_to_pg.etl
8
+
7
9
  require 'kiba/plus'
8
10
 
9
11
  SOURCE_URL = 'mysql://root@localhost/shopperplus'
@@ -27,22 +29,33 @@ post_process do
27
29
  end
28
30
  ```
29
31
 
30
- Execute:
32
+ Execute in shell:
31
33
 
32
34
  ```shell
33
- bundle exec kiba customer_mysql_to_pg.etl
34
- ```
35
-
36
- Output:
35
+ $ bundle exec kiba /tmp/customer_mysql_to_pg.etl
37
36
 
38
- ```
39
37
  # Output:
40
38
  # I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
41
39
  # I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
42
40
  # Insert total: 428972
43
41
  ```
44
42
 
45
- More Examples(TODO).
43
+ Execute in ruby script:
44
+
45
+ ```ruby
46
+ require 'kiba'
47
+
48
+ job_definition = Kiba.parse(IO.read('/tmp/customer_mysql_to_pg.etl'), '/tmp/customer_mysql_to_pg.etl')
49
+ Kiba.run(job_definition)
50
+ ```
51
+
52
+ # Examples
53
+
54
+ * [CSV to MySQL](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_mysql.etl)
55
+ * [CSV to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_csv_to_pg.etl)
56
+ * [MySQL to CSV](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_csv.etl)
57
+ * [MySQL to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/customer_mysql_to_pg.etl)
58
+ * [MySQL incremental to PG](https://github.com/hooopo/kiba-plus/blob/master/examples/incremental_insert.etl)
46
59
 
47
60
  # Main Feature
48
61
 
@@ -73,6 +86,7 @@ More Examples(TODO).
73
86
  Add this line to your application's Gemfile:
74
87
 
75
88
  ```ruby
89
+ gem 'kiba'
76
90
  gem 'kiba-plus'
77
91
  ```
78
92
 
@@ -84,10 +98,21 @@ Or install it yourself as:
84
98
 
85
99
  $ gem install kiba-plus
86
100
 
87
- ## Usage
101
+ ## Development
88
102
 
103
+ First of all, Please run the following code in shell.
89
104
 
90
- ## Development
105
+ ```bash
106
+
107
+ $ mysql -e 'create database kiba_plus_src_test;'
108
+
109
+ $ mysql -e 'create database kiba_plus_dest_test;'
110
+
111
+ $ psql -c 'create database kiba_plus_src_test;' -U postgres
112
+
113
+ $ psql -c 'create database kiba_plus_dest_test;' -U postgres
114
+
115
+ ```
91
116
 
92
117
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
93
118
 
@@ -11,6 +11,6 @@ destination Kiba::Plus::Destination::MysqlBulk, { :connect_url => DEST_URL,
11
11
  :incremental => false
12
12
  }
13
13
  post_process do
14
- result = Mysql2::Client.new(connect_hash(DEST_URL)).query("SELECT COUNT(*) AS num FROM customers")
14
+ result = Mysql2::Client.new(mysql2_connect_hash(DEST_URL)).query("SELECT COUNT(*) AS num FROM customers")
15
15
  puts "Insert total: #{result.first['num']}"
16
16
  end
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  require_relative 'init'
3
3
 
4
- DEST_URL = 'mysql://root@localhost/crm2_dev'
4
+ SOURCE_URL = 'mysql://root@localhost/shopperplus'
5
5
 
6
6
  source Kiba::Plus::Source::Mysql, :connect_url => SOURCE_URL,
7
7
  :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
data/kiba-plus.gemspec CHANGED
@@ -26,10 +26,15 @@ Gem::Specification.new do |spec|
26
26
  spec.bindir = "exe"
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
+
29
30
  spec.add_runtime_dependency "kiba", "~> 0.6"
30
31
  spec.add_runtime_dependency "mysql2", "~> 0.4"
31
32
  spec.add_runtime_dependency "pg", "~> 0.18"
33
+
32
34
  spec.add_development_dependency "bundler", "~> 1.11"
33
35
  spec.add_development_dependency "rake", "~> 10.0"
34
36
  spec.add_development_dependency "minitest", "~> 5.0"
37
+ spec.add_development_dependency 'database_cleaner', '~> 1.5.3'
38
+ spec.add_development_dependency 'sequel', '~> 4.34'
39
+ spec.add_development_dependency 'pry'
35
40
  end
@@ -2,17 +2,17 @@ require 'csv'
2
2
 
3
3
  module Kiba::Plus::Destination
4
4
  class Csv
5
- attr_reader :options
5
+ attr_reader :options, :csv
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
9
9
  @options.assert_valid_keys(
10
10
  :output_file,
11
+ :mode,
11
12
  :row_sep,
12
13
  :col_sep,
13
14
  :force_quotes,
14
- :quote_char,
15
- :mode
15
+ :quote_char
16
16
  )
17
17
  @csv = CSV.open(output_file, mode, {
18
18
  :col_sep => col_sep,
@@ -22,36 +22,39 @@ module Kiba::Plus::Destination
22
22
  })
23
23
  end
24
24
 
25
- def mode
26
- options.fetch(:mode, "w")
25
+ def write(row)
26
+ @csv << row.values
27
27
  end
28
28
 
29
- def output_file
30
- options.fetch(:output_file)
29
+ def close
30
+ @csv.close
31
31
  end
32
32
 
33
- def col_sep
34
- options.fetch(:col_sep, ",")
35
- end
33
+ private
36
34
 
37
- def quote_char
38
- options.fetch(:quote_char, '"')
35
+ def output_file
36
+ options.fetch(:output_file)
39
37
  end
40
38
 
41
- def force_quotes
42
- options.fetch(:force_quotes, false)
39
+ def mode
40
+ options.fetch(:mode, "w")
43
41
  end
44
42
 
45
43
  def row_sep
46
44
  options.fetch(:row_sep, "\n")
47
45
  end
48
46
 
49
- def write(row)
50
- @csv << row.values
47
+ def col_sep
48
+ options.fetch(:col_sep, ",")
51
49
  end
52
50
 
53
- def close
54
- @csv.close
51
+ def force_quotes
52
+ options.fetch(:force_quotes, false)
53
+ end
54
+
55
+ def quote_char
56
+ options.fetch(:quote_char, '"')
55
57
  end
58
+
56
59
  end
57
60
  end
@@ -3,7 +3,7 @@ require 'mysql2'
3
3
  module Kiba::Plus::Destination
4
4
  class Mysql
5
5
  include Kiba::Plus::Helper
6
- attr_reader :options
6
+ attr_reader :options, :client
7
7
 
8
8
  def initialize(options = {})
9
9
  @options = options
@@ -12,8 +12,8 @@ module Kiba::Plus::Destination
12
12
  :prepare_sql,
13
13
  :columns
14
14
  )
15
- @client = Mysql2::Client.new(connect_hash(connect_url))
16
- @pre_stmt = @client.prepare(prepare_sql)
15
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
16
+ init
17
17
  end
18
18
 
19
19
  def write(row)
@@ -21,6 +21,7 @@ module Kiba::Plus::Destination
21
21
  rescue => e
22
22
  Kiba::Plus.logger.error "ERROR for #{row}"
23
23
  Kiba::Plus.logger.error e.message
24
+ raise e
24
25
  end
25
26
 
26
27
  def close
@@ -30,6 +31,10 @@ module Kiba::Plus::Destination
30
31
 
31
32
  private
32
33
 
34
+ def init
35
+ @pre_stmt = @client.prepare(prepare_sql)
36
+ end
37
+
33
38
  def connect_url
34
39
  options.fetch(:connect_url)
35
40
  end
@@ -3,45 +3,53 @@ require 'mysql2'
3
3
  module Kiba::Plus::Destination
4
4
  class MysqlBulk
5
5
  include Kiba::Plus::Helper
6
- attr_reader :options
6
+ attr_reader :options, :client
7
7
 
8
8
  def initialize(options = {})
9
9
  @options = options
10
- @options.assert_valid_keys(:table_name,
11
- :columns,
12
- :input_file,
13
- :connect_url,
14
- :truncate,
15
- :incremental,
16
- :delimited_by,
17
- :enclosed_by,
18
- :ignore_lines
19
- )
20
-
21
- @client = Mysql2::Client.new(connect_hash(connect_url).merge(local_infile: true))
10
+ @options.assert_valid_keys(
11
+ :connect_url,
12
+ :table_name,
13
+ :columns,
14
+ :truncate,
15
+ :incremental,
16
+ :input_file,
17
+ :ignore_input_file_header,
18
+ :delimited_by,
19
+ :enclosed_by,
20
+ :ignore_lines
21
+ )
22
+
23
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url).merge(local_infile: true))
22
24
  end
23
25
 
24
- def connect_url
25
- options.fetch(:connect_url)
26
+ def write(row)
27
+ # blank!
26
28
  end
27
29
 
28
- def table_name
29
- options.fetch(:table_name)
30
- end
30
+ def close
31
+ if truncate
32
+ sql = truncate_sql
33
+ Kiba::Plus.logger.info sql
34
+ client.query(sql)
35
+ end
31
36
 
32
- def delimited_by
33
- options.fetch(:delimited_by, ",")
34
- end
37
+ sql = bulk_sql
38
+ Kiba::Plus.logger.info sql
39
+ client.query(sql)
35
40
 
36
- def enclosed_by
37
- options.fetch(:enclosed_by, '"')
41
+ client.close
42
+ @client = nil
38
43
  end
39
44
 
40
- def ignore_lines
41
- options.fetch(:ignore_lines, 0)
45
+ private
46
+
47
+ def connect_url
48
+ options.fetch(:connect_url)
42
49
  end
43
50
 
44
- def write(row)
51
+ def table_name
52
+ options.fetch(:table_name)
45
53
  end
46
54
 
47
55
  def columns
@@ -60,27 +68,46 @@ module Kiba::Plus::Destination
60
68
  options.fetch(:input_file)
61
69
  end
62
70
 
63
- def close
64
- if truncate
65
- truncate_sql = "TRUNCATE TABLE #{table_name};"
66
- Kiba::Plus.logger.info truncate_sql
67
- @client.query(truncate_sql)
68
- end
71
+ def ignore_input_file_header
72
+ !!options.fetch(:ignore_input_file_header, false)
73
+ end
74
+
75
+ def delimited_by
76
+ options.fetch(:delimited_by, ",")
77
+ end
78
+
79
+ def enclosed_by
80
+ options.fetch(:enclosed_by, '"')
81
+ end
82
+
83
+ def ignore_lines
84
+ options.fetch(:ignore_lines, 0).to_i
85
+ end
86
+
87
+ def real_ignore_lines
88
+ lines = ignore_lines
89
+ lines += 1 if ignore_input_file_header
90
+ lines
91
+ end
69
92
 
70
- bulk_sql = <<-SQL
71
- LOAD DATA LOCAL INFILE '#{input_file}'
72
- REPLACE INTO TABLE #{table_name}
73
- FIELDS
93
+ def truncate_sql
94
+ sql = "TRUNCATE TABLE #{table_name}"
95
+ format_sql sql
96
+ end
97
+
98
+ def bulk_sql
99
+ sql = <<-SQL
100
+ LOAD DATA LOCAL INFILE '#{input_file}'
101
+ REPLACE
102
+ INTO TABLE #{table_name}
103
+ FIELDS
74
104
  TERMINATED BY '#{delimited_by}'
75
105
  ENCLOSED BY '#{enclosed_by}'
76
- IGNORE #{ignore_lines} LINES
77
- (#{columns.join(',')})
106
+ IGNORE #{real_ignore_lines} LINES
107
+ (#{columns.join(',')})
78
108
  SQL
79
- Kiba::Plus.logger.info bulk_sql
80
- @client.query(bulk_sql)
81
-
82
- @client.close
83
- @client = nil
109
+ format_sql sql
84
110
  end
111
+
85
112
  end
86
113
  end
@@ -2,18 +2,20 @@ require 'pg'
2
2
 
3
3
  module Kiba::Plus::Destination
4
4
  class Pg
5
- attr_reader :options
5
+ attr_reader :options, :conn
6
6
 
7
7
  def initialize(options = {})
8
8
  @options = options
9
9
  @options.assert_valid_keys(
10
10
  :connect_url,
11
+ :schema,
11
12
  :prepare_name,
12
13
  :prepare_sql,
13
14
  :columns
14
15
  )
15
16
  @conn = PG.connect(connect_url)
16
- @conn.prepare(prepare_name, prepare_sql)
17
+ @conn.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
18
+ init
17
19
  end
18
20
 
19
21
  def write(row)
@@ -23,6 +25,7 @@ module Kiba::Plus::Destination
23
25
  Kiba::Plus.logger.error "ERROR for #{row}"
24
26
  Kiba::Plus.logger.error ex.message
25
27
  # Maybe, write to db table or file
28
+ raise ex
26
29
  end
27
30
 
28
31
  def close
@@ -32,6 +35,10 @@ module Kiba::Plus::Destination
32
35
 
33
36
  private
34
37
 
38
+ def init
39
+ @conn.prepare(prepare_name, prepare_sql)
40
+ end
41
+
35
42
  def connect_url
36
43
  options.fetch(:connect_url)
37
44
  end
@@ -2,20 +2,48 @@ require_relative 'pg_bulk_utils'
2
2
  module Kiba::Plus::Destination
3
3
  class PgBulk
4
4
  include PgBulkUtils
5
- attr_reader :options
5
+ include Kiba::Plus::Helper
6
+ attr_reader :options, :conn
6
7
 
7
8
  def initialize(options = {})
8
9
  @options = options
9
10
  @options.assert_valid_keys(
10
- :connect_url,
11
- :input_file,
12
- :table_name,
13
- :columns,
14
- :truncate,
15
- :incremental,
16
- :unique_by
17
- )
11
+ :connect_url,
12
+ :table_name,
13
+ :columns,
14
+ :truncate,
15
+ :incremental,
16
+ :unique_by,
17
+ :input_file,
18
+ :ignore_input_file_header
19
+ )
18
20
  @conn = PG.connect(connect_url)
21
+
22
+ init
23
+ end
24
+
25
+ def write(row)
26
+ # blank!
27
+ end
28
+
29
+ def close
30
+ if incremental
31
+ truncate_staging_table
32
+ create_staging_table
33
+ copy_to_staging_table
34
+ delete_before_insert
35
+ merge_to_target_table
36
+ truncate_staging_table
37
+ else
38
+ copy_to_target_table
39
+ end
40
+ @conn.close
41
+ @conn = nil
42
+ end
43
+
44
+ private
45
+
46
+ def init
19
47
  if truncate
20
48
  truncate_staging_table
21
49
  truncate_target_table
@@ -30,10 +58,6 @@ module Kiba::Plus::Destination
30
58
  options.fetch(:table_name)
31
59
  end
32
60
 
33
- def input_file
34
- options.fetch(:input_file)
35
- end
36
-
37
61
  def columns
38
62
  options.fetch(:columns)
39
63
  end
@@ -50,35 +74,53 @@ module Kiba::Plus::Destination
50
74
  options.fetch(:unique_by, :id)
51
75
  end
52
76
 
77
+ def input_file
78
+ options.fetch(:input_file)
79
+ end
80
+
81
+ def ignore_input_file_header
82
+ !!options.fetch(:ignore_input_file_header, false)
83
+ end
84
+
53
85
  def copy_to_target_table
54
- sql = "COPY #{table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
86
+ sql = copy_to_target_table_sql
55
87
  Kiba::Plus.logger.info sql
56
88
  @conn.exec(sql)
57
89
  end
58
90
 
59
91
  def copy_to_staging_table
60
- sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM '#{File.expand_path(input_file)}' WITH DELIMITER ',' NULL '\\N' CSV"
92
+ sql = copy_to_staging_table_sql
61
93
  Kiba::Plus.logger.info sql
62
94
  @conn.exec(sql)
63
95
  end
64
96
 
65
- def write(row)
66
- # blank!
97
+ def copy_to_target_table_sql
98
+ sql = <<-SQL
99
+ COPY #{table_name} (#{columns.join(', ')})
100
+ FROM '#{File.expand_path(input_file)}'
101
+ WITH
102
+ #{ignore_input_file_header ? 'HEADER' : ''}
103
+ DELIMITER ','
104
+ NULL '\\N'
105
+ CSV
106
+ SQL
107
+
108
+ format_sql sql
67
109
  end
68
110
 
69
- def close
70
- if incremental
71
- truncate_staging_table
72
- create_staging_table
73
- copy_to_staging_table
74
- delete_before_insert
75
- merge_to_target_table
76
- truncate_staging_table
77
- else
78
- copy_to_target_table
79
- end
80
- @conn.close
81
- @conn = nil
111
+ def copy_to_staging_table_sql
112
+ sql = <<-SQL
113
+ COPY #{staging_table_name} (#{columns.join(', ')})
114
+ FROM '#{File.expand_path(input_file)}'
115
+ WITH
116
+ #{ignore_input_file_header ? 'HEADER' : ''}
117
+ DELIMITER ','
118
+ NULL '\\N'
119
+ CSV
120
+ SQL
121
+
122
+ format_sql sql
82
123
  end
124
+
83
125
  end
84
126
  end
@@ -4,19 +4,54 @@ require_relative 'pg_bulk_utils'
4
4
  module Kiba::Plus::Destination
5
5
  class PgBulk2
6
6
  include PgBulkUtils
7
- attr_reader :options
7
+ include Kiba::Plus::Helper
8
+ attr_reader :options, :conn
8
9
 
9
10
  def initialize(options = {})
10
11
  @options = options
11
- @options.assert_valid_keys(:table_name,
12
- :columns,
13
- :connect_url,
14
- :truncate,
15
- :incremental,
16
- :unique_by
17
- )
12
+ @options.assert_valid_keys(
13
+ :connect_url,
14
+ :table_name,
15
+ :columns,
16
+ :truncate,
17
+ :incremental,
18
+ :unique_by
19
+ )
18
20
 
19
21
  @conn = PG.connect(connect_url)
22
+
23
+ init
24
+ end
25
+
26
+ def write(row)
27
+ begin
28
+ @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
29
+ rescue Exception => err
30
+ errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
31
+ @conn.put_copy_end( errmsg )
32
+ Kiba::Plus.logger.error @conn.get_result
33
+ raise
34
+ end
35
+ end
36
+
37
+ def close
38
+ @conn.put_copy_end
39
+ @conn.get_last_result
40
+ if incremental
41
+ delete_before_insert
42
+ merge_to_target_table
43
+ truncate_staging_table
44
+ end
45
+ rescue
46
+ raise
47
+ ensure
48
+ @conn.close
49
+ @conn = nil
50
+ end
51
+
52
+ private
53
+
54
+ def init
20
55
  if truncate
21
56
  truncate_staging_table
22
57
  truncate_target_table
@@ -24,12 +59,12 @@ module Kiba::Plus::Destination
24
59
  if incremental
25
60
  truncate_staging_table
26
61
  create_staging_table
27
- sql = "COPY #{staging_table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
62
+ sql = bulk_sql_with_incremental
28
63
  else
29
- sql = "COPY #{table_name} (#{columns.join(', ')}) FROM STDIN WITH DELIMITER ',' NULL '\\N' CSV"
64
+ sql = bulk_sql_with_non_incremental
30
65
  end
31
66
  Kiba::Plus.logger.info sql
32
- @res = @conn.exec(sql)
67
+ @conn.exec(sql)
33
68
  end
34
69
 
35
70
  def connect_url
@@ -40,17 +75,6 @@ module Kiba::Plus::Destination
40
75
  options.fetch(:table_name)
41
76
  end
42
77
 
43
- def write(row)
44
- begin
45
- @conn.put_copy_data CSV.generate_line(row.values_at(*columns))
46
- rescue Exception => err
47
- errmsg = "%s while copy data: %s" % [ err.class.name, err.message ]
48
- @conn.put_copy_end( errmsg )
49
- Kiba::Plus.logger.error @conn.get_result
50
- raise
51
- end
52
- end
53
-
54
78
  def columns
55
79
  options.fetch(:columns)
56
80
  end
@@ -67,19 +91,29 @@ module Kiba::Plus::Destination
67
91
  options.fetch(:unique_by, :id)
68
92
  end
69
93
 
70
- def close
71
- @conn.put_copy_end
72
- @conn.get_last_result
73
- if incremental
74
- delete_before_insert
75
- merge_to_target_table
76
- truncate_staging_table
77
- end
78
- rescue
79
- raise
80
- ensure
81
- @conn.close
82
- @conn = nil
94
+ def bulk_sql_with_incremental
95
+ sql = <<-SQL
96
+ COPY #{staging_table_name} (#{columns.join(', ')})
97
+ FROM STDIN
98
+ WITH
99
+ DELIMITER ','
100
+ NULL '\\N'
101
+ CSV
102
+ SQL
103
+ format_sql sql
83
104
  end
105
+
106
+ def bulk_sql_with_non_incremental
107
+ sql = <<-SQL
108
+ COPY #{table_name} (#{columns.join(', ')})
109
+ FROM STDIN
110
+ WITH
111
+ DELIMITER ','
112
+ NULL '\\N'
113
+ CSV
114
+ SQL
115
+ format_sql sql
116
+ end
117
+
84
118
  end
85
119
  end
@@ -1,39 +1,72 @@
1
1
  module Kiba::Plus::Destination
2
2
  module PgBulkUtils
3
+
4
+ private
5
+
3
6
  def staging_table_name
4
7
  table_name + "_staging"
5
8
  end
6
9
 
7
10
  def create_staging_table
8
- sql = "CREATE TABLE IF NOT EXISTS #{staging_table_name} (LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)"
11
+ sql = create_staging_table_sql
9
12
  Kiba::Plus.logger.info sql
10
13
  @conn.exec(sql)
11
14
  end
12
15
 
16
+ def create_staging_table_sql
17
+ sql = <<-SQL
18
+ CREATE TABLE IF NOT EXISTS #{staging_table_name} (
19
+ LIKE #{table_name} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES
20
+ )
21
+ SQL
22
+ format_sql sql
23
+ end
24
+
13
25
  def truncate_staging_table
14
- truncate_sql = "TRUNCATE TABLE #{staging_table_name}"
15
- Kiba::Plus.logger.info truncate_sql
16
- @conn.exec(truncate_sql) rescue nil
26
+ sql = truncate_staging_table_sql
27
+ Kiba::Plus.logger.info sql
28
+ @conn.exec(sql) rescue nil
29
+ end
30
+
31
+ def truncate_staging_table_sql
32
+ sql = "TRUNCATE TABLE #{staging_table_name}"
33
+ format_sql sql
17
34
  end
18
35
 
19
36
  def truncate_target_table
20
- truncate_sql = "TRUNCATE TABLE #{table_name};"
21
- Kiba::Plus.logger.info truncate_sql
22
- @conn.exec(truncate_sql)
37
+ sql = truncate_target_table_sql
38
+ Kiba::Plus.logger.info sql
39
+ @conn.exec(sql)
40
+ end
41
+
42
+ def truncate_target_table_sql
43
+ sql = "TRUNCATE TABLE #{table_name}"
44
+ format_sql sql
23
45
  end
24
46
 
25
- # TODO add where condition to speed up deleting.
26
47
  def delete_before_insert
27
- where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
28
- sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
48
+ sql = delete_before_insert_sql
29
49
  Kiba::Plus.logger.info sql
30
50
  @conn.exec(sql)
31
51
  end
32
52
 
53
+ # TODO add where condition to speed up deleting.
54
+ def delete_before_insert_sql
55
+ where = Array(unique_by).map{|x| ["#{staging_table_name}.#{x}", "#{table_name}.#{x}"].join(" = ") }.join(" AND ")
56
+ sql = "DELETE FROM #{table_name} USING #{staging_table_name} WHERE #{where}"
57
+ format_sql sql
58
+ end
59
+
33
60
  def merge_to_target_table
34
- sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
61
+ sql = merge_to_target_table_sql
35
62
  Kiba::Plus.logger.info sql
36
63
  @conn.exec(sql)
37
64
  end
65
+
66
+ def merge_to_target_table_sql
67
+ sql = "INSERT INTO #{table_name} (SELECT * FROM #{staging_table_name})"
68
+ format_sql sql
69
+ end
70
+
38
71
  end
39
72
  end
@@ -2,13 +2,15 @@ require 'uri'
2
2
  module Kiba
3
3
  module Plus
4
4
  module Helper
5
- def connect_hash(url)
5
+ def mysql2_connect_hash(url)
6
+ return url if url.is_a?(Hash)
7
+
6
8
  u = URI.parse(url)
7
9
  {
8
10
  host: u.host,
11
+ port: u.port,
9
12
  username: u.user,
10
13
  password: u.password,
11
- port: u.port,
12
14
  database: u.path[1..-1]
13
15
  }
14
16
  end
@@ -17,6 +19,11 @@ module Kiba
17
19
  u = URI.parse(url)
18
20
  u.scheme
19
21
  end
22
+
23
+ def format_sql(sql)
24
+ sql.to_s.gsub(/[\n][\s]*[\n]/, "\n")
25
+ end
26
+
20
27
  end
21
28
  end
22
29
  end
data/lib/kiba/plus/job.rb CHANGED
@@ -15,7 +15,7 @@ module Kiba
15
15
  @options.assert_valid_keys(:connect_url, :job_id, :job_name, :start_at, :completed_at)
16
16
  url = URI.parse(connect_url)
17
17
  if url.scheme =~ /mysql/i
18
- @client = Mysql2::Client.new(connect_hash(connect_url))
18
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
19
19
  elsif url.scheme =~ /postgres/i
20
20
  @client = PG.connect(connect_url)
21
21
  else
@@ -10,41 +10,29 @@ module Kiba
10
10
  def initialize(options = {})
11
11
  @options = options
12
12
  @options.assert_valid_keys(
13
- :query,
14
- :output,
15
- :last_pull_at,
16
- :incremental,
17
- :connect_url
18
- )
19
- @client = Mysql2::Client.new(connect_hash(connect_url))
13
+ :connect_url,
14
+ :query
15
+ )
16
+ @client = Mysql2::Client.new(mysql2_connect_hash(connect_url))
20
17
  end
21
18
 
22
19
  def each
20
+ Kiba::Plus.logger.info query
23
21
  results = client.query(query, as: :hash, symbolize_keys: true, stream: true)
24
22
  results.each do |row|
25
23
  yield(row)
26
24
  end
27
25
  end
28
26
 
29
- def query
30
- options.fetch(:query)
31
- end
32
-
33
- def output
34
- options.fetch(:output)
35
- end
36
-
37
- def last_pull_at
38
- options[:last_pull_at]
39
- end
40
-
41
- def incremental
42
- options.fetch(:incremental, true)
43
- end
27
+ private
44
28
 
45
29
  def connect_url
46
30
  options.fetch(:connect_url)
47
31
  end
32
+
33
+ def query
34
+ options.fetch(:query)
35
+ end
48
36
  end
49
37
  end
50
38
  end
@@ -0,0 +1,40 @@
1
+ require 'pg'
2
+ require 'uri'
3
+
4
+ module Kiba
5
+ module Plus::Source
6
+ class Pg
7
+ include Kiba::Plus::Helper
8
+ attr_reader :options, :client
9
+
10
+ def initialize(options = {})
11
+ @options = options
12
+ @options.assert_valid_keys(
13
+ :connect_url,
14
+ :schema,
15
+ :query
16
+ )
17
+ @client = PG.connect(connect_url)
18
+ @client.exec "SET search_path TO %s" % [ options.fetch(:schema) ] unless options.fetch(:schema).empty?
19
+ end
20
+
21
+ def each
22
+ Kiba::Plus.logger.info query
23
+ results = client.query(query)
24
+ results.each do |row|
25
+ yield(row)
26
+ end
27
+ end
28
+
29
+ private
30
+
31
+ def connect_url
32
+ options.fetch(:connect_url)
33
+ end
34
+
35
+ def query
36
+ options.fetch(:query)
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,5 +1,5 @@
1
1
  module Kiba
2
2
  module Plus
3
- VERSION = "0.1.2"
3
+ VERSION = "0.1.3"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba-plus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hooopo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-21 00:00:00.000000000 Z
11
+ date: 2017-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: kiba
@@ -94,6 +94,48 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '5.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: database_cleaner
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.5.3
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.5.3
111
+ - !ruby/object:Gem::Dependency
112
+ name: sequel
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '4.34'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '4.34'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
97
139
  description: It connects to various data sources including relational, non-relational,
98
140
  and flat file, cloud services and HTTP resources. It has flexible load strategies
99
141
  including insert, bulk load and upsert.
@@ -135,6 +177,7 @@ files:
135
177
  - lib/kiba/plus/job.rb
136
178
  - lib/kiba/plus/logger.rb
137
179
  - lib/kiba/plus/source/mysql.rb
180
+ - lib/kiba/plus/source/pg.rb
138
181
  - lib/kiba/plus/version.rb
139
182
  homepage: https://github.com/hooopo/kiba-plus
140
183
  licenses:
@@ -157,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
157
200
  version: '0'
158
201
  requirements: []
159
202
  rubyforge_project:
160
- rubygems_version: 2.4.5.1
203
+ rubygems_version: 2.6.11
161
204
  signing_key:
162
205
  specification_version: 4
163
206
  summary: Kiba enhancement for Ruby ETL