kiba-plus 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Hooopo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,98 @@
1
+ # kiba-plus
2
+ Kiba enhancement for Ruby ETL. It connects to various data sources, including relational and non-relational databases, flat files, cloud services, and HTTP resources. It offers flexible load strategies, including insert, bulk load, and upsert.
3
+
4
+ # Usage
5
+
6
+ ```ruby
7
+ require 'kiba/plus'
8
+
9
+ SOURCE_URL = 'mysql://root@localhost/shopperplus'
10
+
11
+ DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'
12
+
13
+ source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
14
+ :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
15
+ }
16
+
17
+ destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
18
+ :table_name => "customers",
19
+ :truncate => true,
20
+ :columns => [:id, :email, :first_name, :last_name],
21
+ :incremental => false
22
+ }
23
+
24
+ post_process do
25
+ result = PG.connect(DEST_URL).query("SELECT COUNT(*) AS num FROM customers")
26
+ puts "Insert total: #{result.first['num']}"
27
+ end
28
+ ```
29
+
30
+ Execute:
31
+
32
+ ```shell
33
+ bundle exec kiba customer_mysql_to_pg.etl
34
+ ```
35
+
36
+ Output:
37
+
38
+ ```
39
+ # Output:
40
+ # I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
41
+ # I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
42
+ # Insert total: 428972
43
+ ```
44
+
45
+ More examples: TODO.
46
+
47
+ # Main Features
48
+
49
+ * Csv Source
50
+ * MySQL Source
51
+ * Postgresql Source
52
+ * Citus Source
53
+ * Greenplum Source
54
+ * MongoDB Source (TODO)
55
+ * Elastic Source (TODO)
56
+ * Redshift Source (TODO)
57
+
58
+ * Csv Destination
59
+ * MySQL Destination
60
+ * Postgresql Destination
61
+ * Citus Destination
62
+ * Greenplum Destination
63
+ * MongoDB Destination (TODO)
64
+ * Elastic Destination (TODO)
65
+ * Redshift Destination (TODO)
66
+
67
+ * Bulk Load for large dataset
68
+ * Upsert for MySQL & Postgresql
69
+ * Incremental Update
70
+
71
+ ## Installation
72
+
73
+ Add this line to your application's Gemfile:
74
+
75
+ ```ruby
76
+ gem 'kiba-plus'
77
+ ```
78
+
79
+ And then execute:
80
+
81
+ $ bundle
82
+
83
+ Or install it yourself as:
84
+
85
+ $ gem install kiba-plus
86
+
87
+ ## Usage
88
+
89
+
90
+ ## Development
91
+
92
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
93
+
94
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
95
+
96
+ ## Contributing
97
+
98
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/kiba-plus.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# Defines `rake test`: runs every test/**/*_test.rb file with both test/ and
# lib/ added to the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList['test/**/*_test.rb']
end

# :test is the only task this Rakefile defines. The default used to depend on
# a :spec task that does not exist here, so a bare `rake` aborted.
task :default => :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kiba/plus"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/examples/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem 'kiba-plus', :path => "../"
4
+ gem 'pry'
@@ -0,0 +1,31 @@
1
+ PATH
2
+ remote: ../
3
+ specs:
4
+ kiba-plus (0.1.0)
5
+ kiba (~> 0.6)
6
+ mysql2 (~> 0.4)
7
+ pg (~> 0.18)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ coderay (1.1.1)
13
+ kiba (0.6.1)
14
+ method_source (0.8.2)
15
+ mysql2 (0.4.4)
16
+ pg (0.18.4)
17
+ pry (0.10.3)
18
+ coderay (~> 1.1.0)
19
+ method_source (~> 0.8.1)
20
+ slop (~> 3.4)
21
+ slop (3.6.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ kiba-plus!
28
+ pry
29
+
30
+ BUNDLED WITH
31
+ 1.11.2
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'init'
3
+
4
SOURCE_URL = 'mysql://root@localhost/shopperplus'

# Single definition of the export path, shared by the destination and by the
# preview printed in post_process.
OUTPUT_FILE = "/tmp/customer_csv.csv"

source Kiba::Plus::Source::Mysql, :connect_url => SOURCE_URL,
  :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}

destination Kiba::Plus::Destination::Csv, :output_file => OUTPUT_FILE

# Print the first ten exported lines. Reading the file directly avoids
# depending on an external `head` binary.
post_process do
  puts File.foreach(OUTPUT_FILE).first(10)
end
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'init'
3
+
4
SOURCE_URL = 'mysql://root@localhost/shopperplus'

DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'

source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
  :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
}

destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
  :table_name => "customers",
  :truncate => true,
  :columns => [:id, :email, :first_name, :last_name],
  :incremental => false
}

# Report how many rows ended up in the destination table. The connection is
# opened only for this check, so it is closed again even if the query fails.
post_process do
  conn = PG.connect(DEST_URL)
  begin
    result = conn.query("SELECT COUNT(*) AS num FROM customers")
    puts "Insert total: #{result.first['num']}"
  ensure
    conn.close
  end
end

# Output:
# I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
# I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
# Insert total: 428972
data/examples/init.rb ADDED
@@ -0,0 +1,8 @@
1
# Shared bootstrap for the example ETL scripts: loads every gem in the
# Gemfile's default group, then requires any local source/destination
# classes found next to this file.

# Make sure the Bundler constant exists even when the script is not started
# through `bundle exec`.
require "bundler"
Bundler.require(:default)

base_dir = File.dirname(__FILE__)

%w[sources destinations].each do |subdir|
  pattern = File.expand_path(File.join(base_dir, subdir, "*.rb"))
  # Sort explicitly: Dir.glob only guarantees sorted results from Ruby 3.0 on,
  # and a stable load order keeps runs reproducible.
  Dir.glob(pattern).sort.each do |file|
    require(file)
    puts "import #{file}"
  end
end
File without changes
data/kiba-plus.gemspec ADDED
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
# Put this checkout's lib/ on the load path so the version constant can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'kiba/plus/version'

Gem::Specification.new do |spec|
  spec.name          = "kiba-plus"
  spec.version       = Kiba::Plus::VERSION
  spec.authors       = ["Hooopo"]
  spec.email         = ["hoooopo@gmail.com"]

  spec.summary       = "kiba plus"
  spec.description   = "kiba plus"
  spec.homepage      = ""
  spec.license       = "MIT"

  # Restrict pushes to the listed host via 'allowed_push_host'; a RubyGems
  # without metadata support cannot express that restriction, so abort.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except the test/spec/features trees.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  { "kiba" => "~> 0.6", "mysql2" => "~> 0.4", "pg" => "~> 0.18" }.each do |gem_name, requirement|
    spec.add_runtime_dependency gem_name, requirement
  end

  { "bundler" => "~> 1.11", "rake" => "~> 10.0", "minitest" => "~> 5.0" }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
data/lib/kiba/plus.rb ADDED
@@ -0,0 +1,30 @@
1
+ require_relative "plus/version"
2
+ require_relative "plus/helper"
3
+ require_relative "plus/source/mysql"
4
+ require_relative "plus/destination/csv"
5
+
6
+ require_relative "plus/destination/mysql"
7
+ require_relative "plus/destination/mysql_bulk"
8
+
9
+ require_relative "plus/destination/pg"
10
+ require_relative "plus/destination/pg_bulk"
11
+ require_relative "plus/destination/pg_bulk2"
12
+
13
+ require_relative "plus/job"
14
+ require_relative 'plus/logger'
15
+
16
# Namespace for the kiba-plus sources, destinations and helpers required
# above; declared here so the constant exists once this file is loaded.
module Kiba
  module Plus; end
end
20
+
21
class Hash
  # Only add the method when nothing else has provided it yet, so a version
  # that is already loaded (for example ActiveSupport's) is not overwritten.
  unless method_defined?(:assert_valid_keys)
    # Checks that every key of the hash is one of +valid_keys+.
    #
    # @param valid_keys [Array] allowed keys, given as a list or as arrays
    #   (nested arrays are flattened)
    # @return [Hash] self when all keys are allowed
    # @raise [ArgumentError] naming the first unknown key found
    def assert_valid_keys(*valid_keys)
      allowed = valid_keys.flatten
      each_key do |key|
        next if allowed.include?(key)

        raise ArgumentError, "Unknown key: #{key.inspect}. Valid keys are: #{allowed.map(&:inspect).join(', ')}"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'csv'
2
+
3
# Declared with nested `module` keywords so this file also loads when
# Kiba::Plus has not been defined yet.
module Kiba
  module Plus
    module Destination
      # Kiba destination that writes the values of each row as one line of a
      # comma-separated file.
      class Csv
        attr_reader :options

        # Opens the output file for writing (mode 'w').
        #
        # @param options [Hash] only :output_file (path of the file to write)
        #   is accepted
        # @raise [ArgumentError] if any other option key is given
        # @raise [KeyError] if :output_file is missing
        def initialize(options = {})
          @options = options
          @options.assert_valid_keys(:output_file)
          # Options are passed as keywords, not as a positional Hash: Ruby 3
          # no longer converts a trailing Hash into keyword arguments, so the
          # braced form raises ArgumentError there.
          @csv = ::CSV.open(output_file, 'w', col_sep: delimiter)
        end

        # @return [String] path of the file being written
        def output_file
          options.fetch(:output_file)
        end

        # @return [String] column separator used for the output
        def delimiter
          ","
        end

        # Appends one line made of the row's values, in the row's own order.
        #
        # @param row [Hash] the record to write
        def write(row)
          @csv << row.values
        end

        # Closes the underlying file.
        def close
          @csv.close
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'mysql2'
2
+
3
module Kiba::Plus::Destination
  # Kiba destination that writes rows to MySQL by executing one prepared
  # statement per row.
  class Mysql
    include Kiba::Plus::Helper
    attr_reader :options

    # Connects to MySQL and prepares the statement used by #write.
    #
    # @param options [Hash] accepts only :connect_url, :prepare_sql and
    #   :columns; each of them is fetched without a default
    # @raise [ArgumentError] if an unknown option key is given
    # @raise [KeyError] if one of the options is missing when first read
    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(
        :connect_url,
        :prepare_sql,
        :columns
      )
      # connect_hash is not defined in this class; presumably it comes from
      # Kiba::Plus::Helper and turns the URL into Mysql2 connection options.
      @client = Mysql2::Client.new(connect_hash(connect_url))
      begin
        @pre_stmt = @client.prepare(prepare_sql)
      rescue
        # Do not leave the connection open when the statement cannot be
        # prepared; the original error is re-raised unchanged.
        close
        raise
      end
    end

    # Executes the prepared statement with the row's values for the
    # configured columns. Failures are logged and swallowed, so one bad row
    # does not stop the run.
    #
    # @param row [Hash] the record to write
    def write(row)
      @pre_stmt.execute(*row.values_at(*columns))
    rescue => e
      Kiba::Plus.logger.error "ERROR for #{row}"
      Kiba::Plus.logger.error e.message
    end

    # Closes the connection. Calling it again after the connection has been
    # released is a no-op.
    def close
      return if @client.nil?

      @client.close
      @client = nil
    end

    private

    def connect_url
      options.fetch(:connect_url)
    end

    def prepare_sql
      options.fetch(:prepare_sql)
    end

    def columns
      options.fetch(:columns)
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require 'mysql2'
2
+
3
module Kiba::Plus::Destination
  # Kiba destination that bulk-loads an existing file into a MySQL table.
  # Individual rows are ignored by #write; the whole load happens in #close
  # through LOAD DATA LOCAL INFILE.
  class MysqlBulk
    include Kiba::Plus::Helper
    attr_reader :options

    # Connects to MySQL with local_infile enabled.
    #
    # @param options [Hash] accepts only :table_name, :columns, :input_file,
    #   :connect_url, :truncate and :incremental
    # @raise [ArgumentError] if an unknown option key is given
    # @raise [KeyError] if :connect_url is missing
    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(
        :table_name,
        :columns,
        :input_file,
        :connect_url,
        :truncate,
        :incremental
      )

      # connect_hash is not defined in this class; presumably it comes from
      # Kiba::Plus::Helper and turns the URL into Mysql2 connection options.
      @client = Mysql2::Client.new(connect_hash(connect_url).merge(local_infile: true))
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    # Intentionally empty: rows are not written one by one.
    def write(row)
    end

    def columns
      options.fetch(:columns)
    end

    # @return [Boolean] whether the table is emptied before loading
    #   (defaults to false)
    def truncate
      options.fetch(:truncate, false)
    end

    # Defaults to true.
    # NOTE(review): nothing in this class reads this value; the load always
    # uses REPLACE. Confirm whether it is meant to change the load strategy.
    def incremental
      options.fetch(:incremental, true)
    end

    def input_file
      options.fetch(:input_file)
    end

    # Optionally truncates the table, loads the input file, then closes the
    # connection. The connection is released even when one of the statements
    # raises, and a repeated call after that is a no-op.
    def close
      return if @client.nil?

      begin
        if truncate
          truncate_sql = "TRUNCATE TABLE #{table_name};"
          Kiba::Plus.logger.info truncate_sql
          @client.query(truncate_sql)
        end

        # NOTE(review): the field terminator is a comma followed by a space;
        # confirm that this matches how the input file is produced.
        bulk_sql = "LOAD DATA LOCAL INFILE '#{input_file}' REPLACE INTO TABLE #{table_name} FIELDS TERMINATED BY ', ' (#{columns.join(',')})"
        Kiba::Plus.logger.info bulk_sql
        @client.query(bulk_sql)
      ensure
        @client.close
        @client = nil
      end
    end
  end
end
+ end