kiba-plus 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Hooopo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,98 @@
1
+ # kiba-plus
2
+ Kiba enhancement for Ruby ETL. It connects to various data sources, including relational and non-relational databases, flat files, cloud services, and HTTP resources, and offers flexible load strategies including insert, bulk load, and upsert.
3
+
4
+ # Usage
5
+
6
+ ```ruby
7
+ require 'kiba/plus'
8
+
9
+ SOURCE_URL = 'mysql://root@localhost/shopperplus'
10
+
11
+ DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'
12
+
13
+ source Kiba::Plus::Source::Mysql, { :connect_url => SOURCE_URL,
14
+ :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
15
+ }
16
+
17
+ destination Kiba::Plus::Destination::PgBulk2, { :connect_url => DEST_URL,
18
+ :table_name => "customers",
19
+ :truncate => true,
20
+ :columns => [:id, :email, :first_name, :last_name],
21
+ :incremental => false
22
+ }
23
+
24
+ post_process do
25
+ result = PG.connect(DEST_URL).query("SELECT COUNT(*) AS num FROM customers")
26
+ puts "Insert total: #{result.first['num']}"
27
+ end
28
+ ```
29
+
30
+ Execute:
31
+
32
+ ```shell
33
+ bundle exec kiba customer_mysql_to_pg.etl
34
+ ```
35
+
36
+ Output:
37
+
38
+ ```
39
+ # Output:
40
+ # I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
41
+ # I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
42
+ # Insert total: 428972
43
+ ```
44
+
45
+ More Examples(TODO).
46
+
47
+ # Main Features
48
+
49
+ * Csv Source
50
+ * MySQL Source
51
+ * Postgresql Source
52
+ * Citus Source
53
+ * Greenplum Source
54
+ * MongoDB Source (TODO)
55
+ * Elastic Source (TODO)
56
+ * Redshift Source (TODO)
57
+
58
+ * Csv Destination
59
+ * MySQL Destination
60
+ * Postgresql Destination
61
+ * Citus Destination
62
+ * Greenplum Destination
63
+ * MongoDB Destination (TODO)
64
+ * Elastic Destination (TODO)
65
+ * Redshift Destination (TODO)
66
+
67
+ * Bulk Load for large dataset
68
+ * Upsert for MySQL & Postgresql
69
+ * Incremental Update
70
+
71
+ ## Installation
72
+
73
+ Add this line to your application's Gemfile:
74
+
75
+ ```ruby
76
+ gem 'kiba-plus'
77
+ ```
78
+
79
+ And then execute:
80
+
81
+ $ bundle
82
+
83
+ Or install it yourself as:
84
+
85
+ $ gem install kiba-plus
86
+
87
+ ## Usage
88
+
89
+
90
+ ## Development
91
+
92
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
93
+
94
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
95
+
96
+ ## Contributing
97
+
98
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/kiba-plus.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# Defines `rake test`: runs every test/**/*_test.rb file with test/ and lib/
# added to the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList['test/**/*_test.rb']
end

# Bare `rake` runs the test suite. The default must name the :test task
# defined above; no :spec task exists in this Rakefile.
task :default => :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Starts an interactive IRB session with the gem already loaded.
require "bundler/setup"
require "kiba/plus"

# Fixtures or other initialization code can go here to make experimenting
# with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and swap the last two
# lines of this script for:
#   require "pry"
#   Pry.start

require "irb"
IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/examples/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem 'kiba-plus', :path => "../"
4
+ gem 'pry'
@@ -0,0 +1,31 @@
1
+ PATH
2
+ remote: ../
3
+ specs:
4
+ kiba-plus (0.1.0)
5
+ kiba (~> 0.6)
6
+ mysql2 (~> 0.4)
7
+ pg (~> 0.18)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ coderay (1.1.1)
13
+ kiba (0.6.1)
14
+ method_source (0.8.2)
15
+ mysql2 (0.4.4)
16
+ pg (0.18.4)
17
+ pry (0.10.3)
18
+ coderay (~> 1.1.0)
19
+ method_source (~> 0.8.1)
20
+ slop (~> 3.4)
21
+ slop (3.6.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ kiba-plus!
28
+ pry
29
+
30
+ BUNDLED WITH
31
+ 1.11.2
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby
# Example ETL job: export customers from MySQL into a CSV file.
require_relative 'init'

SOURCE_URL = 'mysql://root@localhost/shopperplus'

# Select every customer, adding constant first_name/last_name columns.
source(
  Kiba::Plus::Source::Mysql,
  connect_url: SOURCE_URL,
  query: "SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers"
)

# Write each row to a CSV file under /tmp.
destination(Kiba::Plus::Destination::Csv, output_file: "/tmp/customer_csv.csv")

# Print the first ten lines of the generated file.
post_process do
  puts `head -n 10 /tmp/customer_csv.csv`
end
@@ -0,0 +1,27 @@
1
#!/usr/bin/env ruby
# Example ETL job: bulk-copy customers from MySQL into a PostgreSQL table.
require_relative 'init'

SOURCE_URL = 'mysql://root@localhost/shopperplus'

DEST_URL = 'postgresql://hooopo@localhost:5432/crm2_dev'

# Select every customer, adding constant first_name/last_name columns.
source Kiba::Plus::Source::Mysql, {
  :connect_url => SOURCE_URL,
  :query => %Q{SELECT id, email, 'hooopo' AS first_name, 'Wang' AS last_name FROM customers}
}

# Load the rows into the customers table, truncating it first.
destination Kiba::Plus::Destination::PgBulk2, {
  :connect_url => DEST_URL,
  :table_name => "customers",
  :truncate => true,
  :columns => [:id, :email, :first_name, :last_name],
  :incremental => false
}

# Report how many rows the destination table holds after the load.
post_process do
  connection = PG.connect(DEST_URL)
  begin
    result = connection.query("SELECT COUNT(*) AS num FROM customers")
    puts "Insert total: #{result.first['num']}"
  ensure
    # Release the connection even when the count query fails.
    connection.close
  end
end

# Output:
# I, [2016-05-16T01:53:36.832565 #87909] INFO -- : TRUNCATE TABLE customers;
# I, [2016-05-16T01:53:36.841770 #87909] INFO -- : COPY customers (id, email, first_name, last_name) FROM STDIN WITH DELIMITER ',' NULL '\N' CSV
# Insert total: 428972
data/examples/init.rb ADDED
@@ -0,0 +1,8 @@
1
# Loads every gem in the Gemfile's default group, then requires each Ruby file
# found in the sources/ and destinations/ directories next to this script,
# printing the path of every file it loads.
Bundler.require(:default)

examples_dir = File.dirname(__FILE__)

%w[sources destinations].each do |subdir|
  pattern = File.expand_path("#{examples_dir}/#{subdir}/*.rb")
  Dir.glob(pattern).each do |path|
    require(path)
    puts "import #{path}"
  end
end
File without changes
data/kiba-plus.gemspec ADDED
@@ -0,0 +1,35 @@
1
# coding: utf-8
# Gem specification for kiba-plus.

# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'kiba/plus/version'

Gem::Specification.new do |spec|
  spec.name          = "kiba-plus"
  spec.version       = Kiba::Plus::VERSION
  spec.authors       = ["Hooopo"]
  spec.email         = ["hoooopo@gmail.com"]

  spec.summary       = %q{kiba plus}
  spec.description   = %q{kiba plus}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Restrict pushes to rubygems.org via 'allowed_push_host'; remove this
  # section to allow pushing the gem to any host.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except the test/spec/features trees.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "kiba", "~> 0.6"
  spec.add_runtime_dependency "mysql2", "~> 0.4"
  spec.add_runtime_dependency "pg", "~> 0.18"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
end
data/lib/kiba/plus.rb ADDED
@@ -0,0 +1,30 @@
1
+ require_relative "plus/version"
2
+ require_relative "plus/helper"
3
+ require_relative "plus/source/mysql"
4
+ require_relative "plus/destination/csv"
5
+
6
+ require_relative "plus/destination/mysql"
7
+ require_relative "plus/destination/mysql_bulk"
8
+
9
+ require_relative "plus/destination/pg"
10
+ require_relative "plus/destination/pg_bulk"
11
+ require_relative "plus/destination/pg_bulk2"
12
+
13
+ require_relative "plus/job"
14
+ require_relative 'plus/logger'
15
+
16
# Top-level namespace for kiba-plus.
module Kiba
  module Plus
  end
end

class Hash
  # Verifies that every key of this hash appears in +valid_keys+.
  #
  # valid_keys - the permitted keys, given either as separate arguments or as
  #              (nested) arrays, which are flattened.
  #
  # Returns the hash itself when all keys are permitted.
  # Raises ArgumentError naming the first unknown key and listing the valid ones.
  def assert_valid_keys(*valid_keys)
    permitted = valid_keys.flatten
    each_key do |key|
      next if permitted.include?(key)

      raise ArgumentError, "Unknown key: #{key.inspect}. Valid keys are: #{permitted.map(&:inspect).join(', ')}"
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'csv'
2
+
3
module Kiba::Plus::Destination
  # Kiba destination that writes the values of each row as one line of a
  # comma-separated file.
  #
  # Options:
  #   :output_file - path of the file to write (required). It is opened in
  #                  'w' mode, so existing content is replaced.
  class Csv
    attr_reader :options

    # Validates the options and opens the output file for writing.
    #
    # Raises ArgumentError for unknown option keys and KeyError when
    # :output_file is missing.
    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(:output_file)
      # CSV options are passed as keywords: on Ruby 3+ a positional options
      # hash is no longer converted to keywords and CSV.open rejects it.
      @csv = CSV.open(output_file, 'w', col_sep: delimiter)
    end

    # Path of the CSV file being written.
    def output_file
      options.fetch(:output_file)
    end

    # Column separator used in the output file.
    def delimiter
      ","
    end

    # Appends the row's values (in the row's own key order) as one CSV line.
    def write(row)
      @csv << row.values
    end

    # Closes the output file.
    def close
      @csv.close
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'mysql2'
2
+
3
module Kiba::Plus::Destination
  # Kiba destination that writes rows to MySQL one at a time through a single
  # prepared statement.
  #
  # Required options:
  #   :connect_url - connection URL, converted to Mysql2 client options by
  #                  connect_hash (presumably provided by Kiba::Plus::Helper)
  #   :prepare_sql - SQL text passed to Mysql2::Client#prepare
  #   :columns     - row keys, in the order their values are bound
  class Mysql
    include Kiba::Plus::Helper

    attr_reader :options

    # Validates the options, connects, and prepares the statement.
    # Raises ArgumentError for unknown option keys.
    def initialize(options = {})
      options.assert_valid_keys(:connect_url, :prepare_sql, :columns)
      @options = options
      @client = Mysql2::Client.new(connect_hash(connect_url))
      @pre_stmt = @client.prepare(prepare_sql)
    end

    # Executes the prepared statement with the row's values for +columns+.
    # A failing row is logged and skipped instead of aborting the job.
    def write(row)
      bound_values = row.values_at(*columns)
      @pre_stmt.execute(*bound_values)
    rescue => error
      Kiba::Plus.logger.error "ERROR for #{row}"
      Kiba::Plus.logger.error error.message
    end

    # Closes the connection and drops the reference to it.
    def close
      @client.close
      @client = nil
    end

    # Private readers for the required options; each raises KeyError when
    # its option is missing.
    [:connect_url, :prepare_sql, :columns].each do |key|
      define_method(key) { options.fetch(key) }
      private key
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require 'mysql2'
2
+
3
module Kiba::Plus::Destination
  # Kiba destination that loads an existing delimited file into a MySQL table
  # with LOAD DATA LOCAL INFILE when the job closes. Rows passed to #write are
  # ignored; the data comes entirely from :input_file.
  #
  # Options:
  #   :connect_url - connection URL (required)
  #   :table_name  - target table (required)
  #   :columns     - column names, in file order (required)
  #   :input_file  - path of the file to load (required)
  #   :truncate    - empty the table before loading (default: false)
  #   :incremental - accepted, default true; not used by the code in this class
  class MysqlBulk
    include Kiba::Plus::Helper
    attr_reader :options

    # Validates the options and connects with local_infile enabled.
    # Raises ArgumentError for unknown option keys.
    def initialize(options = {})
      @options = options
      @options.assert_valid_keys(:table_name,
                                 :columns,
                                 :input_file,
                                 :connect_url,
                                 :truncate,
                                 :incremental
                                )

      @client = Mysql2::Client.new(connect_hash(connect_url).merge(local_infile: true))
    end

    def connect_url
      options.fetch(:connect_url)
    end

    def table_name
      options.fetch(:table_name)
    end

    # Intentionally a no-op: rows are not written individually.
    def write(row)
    end

    def columns
      options.fetch(:columns)
    end

    def truncate
      options.fetch(:truncate, false)
    end

    def incremental
      options.fetch(:incremental, true)
    end

    def input_file
      options.fetch(:input_file)
    end

    # Optionally truncates the table, bulk-loads the input file, and closes
    # the connection. The connection is released even if a query raises.
    def close
      if truncate
        truncate_sql = "TRUNCATE TABLE #{table_name};"
        Kiba::Plus.logger.info truncate_sql
        @client.query(truncate_sql)
      end

      # Fields are split on a bare comma, matching the "," delimiter that
      # Destination::Csv writes; a ', ' terminator (comma plus space) would
      # not split such files.
      bulk_sql = "LOAD DATA LOCAL INFILE '#{input_file}' REPLACE INTO TABLE #{table_name} FIELDS TERMINATED BY ',' (#{columns.join(',')})"
      Kiba::Plus.logger.info bulk_sql
      @client.query(bulk_sql)
    ensure
      @client.close
      @client = nil
    end
  end
end