embulk-filter-mysql 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e111c45ae7b1c0778579d8763d25578646d48b94
4
+ data.tar.gz: 3adb179342813e12ddbe8c5c390c4ac518feac87
5
+ SHA512:
6
+ metadata.gz: 247a0ae786842b1be33978bcc1ff321e6085f110594fda24b25f84fe6473c57e481fb728d993a4346656aa297775897c3c2a268e15c7ab269b22263a4f76039f
7
+ data.tar.gz: ee2156901e57868adcebe358198f34314a03d51a8a8d83c5992f3de4f1c6a1d60520f8c8c61138ccb133edb1afd8dfd6fe99f42d007cffe7310272002f552b4a
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /vendor
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # Mysql filter plugin for Embulk
2
+
3
+ MySQL filter plugin for Embulk. Executes a prepared-statement query for each input record and emits the rows it returns.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ - **host**: host(string, default: 'localhost')
12
+ - **user**: user(string, required)
13
+ - **password**: password(string, required)
14
+ - **database**: database(string, required)
15
+ - **port**: port(integer, default: 3306)
16
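+ - **query**: SQL query, executed as a prepared statement once per input record; use `?` placeholders for bound values (string, required)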
17
+ - **params**: names of the input columns whose values are bound, in order, to the `?` placeholders in `query` (array, required)
18
+ - **keep_input**: when true, the input columns are kept in front of the query result columns (bool, default: false)
19
+
20
+ ## Example
21
+
22
+ ```yaml
23
+ in:
24
+ type: s3
25
+ bucket: machine-learning-production
26
+ path_prefix: customer-approval/batch-prediction/result/
27
+ endpoint: s3.amazonaws.com
28
+ auth_method: {{ env.EMBULK_S3_AUTH_METHOD }}
29
+ decoders:
30
+ - {type: gzip}
31
+ parser:
32
+ type: csv
33
+ delimiter: ","
34
+ skip_header_lines: 1
35
+ allow_extra_columns: true
36
+ allow_optional_columns: true
37
+ columns:
38
+ - {name: user_id, type: long}
39
+ - {name: trueLabel, type: string}
40
+ - {name: bestAnswer, type: string}
41
+ - {name: score, type: double}
42
+ filters:
43
+ - type: mysql
44
+ host: {{ env.DATABASE_HOST | default: "localhost" }}
45
+ user: {{ env.APPLICATION_USERNAME }}
46
+ password: {{ env.APPLICATION_DATABASE_PASS }}
47
+ database: {{ env.APPLICATION_DATABASE }}
48
+ keep_input: true
49
+ query: |
50
+ select
51
+ id,
52
+ last_name,
53
+ first_name,
54
+ company_name
55
+ from
56
+ user
57
+ where id = ?
58
+ params:
59
+ - user_id
60
+ out:
61
+ type: stdout
62
+ ```
63
+
64
+ #### input CSV
65
+ ```
66
+ user_id,trueLabel,bestAnswer,score
67
+ 1,0,0,1.5
68
+ 2,0,0,1.5
69
+ 3,0,0,1.5
70
+ ```
71
+
72
+ #### Running query
73
+ ```
74
+ select
75
+ id,
76
+ last_name,
77
+ first_name,
78
+ company_name
79
+ from
80
+ user
81
+ where id = 1;
82
+
83
+ select
84
+ id,
85
+ last_name,
86
+ first_name,
87
+ company_name
88
+ from
89
+ user
90
+ where id = 2;
91
+
92
+ select
93
+ id,
94
+ last_name,
95
+ first_name,
96
+ company_name
97
+ from
98
+ user
99
+ where id = 3;
100
+ ```
101
+
102
+
103
+ ## Build
104
+
105
+ ```
106
+ $ rake
107
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the embulk-filter-mysql plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "embulk-filter-mysql"
  gem.version     = "0.1.0"
  gem.authors     = ["toyama0919"]
  gem.email       = ["toyama0919@gmail.com"]
  gem.summary     = "Mysql filter plugin for Embulk. Execute prepared statements query."
  gem.description = "Mysql"
  gem.licenses    = ["MIT"]
  gem.homepage    = "https://github.com/toyama0919/embulk-filter-mysql"

  # Package every file tracked by git plus any jars found under classpath/.
  tracked_files     = `git ls-files`.split("\n")
  gem.files         = tracked_files + Dir["classpath/*.jar"]
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # One runtime dependency, followed by the development-only ones.
  gem.add_dependency 'ffi-mysql'
  gem.add_development_dependency 'embulk', ['>= 0.8.3']
  gem.add_development_dependency 'bundler', ['>= 1.10.6']
  gem.add_development_dependency 'rake', ['>= 10.0']
end
@@ -0,0 +1,132 @@
1
+ # coding: UTF-8
2
+ require 'ffi-mysql'
3
+
4
+ module Embulk
5
+ module Filter
6
+ class Mysql < FilterPlugin
7
+ Plugin.register_filter("mysql", self)
8
+
9
+ def self.transaction(config, in_schema, &control)
10
+ task = {
11
+ "host" => config.param("host", :string, default: 'localhost'),
12
+ "user" => config.param("user", :string),
13
+ "password" => config.param("password", :string),
14
+ "database" => config.param("database", :string),
15
+ "port" => config.param("port", :integer, default: 3306),
16
+ "query" => config.param("query", :string),
17
+ "params" => config.param("params", :array),
18
+ "keep_input" => config.param("keep_input", :bool, default: false)
19
+ }
20
+ connection = ::Mysql.real_connect(task['host'], task['user'], task['password'], task['database'], task['port'])
21
+ statement = connection.prepare(task['query'])
22
+ columns = []
23
+ columns = columns + in_schema if task['keep_input']
24
+
25
+ statement.result_metadata.fetch_fields.each do |field|
26
+ columns << Column.new(nil, field.name, get_type(field.type))
27
+ end
28
+
29
+ yield(task, columns)
30
+ end
31
+
32
+ # ::Mysql::Field::TYPE_DECIMAL = 0
33
+ # ::Mysql::Field::TYPE_TINY = 1
34
+ # ::Mysql::Field::TYPE_SHORT = 2
35
+ # ::Mysql::Field::TYPE_LONG = 3
36
+ # ::Mysql::Field::TYPE_FLOAT = 4
37
+ # ::Mysql::Field::TYPE_DOUBLE = 5
38
+ # ::Mysql::Field::TYPE_NULL = 6
39
+ # ::Mysql::Field::TYPE_TIMESTAMP = 7
40
+ # ::Mysql::Field::TYPE_LONGLONG = 8
41
+ # ::Mysql::Field::TYPE_INT24 = 9
42
+ # ::Mysql::Field::TYPE_DATE = 10
43
+ # ::Mysql::Field::TYPE_TIME = 11
44
+ # ::Mysql::Field::TYPE_DATETIME = 12
45
+ # ::Mysql::Field::TYPE_YEAR = 13
46
+ # ::Mysql::Field::TYPE_NEWDATE = 14
47
+ # ::Mysql::Field::TYPE_VARCHAR = 15
48
+ # ::Mysql::Field::TYPE_BIT = 16
49
+ # ::Mysql::Field::TYPE_TIMESTAMP2 = 17
50
+ # ::Mysql::Field::TYPE_DATETIME2 = 18
51
+ # ::Mysql::Field::TYPE_TIME2 = 19
52
+ # ::Mysql::Field::TYPE_JSON = 245
53
+ # ::Mysql::Field::TYPE_NEWDECIMAL = 246
54
+ # ::Mysql::Field::TYPE_ENUM = 247
55
+ # ::Mysql::Field::TYPE_SET = 248
56
+ # ::Mysql::Field::TYPE_TINY_BLOB = 249
57
+ # ::Mysql::Field::TYPE_MEDIUM_BLOB = 250
58
+ # ::Mysql::Field::TYPE_LONG_BLOB = 251
59
+ # ::Mysql::Field::TYPE_BLOB = 252
60
+ # ::Mysql::Field::TYPE_VAR_STRING = 253
61
+ # ::Mysql::Field::TYPE_STRING = 254
62
+ # ::Mysql::Field::TYPE_GEOMETRY = 255
63
+ # ::Mysql::Field::TYPE_CHAR = TYPE_TINY
64
+ # ::Mysql::Field::TYPE_INTERVAL = TYPE_ENUM
65
+ def self.get_type(type)
66
+ case type
67
+ when ::Mysql::Field::TYPE_TINY
68
+ :boolean
69
+ when ::Mysql::Field::TYPE_SHORT, ::Mysql::Field::TYPE_LONG
70
+ :long
71
+ when ::Mysql::Field::TYPE_DOUBLE, ::Mysql::Field::TYPE_FLOAT
72
+ :double
73
+ when ::Mysql::Field::TYPE_DATE, ::Mysql::Field::TYPE_DATETIME, ::Mysql::Field::TYPE_TIMESTAMP
74
+ :timestamp
75
+ when ::Mysql::Field::TYPE_BLOB, ::Mysql::Field::TYPE_STRING, ::Mysql::Field::TYPE_VAR_STRING, ::Mysql::Field::TYPE_VARCHAR
76
+ :string
77
+ else
78
+ raise
79
+ end
80
+ end
81
+
82
+ def init
83
+ @connection = ::Mysql.real_connect(task['host'], task['user'], task['password'], task['database'], task['port'])
84
+ @statement = @connection.prepare(task['query'])
85
+ @params = task['params']
86
+ @keep_input = task['keep_input']
87
+ end
88
+
89
+ def close
90
+ Embulk.logger.info "connection closing..."
91
+ @connection.close
92
+ end
93
+
94
+ def add(page)
95
+ page.each do |record|
96
+ hash = Hash[in_schema.names.zip(record)]
97
+ prepare_params = @params ? @params.map{ |param| hash[param] } : []
98
+ query_results = @statement.execute(*prepare_params)
99
+ query_results.each do |values|
100
+ converted = []
101
+ converted = record + converted if @keep_input
102
+ values.each do |value|
103
+ converted << cast(value)
104
+ end
105
+ page_builder.add(converted)
106
+ end
107
+ end
108
+ end
109
+
110
+ def finish
111
+ page_builder.finish
112
+ end
113
+
114
+ def cast(value)
115
+ if (value.class == String)
116
+ value.force_encoding('UTF-8')
117
+ elsif (value.class == ::Mysql::Time)
118
+ Time.local(
119
+ value.year,
120
+ value.month,
121
+ value.day,
122
+ value.hour,
123
+ value.minute,
124
+ value.second
125
+ )
126
+ else
127
+ value
128
+ end
129
+ end
130
+ end
131
+ end
132
+ end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-mysql
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - toyama0919
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ name: ffi-mysql
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.3
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.3
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Mysql
70
+ email:
71
+ - toyama0919@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-filter-mysql.gemspec
83
+ - lib/embulk/filter/mysql.rb
84
+ homepage: https://github.com/toyama0919/embulk-filter-mysql
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.4.8
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Mysql filter plugin for Embulk. Execute prepared statements query.
108
+ test_files: []