embulk-filter-mysql 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e111c45ae7b1c0778579d8763d25578646d48b94
4
+ data.tar.gz: 3adb179342813e12ddbe8c5c390c4ac518feac87
5
+ SHA512:
6
+ metadata.gz: 247a0ae786842b1be33978bcc1ff321e6085f110594fda24b25f84fe6473c57e481fb728d993a4346656aa297775897c3c2a268e15c7ab269b22263a4f76039f
7
+ data.tar.gz: ee2156901e57868adcebe358198f34314a03d51a8a8d83c5992f3de4f1c6a1d60520f8c8c61138ccb133edb1afd8dfd6fe99f42d007cffe7310272002f552b4a
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /vendor
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # Mysql filter plugin for Embulk
2
+
3
+ MySQL filter plugin for Embulk. Executes a prepared-statement query for each input record and emits the columns returned by the query.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ - **host**: host(string, default: 'localhost')
12
+ - **user**: user(string, required)
13
+ - **password**: password(string, required)
14
+ - **database**: database(string, required)
15
+ - **port**: port(integer, default: 3306)
16
+ - **query**: query(string, required)
17
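+ - **params**: params(array, required). Names of the input columns whose values are bound, in order, to the `?` placeholders in **query**.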
18
+ - **keep_input**: keep_input(bool, default: false). When true, the input columns are kept and the query result columns are appended after them.
19
+
20
+ ## Example
21
+
22
+ ```yaml
23
+ in:
24
+ type: s3
25
+ bucket: machine-learning-production
26
+ path_prefix: customer-approval/batch-prediction/result/
27
+ endpoint: s3.amazonaws.com
28
+ auth_method: {{ env.EMBULK_S3_AUTH_METHOD }}
29
+ decoders:
30
+ - {type: gzip}
31
+ parser:
32
+ type: csv
33
+ delimiter: ","
34
+ skip_header_lines: 1
35
+ allow_extra_columns: true
36
+ allow_optional_columns: true
37
+ columns:
38
+ - {name: user_id, type: long}
39
+ - {name: trueLabel, type: string}
40
+ - {name: bestAnswer, type: string}
41
+ - {name: score, type: double}
42
+ filters:
43
+ - type: mysql
44
+ host: {{ env.DATABASE_HOST | default: "localhost" }}
45
+ user: {{ env.APPLICATION_USERNAME }}
46
+ password: {{ env.APPLICATION_DATABASE_PASS }}
47
+ database: {{ env.APPLICATION_DATABASE }}
48
+ keep_input: true
49
+ query: |
50
+ select
51
+ id,
52
+ last_name,
53
+ first_name,
54
+ company_name
55
+ from
56
+ user
57
+ where id = ?
58
+ params:
59
+ - user_id
60
+ out:
61
+ type: stdout
62
+ ```
63
+
64
+ #### input CSV
65
+ ```
66
+ user_id,trueLabel,bestAnswer,score
67
+ 1,0,0,1.5
68
+ 2,0,0,1.5
69
+ 3,0,0,1.5
70
+ ```
71
+
72
+ #### Running query
73
+ ```
74
+ select
75
+ id,
76
+ last_name,
77
+ first_name,
78
+ company_name
79
+ from
80
+ user
81
+ where id = 1;
82
+
83
+ select
84
+ id,
85
+ last_name,
86
+ first_name,
87
+ company_name
88
+ from
89
+ user
90
+ where id = 2;
91
+
92
+ select
93
+ id,
94
+ last_name,
95
+ first_name,
96
+ company_name
97
+ from
98
+ user
99
+ where id = 3;
100
+ ```
101
+
102
+
103
+ ## Build
104
+
105
+ ```
106
+ $ rake
107
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the embulk-filter-mysql plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "embulk-filter-mysql"
  gem.version     = "0.1.0"
  gem.summary     = "Mysql filter plugin for Embulk. Execute prepared statements query."
  gem.description = "Mysql"
  gem.homepage    = "https://github.com/toyama0919/embulk-filter-mysql"
  gem.licenses    = ["MIT"]
  gem.authors     = ["toyama0919"]
  gem.email       = ["toyama0919@gmail.com"]

  # Package every file tracked by git plus any jars found under classpath/.
  tracked_files     = `git ls-files`.split("\n")
  bundled_jars      = Dir["classpath/*.jar"]
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Runtime dependency: the MySQL client library required by the plugin.
  gem.add_dependency 'ffi-mysql'

  # Development-only dependencies, declared in the same order as before.
  [
    ['embulk',  '>= 0.8.3'],
    ['bundler', '>= 1.10.6'],
    ['rake',    '>= 10.0']
  ].each do |dependency, requirement|
    gem.add_development_dependency dependency, [requirement]
  end
end
@@ -0,0 +1,132 @@
1
+ # coding: UTF-8
2
+ require 'ffi-mysql'
3
+
4
module Embulk
  module Filter
    # Embulk filter plugin that executes a prepared MySQL statement once per
    # input record and emits every row the statement returns.
    #
    # The values bound to the statement placeholders are read from the input
    # record, using the column names listed in the "params" option. When
    # "keep_input" is true the input columns are emitted ahead of the query
    # result columns.
    class Mysql < FilterPlugin
      Plugin.register_filter("mysql", self)

      # Reads the plugin configuration, derives the output columns from the
      # prepared statement's result metadata and hands both to Embulk.
      #
      # @param config    plugin configuration (read through config.param)
      # @param in_schema schema of the incoming records
      # @yield [task, columns] the task hash and the list of output columns
      def self.transaction(config, in_schema, &control)
        task = {
          "host" => config.param("host", :string, default: 'localhost'),
          "user" => config.param("user", :string),
          "password" => config.param("password", :string),
          "database" => config.param("database", :string),
          "port" => config.param("port", :integer, default: 3306),
          "query" => config.param("query", :string),
          "params" => config.param("params", :array),
          "keep_input" => config.param("keep_input", :bool, default: false)
        }

        # A parameter that does not name an input column is looked up as nil
        # for every record, so the placeholder is bound to NULL. That is
        # almost always a configuration mistake; make it visible.
        unknown_params = Array(task['params']) - in_schema.names
        unless unknown_params.empty?
          Embulk.logger.warn "params not found in input schema (will be bound as NULL): #{unknown_params.join(', ')}"
        end

        columns = []
        columns.concat(in_schema) if task['keep_input']
        columns.concat(result_columns(task))

        yield(task, columns)
      end

      # Opens a MySQL connection using the connection settings stored in task.
      # Shared by .transaction (metadata lookup) and #init (query execution).
      #
      # @param task [Hash] task hash built by .transaction
      # @return the connection object returned by ::Mysql.real_connect
      def self.open_connection(task)
        ::Mysql.real_connect(task['host'], task['user'], task['password'], task['database'], task['port'])
      end

      # Prepares the configured query on a short-lived connection and turns
      # its result metadata into Embulk columns. The connection is only needed
      # for this lookup, so it is always closed before returning instead of
      # being left open for the rest of the run.
      #
      # @param task [Hash] task hash built by .transaction
      # @return [Array<Column>] one column per field of the query result
      def self.result_columns(task)
        connection = open_connection(task)
        begin
          statement = connection.prepare(task['query'])
          statement.result_metadata.fetch_fields.map do |field|
            Column.new(nil, field.name, get_type(field.type))
          end
        ensure
          connection.close
        end
      end

      # ::Mysql::Field::TYPE_DECIMAL = 0
      # ::Mysql::Field::TYPE_TINY = 1
      # ::Mysql::Field::TYPE_SHORT = 2
      # ::Mysql::Field::TYPE_LONG = 3
      # ::Mysql::Field::TYPE_FLOAT = 4
      # ::Mysql::Field::TYPE_DOUBLE = 5
      # ::Mysql::Field::TYPE_NULL = 6
      # ::Mysql::Field::TYPE_TIMESTAMP = 7
      # ::Mysql::Field::TYPE_LONGLONG = 8
      # ::Mysql::Field::TYPE_INT24 = 9
      # ::Mysql::Field::TYPE_DATE = 10
      # ::Mysql::Field::TYPE_TIME = 11
      # ::Mysql::Field::TYPE_DATETIME = 12
      # ::Mysql::Field::TYPE_YEAR = 13
      # ::Mysql::Field::TYPE_NEWDATE = 14
      # ::Mysql::Field::TYPE_VARCHAR = 15
      # ::Mysql::Field::TYPE_BIT = 16
      # ::Mysql::Field::TYPE_TIMESTAMP2 = 17
      # ::Mysql::Field::TYPE_DATETIME2 = 18
      # ::Mysql::Field::TYPE_TIME2 = 19
      # ::Mysql::Field::TYPE_JSON = 245
      # ::Mysql::Field::TYPE_NEWDECIMAL = 246
      # ::Mysql::Field::TYPE_ENUM = 247
      # ::Mysql::Field::TYPE_SET = 248
      # ::Mysql::Field::TYPE_TINY_BLOB = 249
      # ::Mysql::Field::TYPE_MEDIUM_BLOB = 250
      # ::Mysql::Field::TYPE_LONG_BLOB = 251
      # ::Mysql::Field::TYPE_BLOB = 252
      # ::Mysql::Field::TYPE_VAR_STRING = 253
      # ::Mysql::Field::TYPE_STRING = 254
      # ::Mysql::Field::TYPE_GEOMETRY = 255
      # ::Mysql::Field::TYPE_CHAR = TYPE_TINY
      # ::Mysql::Field::TYPE_INTERVAL = TYPE_ENUM

      # Maps a MySQL field type code (see the table above) to an Embulk
      # column type.
      #
      # @param type [Integer] one of the ::Mysql::Field::TYPE_* constants
      # @return [Symbol] :boolean, :long, :double, :timestamp or :string
      # @raise [RuntimeError] when the field type has no mapping
      def self.get_type(type)
        case type
        when ::Mysql::Field::TYPE_TINY
          # NOTE(review): TINYINT values are presumably fetched as integers,
          # not true/false - confirm that a :boolean column accepts them.
          :boolean
        when ::Mysql::Field::TYPE_SHORT, ::Mysql::Field::TYPE_LONG,
             ::Mysql::Field::TYPE_INT24, ::Mysql::Field::TYPE_LONGLONG
          # INT24 (MEDIUMINT) and LONGLONG (BIGINT) are integer types too;
          # without them a query selecting e.g. a BIGINT id could not be used.
          :long
        when ::Mysql::Field::TYPE_DOUBLE, ::Mysql::Field::TYPE_FLOAT
          :double
        when ::Mysql::Field::TYPE_DATE, ::Mysql::Field::TYPE_DATETIME, ::Mysql::Field::TYPE_TIMESTAMP
          :timestamp
        when ::Mysql::Field::TYPE_BLOB, ::Mysql::Field::TYPE_STRING, ::Mysql::Field::TYPE_VAR_STRING, ::Mysql::Field::TYPE_VARCHAR
          :string
        else
          # Still a RuntimeError, as before, but now it says what went wrong.
          raise "Unsupported MySQL field type: #{type.inspect}"
        end
      end

      # Opens the connection used for query execution, prepares the statement
      # once and caches the settings consulted for every record.
      def init
        @connection = self.class.open_connection(task)
        @statement = @connection.prepare(task['query'])
        @params = task['params'] || []
        @keep_input = task['keep_input']
        # The input column names do not change between records; compute once.
        @input_names = in_schema.names
      end

      # Closes the connection opened by #init, if there is one.
      def close
        Embulk.logger.info "connection closing..."
        @connection.close if @connection
      end

      # Executes the prepared statement for each record of the page and adds
      # one output record per returned row.
      #
      # @param page the page of input records handed over by Embulk
      def add(page)
        page.each do |record|
          input = Hash[@input_names.zip(record)]
          # Bind values in the order the parameter names were configured.
          bind_values = @params.map { |name| input[name] }
          @statement.execute(*bind_values).each do |values|
            fetched = values.map { |value| cast(value) }
            page_builder.add(@keep_input ? record + fetched : fetched)
          end
        end
      end

      # Signals Embulk that no more records will be added.
      def finish
        page_builder.finish
      end

      # Converts a value fetched from MySQL into a value for the page builder.
      #
      # Strings are tagged as UTF-8 in place, ::Mysql::Time values become
      # Time objects in the process-local time zone, and everything else is
      # passed through unchanged.
      #
      # @param value the raw value fetched from the statement
      # @return the converted value
      def cast(value)
        case value
        when String
          value.force_encoding('UTF-8')
        when ::Mysql::Time
          Time.local(
            value.year,
            value.month,
            value.day,
            value.hour,
            value.minute,
            value.second
          )
        else
          value
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-mysql
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - toyama0919
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ name: ffi-mysql
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.3
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.3
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Mysql
70
+ email:
71
+ - toyama0919@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-filter-mysql.gemspec
83
+ - lib/embulk/filter/mysql.rb
84
+ homepage: https://github.com/toyama0919/embulk-filter-mysql
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.4.8
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Mysql filter plugin for Embulk. Execute prepared statements query.
108
+ test_files: []