elasticsearch_mysql_importer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5272f35fb0aa318d58f9cbacb9c70ea5ac2ed2d5
4
+ data.tar.gz: 512a335ab41175198e66836c80b175f82141778e
5
+ SHA512:
6
+ metadata.gz: 7a7e84cbe1b85da430b3e61792535b7ac49c1c920b90bf6a3b7036dba76bfa97e3c3760b08748f4218ac282a78b2dd3e7cff845eb1f98e0388d4185800c454ee
7
+ data.tar.gz: 3d925a7f1d94bb3a69bd8f72fb8592cd2e98adc5eb4c6c26a119f31722cbc6b48004c32434ab7107909b0f5c3a1f9486fd25715ce608775bd159e9fec240aeb2
data/.gitignore ADDED
@@ -0,0 +1,23 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
23
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elasticsearch_mysql_importer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 y-ken
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # elasticsearch_mysql_importer
2
+
3
+ Imports rows from a MySQL table into Elasticsearch using SQL queries. It can also generate nested documents.
4
+
5
+ ## Usage
6
+
7
+ # Clone repository
8
+ $ git clone https://github.com/y-ken/elasticsearch_mysql_importer.git
9
+ $ cd elasticsearch_mysql_importer
10
+ $ bundle install --path vendor/bundle
11
+
12
+ # Setup mysql connection and query
13
+ $ vim example.rb
14
+
15
+ # Run the script; it writes the result to ./requests.json
16
+ $ bundle exec ruby example.rb
17
+
18
+ # Index the documents into Elasticsearch
19
+ $ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
20
+
21
+ ## TODO
22
+
23
+ Pull requests are very welcome!!
24
+
25
+ * support threads
26
+ * call the Elasticsearch bulk API directly
27
+
28
+ ## Contributing
29
+
30
+ 1. Fork it ( https://github.com/y-ken/elasticsearch_mysql_importer/fork )
31
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
32
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
33
+ 4. Push to the branch (`git push origin my-new-feature`)
34
+ 5. Create a new Pull Request
35
+
36
+ ## Copyright
37
+
38
+ Copyright © 2014- Kentaro Yoshida ([@yoshi_ken](https://twitter.com/yoshi_ken))
39
+
40
+ ## License
41
+
42
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for elasticsearch_mysql_importer.
# Puts ./lib on the load path first so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'elasticsearch_mysql_importer/version'

Gem::Specification.new do |spec|
  # Identity and authorship
  spec.name     = "elasticsearch_mysql_importer"
  spec.version  = ElasticsearchMysqlImporter::VERSION
  spec.authors  = ["Kentaro Yoshida"]
  spec.email    = ["y.ken.studio@gmail.com"]
  spec.summary  = %q{bulk import file generator as well as nested document from MySQL for elasticsearch bulk api}
  spec.homepage = "https://github.com/y-ken/elasticsearch_mysql_importer"
  spec.license  = "MIT"

  # Packaged files are whatever git tracks; executables and tests are
  # derived from that list by path prefix.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "mysql2"
  spec.add_runtime_dependency "yajl-ruby"
end
data/example.rb ADDED
@@ -0,0 +1,43 @@
1
+ # coding: utf-8
2
+ require 'elasticsearch_mysql_importer'
3
+
4
+ importer = ElasticsearchMysqlImporter::Importer.new
5
+ importer.configure do |config|
6
+ # required
7
+ config.mysql_host = 'localhost'
8
+ config.mysql_username = 'your_mysql_username'
9
+ config.mysql_password = 'your_mysql_password'
10
+ config.mysql_database = 'some_database'
11
+
12
+ # optional; only needed when generating nested documents
13
+ config.prepared_query = '
14
+ CREATE TEMPORARY TABLE tmp_member_skill
15
+ SELECT
16
+ members.id AS member_id,
17
+ skills.name AS skill_name,
18
+ skills.url AS skill_url
19
+ FROM
20
+ members
21
+ LEFT JOIN member_skill_relation ON members.id = member_id
22
+ LEFT JOIN skills ON skills.id = skill_id;
23
+ '
24
+ # required
25
+ config.query = '
26
+ SELECT
27
+ members.id AS member_id,
28
+ members.name AS member_name,
29
+ "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
30
+ current_timestamp
31
+ FROM
32
+ members
33
+ ;
34
+ '
35
+ # required: column whose value is used as the document "_id" in elasticsearch
36
+ config.primary_key = 'member_id'
37
+
38
+ # required: path of the output file
39
+ config.output_file = 'requests.json'
40
+ end
41
+
42
+ importer.write_file
43
+ p importer.output_file
@@ -0,0 +1,7 @@
1
+ require 'elasticsearch_mysql_importer/version'
2
+ require 'elasticsearch_mysql_importer/configuration'
3
+ require 'elasticsearch_mysql_importer/importer'
4
+
5
+ module ElasticsearchMysqlImporter
6
+
7
+ end
@@ -0,0 +1,19 @@
1
module ElasticsearchMysqlImporter
  # Settings object handed to the block given to Importer#configure.
  #
  # MySQL connection settings come with defaults (see #initialize).
  # +mysql_database+, +query+, +prepared_query+, +output_file+,
  # +elasticsearch_index+ and +elasticsearch_type+ have no default and
  # stay nil until the caller assigns them.
  class Configuration
    attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
    attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
    # Importer reads these two when it builds the "index" action line of
    # each bulk request ("_index" and "_type"). They were missing before,
    # which made every import fail with NoMethodError.
    attr_accessor :elasticsearch_index, :elasticsearch_type

    # Populates the defaults for the MySQL connection and the primary key.
    def initialize
      super

      @mysql_host = 'localhost'
      @mysql_port = '3306'
      @mysql_socket = nil
      @mysql_username = 'root'
      @mysql_password = ''
      @mysql_encoding = 'utf8'
      @mysql_options = { :cast => false, :cache_rows => true }
      @primary_key = 'id'
      # No sensible default exists for the target index/type; the caller
      # is expected to set them.
      @elasticsearch_index = nil
      @elasticsearch_type = nil
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'mysql2'
2
+ require 'yajl'
3
+ require 'tempfile'
4
+ require "net/http"
5
+ require "uri"
6
+
7
module ElasticsearchMysqlImporter
  # Runs the configured SQL against MySQL and writes the rows to a file as
  # alternating action/document lines for the Elasticsearch bulk API.
  #
  # Any column whose value is itself text starting with "SELECT" is treated
  # as a nested query: it is executed once per row and the column is
  # replaced by the array of rows it returns.
  class Importer

    # Path of the file written by the most recent #write_file call.
    attr_accessor :output_file

    # Creates the Configuration on first use, yields it to the optional
    # block, then validates it.
    #
    # Raises RuntimeError when 'mysql_database' or 'query' is still nil.
    def configure
      @configuration ||= Configuration.new
      yield(@configuration) if block_given?
      validate_configuration
    end

    # Generates the bulk request file at the configured 'output_file'.
    #
    # Returns the path of the written file.
    # Raises RuntimeError when #configure has not been called or when
    # 'output_file' is not set.
    def write_file
      if @configuration.nil?
        raise "Missing Configuration: call 'configure' before 'write_file'."
      end
      if @configuration.output_file.nil?
        raise "Missing Configuration: 'output_file' is required."
      end
      create_import_file
    end

    private

    # Raises RuntimeError unless both mandatory settings are present.
    def validate_configuration
      if @configuration.mysql_database.nil? || @configuration.query.nil?
        raise "Missing Configuration: 'mysql_database' or 'query' are required."
      end
    end

    # Opens a MySQL connection from the configuration. A configured socket
    # takes precedence over the TCP port (socket path not tested yet).
    def connect_db
      options = {
        :host      => @configuration.mysql_host,
        :username  => @configuration.mysql_username,
        :password  => @configuration.mysql_password,
        :database  => @configuration.mysql_database,
        # Honour the configured encoding instead of a hard-coded value.
        :encoding  => @configuration.mysql_encoding || 'utf8',
        :reconnect => true
      }
      if @configuration.mysql_socket.nil?
        options[:port] = @configuration.mysql_port
      else
        options[:socket] = @configuration.mysql_socket
      end
      Mysql2::Client.new(options)
    end

    # Opens the output IO (a Tempfile when no 'output_file' is configured)
    # and records its path in @output_file.
    def get_file_io_object
      file =
        if @configuration.output_file.nil?
          Tempfile.open(['elasticsearch_mysql_importer_', '.json'])
        else
          File.open(@configuration.output_file, 'w+')
        end
      @output_file = file.path
      file
    end

    # Writes one action line and one document line per result row.
    # Returns the path of the written file.
    def create_import_file
      file = get_file_io_object
      db = nil
      begin
        db = connect_db
        # 'prepared_query' is optional; skip it when absent or blank.
        unless @configuration.prepared_query.to_s.strip.empty?
          db.query(@configuration.prepared_query, @configuration.mysql_options)
        end
        db.query(@configuration.query, @configuration.mysql_options).each do |row|
          expand_nested_queries(db, row)
          file.puts(Yajl::Encoder.encode(bulk_header(row)))
          file.puts(Yajl::Encoder.encode(row))
        end
      ensure
        # Close both handles even on failure so the file is flushed to disk
        # and the connection is not leaked.
        db.close unless db.nil?
        file.close
      end
      file.path
    end

    # Replaces every column holding a nested SELECT with the rows that
    # query returns. "${column}" placeholders are filled from the current
    # row, escaped so quotes in the data cannot alter the statement.
    def expand_nested_queries(db, row)
      # \A anchors at the start of the value; ^ would also match a later
      # line of multi-line text.
      nested = row.select { |_, value| value.to_s.strip =~ /\ASELECT/i }
      nested.each do |column, template|
        sql = template.gsub(/\$\{([^\}]+)\}/) do
          db.escape(row[Regexp.last_match(1)].to_s)
        end
        # As before, nested queries run without 'mysql_options'.
        row[column] = db.query(sql).to_a
      end
    end

    # Builds the "index" action line that precedes each document.
    def bulk_header(row)
      {
        "index" => {
          "_index" => @configuration.elasticsearch_index,
          "_type"  => @configuration.elasticsearch_type,
          "_id"    => row[@configuration.primary_key]
        }
      }
    end
  end
end
@@ -0,0 +1,3 @@
1
module ElasticsearchMysqlImporter
  # Gem version, read by the gemspec. Frozen so the constant cannot be
  # mutated in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch_mysql_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kentaro Yoshida
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mysql2
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yajl-ruby
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description:
70
+ email:
71
+ - y.ken.studio@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - elasticsearch_mysql_importer.gemspec
82
+ - example.rb
83
+ - lib/elasticsearch_mysql_importer.rb
84
+ - lib/elasticsearch_mysql_importer/configuration.rb
85
+ - lib/elasticsearch_mysql_importer/importer.rb
86
+ - lib/elasticsearch_mysql_importer/version.rb
87
+ homepage: https://github.com/y-ken/elasticsearch_mysql_importer
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.2.2
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: bulk import file generator as well as nested document from MySQL for elasticsearch
111
+ bulk api
112
+ test_files: []