elasticsearch_mysql_importer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5272f35fb0aa318d58f9cbacb9c70ea5ac2ed2d5
4
+ data.tar.gz: 512a335ab41175198e66836c80b175f82141778e
5
+ SHA512:
6
+ metadata.gz: 7a7e84cbe1b85da430b3e61792535b7ac49c1c920b90bf6a3b7036dba76bfa97e3c3760b08748f4218ac282a78b2dd3e7cff845eb1f98e0388d4185800c454ee
7
+ data.tar.gz: 3d925a7f1d94bb3a69bd8f72fb8592cd2e98adc5eb4c6c26a119f31722cbc6b48004c32434ab7107909b0f5c3a1f9486fd25715ce608775bd159e9fec240aeb2
data/.gitignore ADDED
@@ -0,0 +1,23 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
23
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elasticsearch_mysql_importer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 y-ken
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # elasticsearch_mysql_importer
2
+
3
+ It imports data from a MySQL table into Elasticsearch using SQL. Beyond that, it can also generate nested documents.
4
+
5
+ ## Usage
6
+
7
+ # Clone repository
8
+ $ git clone https://github.com/y-ken/elasticsearch_mysql_importer.git
9
+ $ cd elasticsearch_mysql_importer
10
+ $ bundle install --path vendor/bundle
11
+
12
+ # Setup mysql connection and query
13
+ $ vim example.rb
14
+
15
+ # Execute script, then it outputs result into ./requests.json
16
+ $ bundle exec ruby example.rb
17
+
18
+ # Index document for elasticsearch
19
+ $ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
20
+
21
+ ## TODO
22
+
23
+ Pull requests are very welcome!!
24
+
25
+ * support thread
26
+ * call elasticsearch bulk api directly
27
+
28
+ ## Contributing
29
+
30
+ 1. Fork it ( https://github.com/y-ken/elasticsearch_mysql_importer/fork )
31
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
32
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
33
+ 4. Push to the branch (`git push origin my-new-feature`)
34
+ 5. Create a new Pull Request
35
+
36
+ ## Copyright
37
+
38
+ Copyright © 2014- Kentaro Yoshida ([@yoshi_ken](https://twitter.com/yoshi_ken))
39
+
40
+ ## License
41
+
42
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'elasticsearch_mysql_importer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "elasticsearch_mysql_importer"
8
+ spec.version = ElasticsearchMysqlImporter::VERSION
9
+ spec.authors = ["Kentaro Yoshida"]
10
+ spec.email = ["y.ken.studio@gmail.com"]
11
+ spec.summary = %q{bulk import file generator as well as nested document from MySQL for elasticsearch bulk api}
12
+ spec.homepage = "https://github.com/y-ken/elasticsearch_mysql_importer"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "bundler", "~> 1.6"
21
+ spec.add_development_dependency "rake"
22
+ spec.add_runtime_dependency "mysql2"
23
+ spec.add_runtime_dependency "yajl-ruby"
24
+ end
data/example.rb ADDED
@@ -0,0 +1,43 @@
1
+ # coding: utf-8
2
+ require 'elasticsearch_mysql_importer'
3
+
4
+ importer = ElasticsearchMysqlImporter::Importer.new
5
+ importer.configure do |config|
6
+ # required
7
+ config.mysql_host = 'localhost'
8
+ config.mysql_username = 'your_mysql_username'
9
+ config.mysql_password = 'your_mysql_password'
10
+ config.mysql_database = 'some_database'
11
+
12
+ # optional; required only when generating nested documents
13
+ config.prepared_query = '
14
+ CREATE TEMPORARY TABLE tmp_member_skill
15
+ SELECT
16
+ members.id AS member_id,
17
+ skills.name AS skill_name,
18
+ skills.url AS skill_url
19
+ FROM
20
+ members
21
+ LEFT JOIN member_skill_relation ON members.id = member_id
22
+ LEFT JOIN skills ON skills.id = skill_id;
23
+ '
24
+ # required
25
+ config.query = '
26
+ SELECT
27
+ members.id AS member_id,
28
+ members.name AS member_name,
29
+ "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
30
+ current_timestamp
31
+ FROM
32
+ members
33
+ ;
34
+ '
35
+ # required: column whose value is used as the unique document id (_id) in elasticsearch
36
+ config.primary_key = 'member_id'
37
+
38
+ # required: path of the output file
39
+ config.output_file = 'requests.json'
40
+ end
41
+
42
+ importer.write_file
43
+ p importer.output_file
@@ -0,0 +1,7 @@
1
+ require 'elasticsearch_mysql_importer/version'
2
+ require 'elasticsearch_mysql_importer/configuration'
3
+ require 'elasticsearch_mysql_importer/importer'
4
+
5
+ module ElasticsearchMysqlImporter
6
+
7
+ end
@@ -0,0 +1,19 @@
1
module ElasticsearchMysqlImporter
  # Plain settings object populated inside Importer#configure.
  #
  # Defaults are assigned in #initialize; every attribute not listed there
  # starts out as nil and must be set by the caller when it is needed.
  class Configuration
    # MySQL connection settings.
    attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
    # Database to use, query options, the queries to run, the id column and the
    # path of the generated file.
    attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
    # Values written to "_index" and "_type" of each bulk action header.
    # Importer reads both, so they must exist as accessors; they default to nil.
    attr_accessor :elasticsearch_index, :elasticsearch_type

    # Assigns the default value of every setting that has one.
    def initialize
      super

      @mysql_host = 'localhost'
      @mysql_port = '3306'
      @mysql_socket = nil
      @mysql_username = 'root'
      @mysql_password = ''
      @mysql_encoding = 'utf8'
      @mysql_options = { :cast => false, :cache_rows => true }
      @primary_key = 'id'
      @elasticsearch_index = nil
      @elasticsearch_type = nil
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'mysql2'
2
+ require 'yajl'
3
+ require 'tempfile'
4
+ require "net/http"
5
+ require "uri"
6
+
7
module ElasticsearchMysqlImporter
  # Runs a MySQL query and writes one Elasticsearch bulk "index" action per
  # result row (an action header line followed by the JSON-encoded row) to a
  # file. Column values that look like a SELECT statement are executed as
  # nested queries and replaced by their result rows.
  class Importer

    # Path of the file opened by the most recent #write_file call.
    attr_accessor :output_file

    # Creates the Configuration on first use, yields it to the optional block
    # and validates the result.
    #
    # @yieldparam config [Configuration] the settings object to fill in
    # @raise [RuntimeError] when 'mysql_database' or 'query' is nil
    def configure
      @configuration ||= Configuration.new
      yield(@configuration) if block_given?
      validate_configuration
    end

    # Generates the bulk request file at the configured 'output_file' path.
    #
    # @return [String] path of the written file
    # @raise [RuntimeError] when 'output_file' is not configured
    def write_file
      if @configuration.output_file.nil?
        raise "Missing Configuration: 'output_file' is required."
      end
      create_import_file
    end

    private

    # Raises unless the two settings every import needs are present.
    def validate_configuration
      if @configuration.mysql_database.nil? || @configuration.query.nil?
        raise "Missing Configuration: 'mysql_database' or 'query' are required."
      end
    end

    # Opens a MySQL connection from the configuration. A configured socket
    # takes precedence over the TCP port (the socket path is not tested yet).
    def connect_db
      params = {
        :host => @configuration.mysql_host,
        :username => @configuration.mysql_username,
        :password => @configuration.mysql_password,
        :database => @configuration.mysql_database,
        # Honour the configured encoding; fall back to the former fixed value.
        :encoding => @configuration.mysql_encoding || 'utf8',
        :reconnect => true
      }
      if @configuration.mysql_socket.nil?
        params[:port] = @configuration.mysql_port
      else
        params[:socket] = @configuration.mysql_socket
      end
      Mysql2::Client.new(params)
    end

    # Opens the output target (a Tempfile when no 'output_file' is configured)
    # and remembers its path in #output_file.
    def get_file_io_object
      if @configuration.output_file.nil?
        file = Tempfile.open(['elasticsearch_mysql_importer_', '.json'])
      else
        file = File.open(@configuration.output_file, 'w+')
      end
      @output_file = file.path
      file
    end

    # Replaces every column of +row+ whose value starts with SELECT by the rows
    # returned from running that value as a query. `${column}` placeholders in
    # the statement are substituted with the current row's value for that column.
    #
    # NOTE(review): substitution is plain string interpolation into SQL, and any
    # column value that starts with SELECT is executed. Only use this with
    # trusted data.
    def expand_nested_queries(db, row)
      # \A (not ^) so only a value that *begins* with SELECT qualifies; ^ would
      # also match a later line of a multi-line text value.
      nested = row.select { |_, value| value.to_s.strip =~ /\ASELECT/i }
      nested.each do |column, statement|
        bound = statement.gsub(/\$\{([^\}]+)\}/) { row[Regexp.last_match(1)].to_s }
        # Nested queries run without mysql_options, as they always have.
        row[column] = db.query(bound).to_a
      end
    end

    # Streams the configured query into the output file and returns its path.
    def create_import_file
      file = get_file_io_object
      db = connect_db
      # 'prepared_query' is optional; it is only needed to stage data for
      # nested documents.
      unless @configuration.prepared_query.nil?
        db.query(@configuration.prepared_query, @configuration.mysql_options)
      end
      db.query(@configuration.query, @configuration.mysql_options).each do |row|
        expand_nested_queries(db, row)
        header = {
          "index" => {
            "_index" => @configuration.elasticsearch_index,
            "_type" => @configuration.elasticsearch_type,
            "_id" => row[@configuration.primary_key]
          }
        }
        file.puts(Yajl::Encoder.encode(header))
        file.puts(Yajl::Encoder.encode(row))
      end
      file.path
    ensure
      # Release the connection and flush buffered output so the file is
      # complete by the time the caller reads it.
      db.close if db
      file.close if file
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module ElasticsearchMysqlImporter
2
+ VERSION = "0.0.1"
3
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elasticsearch_mysql_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kentaro Yoshida
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mysql2
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yajl-ruby
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description:
70
+ email:
71
+ - y.ken.studio@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - elasticsearch_mysql_importer.gemspec
82
+ - example.rb
83
+ - lib/elasticsearch_mysql_importer.rb
84
+ - lib/elasticsearch_mysql_importer/configuration.rb
85
+ - lib/elasticsearch_mysql_importer/importer.rb
86
+ - lib/elasticsearch_mysql_importer/version.rb
87
+ homepage: https://github.com/y-ken/elasticsearch_mysql_importer
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.2.2
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: bulk import file generator as well as nested document from MySQL for elasticsearch
111
+ bulk api
112
+ test_files: []