elasticsearch_mysql_importer 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5272f35fb0aa318d58f9cbacb9c70ea5ac2ed2d5
4
- data.tar.gz: 512a335ab41175198e66836c80b175f82141778e
3
+ metadata.gz: 0fc769852e9c3fe18f682d2ff00bf490355b6e7a
4
+ data.tar.gz: 53258865a42da4e001c437e819bacd9faecc16f7
5
5
  SHA512:
6
- metadata.gz: 7a7e84cbe1b85da430b3e61792535b7ac49c1c920b90bf6a3b7036dba76bfa97e3c3760b08748f4218ac282a78b2dd3e7cff845eb1f98e0388d4185800c454ee
7
- data.tar.gz: 3d925a7f1d94bb3a69bd8f72fb8592cd2e98adc5eb4c6c26a119f31722cbc6b48004c32434ab7107909b0f5c3a1f9486fd25715ce608775bd159e9fec240aeb2
6
+ metadata.gz: aaa6bcc65f4d711bc98afd9b7e58894fcde315a527d1ba9483900e798faa25c30a977f82ea784ac9766aac3463b1e48e40dc285e993cfac992d3a5090c49fc9b
7
+ data.tar.gz: cad0f17c9e96f98bf6d145e17361ace9b0e76f76a901407d24f1695e6bf20365fde65cc3b338347ea23d94d2eb12bea7a54a90cb80d00ea574fef049f75ae6c1
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # elasticsearch_mysql_importer
2
2
 
3
- It is importing from mysql table with SQL to elasticsearch not only that, it could generating nested documents.
3
+ It imports data from MySQL tables into Elasticsearch using SQL. Not only that, it can also generate nested documents.
4
4
 
5
5
  ## Usage
6
6
 
@@ -13,17 +13,18 @@ It is importing from mysql table with SQL to elasticsearch not only that, it cou
13
13
  $ vim example.rb
14
14
 
15
15
  # Execute script, then it outputs result into ./requests.json
16
- $ bundle exec ruby example.rb
16
+ $ bundle exec ruby example/example.rb
17
17
 
18
- # Index document for elasticsearch
19
- $ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
18
+ # Index the documents into elasticsearch if you didn't call 'write_elasticsearch' in example.rb
19
+ $ curl -s -XPOST localhost:9200/_bulk --data-binary @example/requests.json
20
20
 
21
21
  ## TODO
22
22
 
23
23
  Pull requests are very welcome!!
24
24
 
25
+ * add test
25
26
  * support thread
26
- * call elasticsearch bluk api directory
27
+ * support CLI command
27
28
 
28
29
  ## Contributing
29
30
 
@@ -0,0 +1,62 @@
1
+ # coding: utf-8
2
+ require 'elasticsearch_mysql_importer'
3
+
4
+ importer = ElasticsearchMysqlImporter::Importer.new
5
+ importer.configure do |config|
6
+ # required
7
+ config.mysql_host = 'localhost'
8
+ config.mysql_username = 'your_mysql_username'
9
+ config.mysql_password = 'your_mysql_password'
10
+ config.mysql_database = 'some_database'
11
+
12
+ # optional, but required when generating nested documents
13
+ config.prepared_query = '
14
+ CREATE TEMPORARY TABLE tmp_member_skill
15
+ SELECT
16
+ members.id AS member_id,
17
+ skills.name AS skill_name,
18
+ skills.url AS skill_url
19
+ FROM
20
+ members
21
+ LEFT JOIN member_skill_relation ON members.id = member_id
22
+ LEFT JOIN skills ON skills.id = skill_id;
23
+ '
24
+ # required for importing into elasticsearch
25
+ config.query = '
26
+ SELECT
27
+ members.id AS member_id,
28
+ members.name AS member_name,
29
+ "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills
30
+ FROM
31
+ members
32
+ ;
33
+ '
34
+ # required for using unique index into elasticsearch
35
+ config.primary_key = 'member_id'
36
+
37
+ # To post the index directly into elasticsearch,
38
+ # configure following two lines and call 'write_elasticsearch' method.
39
+ # config.elasticsearch_host = 'localhost' # default: localhost
40
+ # config.elasticsearch_port = 9200 # default: 9200
41
+
42
+ # required for specifying elasticsearch index and type
43
+ config.elasticsearch_index = 'importer_example'
44
+ config.elasticsearch_type = 'member_skill'
45
+
46
+ # required for writing output file path
47
+ config.output_file = 'example/requests.json'
48
+ end
49
+
50
+ if importer.write_file
51
+ puts "Finished to run importer.write_file."
52
+ puts "The output file is written at '#{importer.output_file}'"
53
+ puts "Let's try importing file with following curl command."
54
+ puts "e.g.) curl -s -XPOST localhost:9200/_bulk --data-binary @#{importer.output_file}\n\n"
55
+ end
56
+
57
+ #if importer.write_elasticsearch
58
+ # puts "Finished to run importer.write_elasticsearch."
59
+ # puts "Let's checking results of index with following curl command."
60
+ # puts "e.g.) curl localhost:9200/importer_example/_search?pretty=1"
61
+ #end
62
+ # To post the index directly into elasticsearch, uncomment the 'write_elasticsearch' block above.
@@ -0,0 +1,33 @@
1
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
2
+ /*!40101 SET character_set_client = utf8 */;
3
+ CREATE TABLE `member_skill_relation` (
4
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
5
+ `member_id` int(11) DEFAULT NULL,
6
+ `skill_id` int(11) DEFAULT NULL,
7
+ PRIMARY KEY (`id`)
8
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
9
+ /*!40101 SET character_set_client = @saved_cs_client */;
10
+ INSERT INTO `member_skill_relation` VALUES (1,1,1),(2,1,2),(3,2,3),(4,3,3),(5,3,4),(6,3,5);
11
+
12
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
13
+ /*!40101 SET character_set_client = utf8 */;
14
+ CREATE TABLE `members` (
15
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
16
+ `name` varchar(100) DEFAULT NULL,
17
+ `created_at` datetime DEFAULT NULL,
18
+ `updated_at` datetime DEFAULT NULL,
19
+ PRIMARY KEY (`id`)
20
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
21
+ /*!40101 SET character_set_client = @saved_cs_client */;
22
+ INSERT INTO `members` VALUES (1,'User-A','2014-04-01 15:20:22','2014-04-01 15:20:22'),(2,'User-B','2014-04-01 15:21:30','2014-04-01 15:21:30'),(3,'User-C','2014-04-01 15:21:41','2014-04-01 15:21:41');
23
+
24
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
25
+ /*!40101 SET character_set_client = utf8 */;
26
+ CREATE TABLE `skills` (
27
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
28
+ `name` varchar(11) DEFAULT NULL,
29
+ `url` varchar(250) DEFAULT NULL,
30
+ PRIMARY KEY (`id`)
31
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
32
+ /*!40101 SET character_set_client = @saved_cs_client */;
33
+ INSERT INTO `skills` VALUES (1,'PHP','http://php.net/'),(2,'Ruby','https://www.ruby-lang.org/'),(3,'Python','https://www.python.org/'),(4,'Java','https://www.java.com/'),(5,'Perl','http://www.perl.org/');
@@ -0,0 +1,6 @@
1
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"1"}}
2
+ {"member_id":"1","member_name":"User-A","skills":[{"skill_name":"PHP","skill_url":"http://php.net/"},{"skill_name":"Ruby","skill_url":"https://www.ruby-lang.org/"}]}
3
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"2"}}
4
+ {"member_id":"2","member_name":"User-B","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"}]}
5
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"3"}}
6
+ {"member_id":"3","member_name":"User-C","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"},{"skill_name":"Java","skill_url":"https://www.java.com/"},{"skill_name":"Perl","skill_url":"http://www.perl.org/"}]}
@@ -2,6 +2,7 @@ module ElasticsearchMysqlImporter
2
2
  class Configuration
3
3
  attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
4
4
  attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
5
+ attr_accessor :elasticsearch_host, :elasticsearch_port, :elasticsearch_index, :elasticsearch_type
5
6
 
6
7
  def initialize
7
8
  super
@@ -14,6 +15,8 @@ module ElasticsearchMysqlImporter
14
15
  @mysql_encoding = 'utf8'
15
16
  @mysql_options = { :cast => false, :cache_rows => true }
16
17
  @primary_key = 'id'
18
+ @elasticsearch_host = 'localhost'
19
+ @elasticsearch_port = 9200
17
20
  end
18
21
  end
19
22
  end
@@ -22,10 +22,17 @@ module ElasticsearchMysqlImporter
22
22
  create_import_file
23
23
  end
24
24
 
25
+ def write_elasticsearch
26
+ call_elasticsearch_bulk_api
27
+ end
28
+
25
29
  private
26
30
  def validate_configuration
27
31
  if @configuration.mysql_database.nil? or @configuration.query.nil?
28
- raise "Missing Configuration: 'mysql_database' or 'query' are required."
32
+ raise "Missing Configuration: 'mysql_database' and 'query' are required."
33
+ end
34
+ if @configuration.elasticsearch_index.nil? or @configuration.elasticsearch_type.nil?
35
+ raise "Missing Configuration: 'elasticsearch_index' and 'elasticsearch_type' are required."
29
36
  end
30
37
  end
31
38
 
@@ -38,7 +45,7 @@ module ElasticsearchMysqlImporter
38
45
  :username => @configuration.mysql_username,
39
46
  :password => @configuration.mysql_password,
40
47
  :database => @configuration.mysql_database,
41
- :encoding => 'utf8',
48
+ :encoding => @configuration.mysql_encoding,
42
49
  :reconnect => true
43
50
  })
44
51
  else
@@ -48,7 +55,7 @@ module ElasticsearchMysqlImporter
48
55
  :username => @configuration.mysql_username,
49
56
  :password => @configuration.mysql_password,
50
57
  :database => @configuration.mysql_database,
51
- :encoding => 'utf8',
58
+ :encoding => @configuration.mysql_encoding,
52
59
  :reconnect => true
53
60
  })
54
61
  end
@@ -65,27 +72,45 @@ module ElasticsearchMysqlImporter
65
72
  end
66
73
 
67
74
  def create_import_file
68
- file = get_file_io_object
69
- db = connect_db
70
- db.query(@configuration.prepared_query, @configuration.mysql_options)
71
- db.query(@configuration.query, @configuration.mysql_options).each do |row|
72
- row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
73
- row[k] = [] unless row[k].is_a?(Array)
74
- db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
75
- row[k] << nest_row
75
+ begin
76
+ file = get_file_io_object
77
+ db = connect_db
78
+ db.query(@configuration.prepared_query, @configuration.mysql_options)
79
+ db.query(@configuration.query, @configuration.mysql_options).each do |row|
80
+ row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
81
+ row[k] = [] unless row[k].is_a?(Array)
82
+ db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
83
+ row[k] << nest_row
84
+ end
76
85
  end
77
- end
78
- header = {
79
- "index" => {
80
- "_index" => @configuration.elasticsearch_index,
81
- "_type" => @configuration.elasticsearch_type,
82
- "_id" => row[@configuration.primary_key]
86
+ header = {
87
+ "index" => {
88
+ "_index" => @configuration.elasticsearch_index,
89
+ "_type" => @configuration.elasticsearch_type,
90
+ "_id" => row[@configuration.primary_key]
91
+ }
83
92
  }
84
- }
85
- file.puts(Yajl::Encoder.encode(header))
86
- file.puts(Yajl::Encoder.encode(row))
93
+ file.puts(Yajl::Encoder.encode(header))
94
+ file.puts(Yajl::Encoder.encode(row))
95
+ end
96
+ file.seek 0
97
+ return file.path
98
+ rescue StandardError => e
99
+ puts "Failed to generate import file: #{e.message}"
100
+ end
101
+ end
102
+
103
+ def call_elasticsearch_bulk_api
104
+ begin
105
+ elasticsearch_bulk_uri = "http://#{@configuration.elasticsearch_host}:#{@configuration.elasticsearch_port}/_bulk"
106
+ uri = URI.parse(elasticsearch_bulk_uri)
107
+ data = File.open(@output_file, 'r').read
108
+ raise "Error: generated import file is empty." if data.empty?
109
+ http = Net::HTTP.new(uri.host, uri.port)
110
+ response, body = http.post(uri.path, data, {'Content-type'=>'application/json'})
111
+ rescue Timeout::Error, StandardError => e
112
+ puts "Failed to call Bulk API: #{e.message}"
87
113
  end
88
- return file.path
89
114
  end
90
115
  end
91
116
  end
@@ -1,3 +1,3 @@
1
1
  module ElasticsearchMysqlImporter
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch_mysql_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kentaro Yoshida
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-23 00:00:00.000000000 Z
11
+ date: 2014-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -79,7 +79,9 @@ files:
79
79
  - README.md
80
80
  - Rakefile
81
81
  - elasticsearch_mysql_importer.gemspec
82
- - example.rb
82
+ - example/example.rb
83
+ - example/example_table.sql
84
+ - example/requests.json
83
85
  - lib/elasticsearch_mysql_importer.rb
84
86
  - lib/elasticsearch_mysql_importer/configuration.rb
85
87
  - lib/elasticsearch_mysql_importer/importer.rb
data/example.rb DELETED
@@ -1,43 +0,0 @@
1
- # coding: utf-8
2
- require 'elasticsearch_mysql_importer'
3
-
4
- importer = ElasticsearchMysqlImporter::Importer.new
5
- importer.configure do |config|
6
- # required
7
- config.mysql_host = 'localhost'
8
- config.mysql_username = 'your_mysql_username'
9
- config.mysql_password = 'your_mysql_password'
10
- config.mysql_database = 'some_database'
11
-
12
- # optional, but it is required only generating nested documents
13
- config.prepared_query = '
14
- CREATE TEMPORARY TABLE tmp_member_skill
15
- SELECT
16
- members.id AS member_id,
17
- skills.name AS skill_name,
18
- skills.url AS skill_url
19
- FROM
20
- members
21
- LEFT JOIN member_skill_relation ON members.id = member_id
22
- LEFT JOIN skills ON skills.id = skill_id;
23
- '
24
- # required
25
- config.query = '
26
- SELECT
27
- members.id AS member_id,
28
- members.name AS member_name,
29
- "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
30
- current_timestamp
31
- FROM
32
- members
33
- ;
34
- '
35
- # required for using unique index for elasticsearch
36
- config.primary_key = 'member_id'
37
-
38
- # required for outputs file path
39
- config.output_file = 'requests.json'
40
- end
41
-
42
- importer.write_file
43
- p importer.output_file