elasticsearch_mysql_importer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 5272f35fb0aa318d58f9cbacb9c70ea5ac2ed2d5
- data.tar.gz: 512a335ab41175198e66836c80b175f82141778e
+ metadata.gz: 0fc769852e9c3fe18f682d2ff00bf490355b6e7a
+ data.tar.gz: 53258865a42da4e001c437e819bacd9faecc16f7
  SHA512:
- metadata.gz: 7a7e84cbe1b85da430b3e61792535b7ac49c1c920b90bf6a3b7036dba76bfa97e3c3760b08748f4218ac282a78b2dd3e7cff845eb1f98e0388d4185800c454ee
- data.tar.gz: 3d925a7f1d94bb3a69bd8f72fb8592cd2e98adc5eb4c6c26a119f31722cbc6b48004c32434ab7107909b0f5c3a1f9486fd25715ce608775bd159e9fec240aeb2
+ metadata.gz: aaa6bcc65f4d711bc98afd9b7e58894fcde315a527d1ba9483900e798faa25c30a977f82ea784ac9766aac3463b1e48e40dc285e993cfac992d3a5090c49fc9b
+ data.tar.gz: cad0f17c9e96f98bf6d145e17361ace9b0e76f76a901407d24f1695e6bf20365fde65cc3b338347ea23d94d2eb12bea7a54a90cb80d00ea574fef049f75ae6c1
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # elasticsearch_mysql_importer

- It is importing from mysql table with SQL to elasticsearch not only that, it could generating nested documents.
+ It imports data from a MySQL table into Elasticsearch using SQL. Not only that, it can also generate nested documents.

  ## Usage

@@ -13,17 +13,18 @@ It is importing from mysql table with SQL to elasticsearch not only that, it cou
  $ vim example.rb

  # Execute script, then it outputs result into ./requests.json
- $ bundle exec ruby example.rb
+ $ bundle exec ruby example/example.rb

- # Index document for elasticsearch
- $ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
+ # Index document for elasticsearch if you didn't call 'write_elasticsearch' in example.rb
+ $ curl -s -XPOST localhost:9200/_bulk --data-binary @example/requests.json

  ## TODO

  Pull requests are very welcome!!

+ * add test
  * support thread
- * call elasticsearch bluk api directory
+ * support CLI command

  ## Contributing

data/example/example.rb ADDED
@@ -0,0 +1,62 @@
+ # coding: utf-8
+ require 'elasticsearch_mysql_importer'
+
+ importer = ElasticsearchMysqlImporter::Importer.new
+ importer.configure do |config|
+ # required
+ config.mysql_host = 'localhost'
+ config.mysql_username = 'your_mysql_username'
+ config.mysql_password = 'your_mysql_password'
+ config.mysql_database = 'some_database'
+
+ # optional, but it is required only when generating nested documents
+ config.prepared_query = '
+ CREATE TEMPORARY TABLE tmp_member_skill
+ SELECT
+ members.id AS member_id,
+ skills.name AS skill_name,
+ skills.url AS skill_url
+ FROM
+ members
+ LEFT JOIN member_skill_relation ON members.id = member_id
+ LEFT JOIN skills ON skills.id = skill_id;
+ '
+ # required for importing into elasticsearch
+ config.query = '
+ SELECT
+ members.id AS member_id,
+ members.name AS member_name,
+ "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills
+ FROM
+ members
+ ;
+ '
+ # required for using unique index into elasticsearch
+ config.primary_key = 'member_id'
+
+ # To post the index directly into elasticsearch,
+ # configure the following two lines and call the 'write_elasticsearch' method.
+ # config.elasticsearch_host = 'localhost' # default: localhost
+ # config.elasticsearch_port = 9200 # default: 9200
+
+ # required for specifying elasticsearch index and type
+ config.elasticsearch_index = 'importer_example'
+ config.elasticsearch_type = 'member_skill'
+
+ # required for writing output file path
+ config.output_file = 'example/requests.json'
+ end
+
+ if importer.write_file
+ puts "Finished to run importer.write_file."
+ puts "The output file is written at '#{importer.output_file}'"
+ puts "Let's try importing file with following curl command."
+ puts "e.g.) curl -s -XPOST localhost:9200/_bulk --data-binary @#{importer.output_file}\n\n"
+ end
+
+ #if importer.write_elasticsearch
+ # puts "Finished to run importer.write_elasticsearch."
+ # puts "Let's checking results of index with following curl command."
+ # puts "e.g.) curl localhost:9200/importer_example/_search?pretty=1"
+ #end
+ # To post the index directly into elasticsearch, uncomment the block above.
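For context, a condensed sketch of the import flow this example demonstrates, including the `write_elasticsearch` call that is new in 0.0.2; the MySQL credentials, database, and query below are placeholders, not values from the gem:

```ruby
# Condensed sketch of the 0.0.2 flow, based on example/example.rb above.
# MySQL credentials, database, and query are placeholders.
require 'elasticsearch_mysql_importer'

importer = ElasticsearchMysqlImporter::Importer.new
importer.configure do |config|
  config.mysql_host          = 'localhost'
  config.mysql_username      = 'your_mysql_username'
  config.mysql_password      = 'your_mysql_password'
  config.mysql_database      = 'some_database'
  config.query               = 'SELECT members.id AS member_id, members.name AS member_name FROM members;'
  config.primary_key         = 'member_id'
  config.elasticsearch_index = 'importer_example'   # required since 0.0.2
  config.elasticsearch_type  = 'member_skill'       # required since 0.0.2
  config.output_file         = 'example/requests.json'
end

importer.write_file            # writes the bulk request file
importer.write_elasticsearch   # new in 0.0.2: POSTs the file to the _bulk endpoint
```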
data/example/example_table.sql ADDED
@@ -0,0 +1,33 @@
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
+ /*!40101 SET character_set_client = utf8 */;
+ CREATE TABLE `member_skill_relation` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `member_id` int(11) DEFAULT NULL,
+ `skill_id` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+ /*!40101 SET character_set_client = @saved_cs_client */;
+ INSERT INTO `member_skill_relation` VALUES (1,1,1),(2,1,2),(3,2,3),(4,3,3),(5,3,4),(6,3,5);
+
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
+ /*!40101 SET character_set_client = utf8 */;
+ CREATE TABLE `members` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `name` varchar(100) DEFAULT NULL,
+ `created_at` datetime DEFAULT NULL,
+ `updated_at` datetime DEFAULT NULL,
+ PRIMARY KEY (`id`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+ /*!40101 SET character_set_client = @saved_cs_client */;
+ INSERT INTO `members` VALUES (1,'User-A','2014-04-01 15:20:22','2014-04-01 15:20:22'),(2,'User-B','2014-04-01 15:21:30','2014-04-01 15:21:30'),(3,'User-C','2014-04-01 15:21:41','2014-04-01 15:21:41');
+
+ /*!40101 SET @saved_cs_client = @@character_set_client */;
+ /*!40101 SET character_set_client = utf8 */;
+ CREATE TABLE `skills` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `name` varchar(11) DEFAULT NULL,
+ `url` varchar(250) DEFAULT NULL,
+ PRIMARY KEY (`id`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+ /*!40101 SET character_set_client = @saved_cs_client */;
+ INSERT INTO `skills` VALUES (1,'PHP','http://php.net/'),(2,'Ruby','https://www.ruby-lang.org/'),(3,'Python','https://www.python.org/'),(4,'Java','https://www.java.com/'),(5,'Perl','http://www.perl.org/');
data/example/requests.json ADDED
@@ -0,0 +1,6 @@
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"1"}}
+ {"member_id":"1","member_name":"User-A","skills":[{"skill_name":"PHP","skill_url":"http://php.net/"},{"skill_name":"Ruby","skill_url":"https://www.ruby-lang.org/"}]}
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"2"}}
+ {"member_id":"2","member_name":"User-B","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"}]}
+ {"index":{"_index":"importer_example","_type":"member_skill","_id":"3"}}
+ {"member_id":"3","member_name":"User-C","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"},{"skill_name":"Java","skill_url":"https://www.java.com/"},{"skill_name":"Perl","skill_url":"http://www.perl.org/"}]}
data/lib/elasticsearch_mysql_importer/configuration.rb CHANGED
@@ -2,6 +2,7 @@ module ElasticsearchMysqlImporter
  class Configuration
  attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
  attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
+ attr_accessor :elasticsearch_host, :elasticsearch_port, :elasticsearch_index, :elasticsearch_type

  def initialize
  super
@@ -14,6 +15,8 @@ module ElasticsearchMysqlImporter
  @mysql_encoding = 'utf8'
  @mysql_options = { :cast => false, :cache_rows => true }
  @primary_key = 'id'
+ @elasticsearch_host = 'localhost'
+ @elasticsearch_port = 9200
  end
  end
  end
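For reference, a small sketch of how the new accessors are used: `elasticsearch_host` and `elasticsearch_port` fall back to the defaults set in `initialize`, so only the index and type must be set before calling `write_elasticsearch`. The host and port values below are illustrative, not defaults from the gem:

```ruby
require 'elasticsearch_mysql_importer'

importer = ElasticsearchMysqlImporter::Importer.new
importer.configure do |config|
  # elasticsearch_host/elasticsearch_port default to localhost:9200,
  # so only index and type are mandatory for write_elasticsearch.
  config.elasticsearch_index = 'importer_example'
  config.elasticsearch_type  = 'member_skill'

  # Override the defaults when the cluster is not local (illustrative values).
  config.elasticsearch_host = 'es.example.com'
  config.elasticsearch_port = 9201
end
```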
data/lib/elasticsearch_mysql_importer/importer.rb CHANGED
@@ -22,10 +22,17 @@ module ElasticsearchMysqlImporter
  create_import_file
  end

+ def write_elasticsearch
+ call_elasticsearch_bulk_api
+ end
+
  private
  def validate_configuration
  if @configuration.mysql_database.nil? or @configuration.query.nil?
- raise "Missing Configuration: 'mysql_database' or 'query' are required."
+ raise "Missing Configuration: 'mysql_database' and 'query' are required."
+ end
+ if @configuration.elasticsearch_index.nil? or @configuration.elasticsearch_type.nil?
+ raise "Missing Configuration: 'elasticsearch_index' and 'elasticsearch_type' are required."
  end
  end

@@ -38,7 +45,7 @@ module ElasticsearchMysqlImporter
  :username => @configuration.mysql_username,
  :password => @configuration.mysql_password,
  :database => @configuration.mysql_database,
- :encoding => 'utf8',
+ :encoding => @configuration.mysql_encoding,
  :reconnect => true
  })
  else
@@ -48,7 +55,7 @@ module ElasticsearchMysqlImporter
  :username => @configuration.mysql_username,
  :password => @configuration.mysql_password,
  :database => @configuration.mysql_database,
- :encoding => 'utf8',
+ :encoding => @configuration.mysql_encoding,
  :reconnect => true
  })
  end
@@ -65,27 +72,45 @@ module ElasticsearchMysqlImporter
  end

  def create_import_file
- file = get_file_io_object
- db = connect_db
- db.query(@configuration.prepared_query, @configuration.mysql_options)
- db.query(@configuration.query, @configuration.mysql_options).each do |row|
- row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
- row[k] = [] unless row[k].is_a?(Array)
- db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
- row[k] << nest_row
+ begin
+ file = get_file_io_object
+ db = connect_db
+ db.query(@configuration.prepared_query, @configuration.mysql_options)
+ db.query(@configuration.query, @configuration.mysql_options).each do |row|
+ row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
+ row[k] = [] unless row[k].is_a?(Array)
+ db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
+ row[k] << nest_row
+ end
  end
- end
- header = {
- "index" => {
- "_index" => @configuration.elasticsearch_index,
- "_type" => @configuration.elasticsearch_type,
- "_id" => row[@configuration.primary_key]
+ header = {
+ "index" => {
+ "_index" => @configuration.elasticsearch_index,
+ "_type" => @configuration.elasticsearch_type,
+ "_id" => row[@configuration.primary_key]
+ }
  }
- }
- file.puts(Yajl::Encoder.encode(header))
- file.puts(Yajl::Encoder.encode(row))
+ file.puts(Yajl::Encoder.encode(header))
+ file.puts(Yajl::Encoder.encode(row))
+ end
+ file.seek 0
+ return file.path
+ rescue StandardError => e
+ puts "Failed to generate import file: #{e.message}"
+ end
+ end
+
+ def call_elasticsearch_bulk_api
+ begin
+ elasticsearch_bulk_uri = "http://#{@configuration.elasticsearch_host}:#{@configuration.elasticsearch_port}/_bulk"
+ uri = URI.parse(elasticsearch_bulk_uri)
+ data = File.open(@output_file, 'r').read
+ raise "Error: generated import file is empty." if data.empty?
+ http = Net::HTTP.new(uri.host, uri.port)
+ response, body = http.post(uri.path, data, {'Content-type'=>'application/json'})
+ rescue Timeout::Error, StandardError => e
+ puts "Failed to call Bulk API: #{e.message}"
  end
- return file.path
  end
  end
  end
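Once `write_elasticsearch` has posted the bulk file, the result can be checked with a search against the index. Below is a small Ruby equivalent of the curl check suggested in the example, assuming the default localhost:9200 and the example index name:

```ruby
require 'net/http'
require 'uri'

# Query the example index to confirm documents were indexed.
uri = URI.parse('http://localhost:9200/importer_example/_search?pretty=1')
response = Net::HTTP.get_response(uri)
puts response.body
```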
data/lib/elasticsearch_mysql_importer/version.rb CHANGED
@@ -1,3 +1,3 @@
  module ElasticsearchMysqlImporter
- VERSION = "0.0.1"
+ VERSION = "0.0.2"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: elasticsearch_mysql_importer
  version: !ruby/object:Gem::Version
- version: 0.0.1
+ version: 0.0.2
  platform: ruby
  authors:
  - Kentaro Yoshida
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-05-23 00:00:00.000000000 Z
+ date: 2014-05-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bundler
@@ -79,7 +79,9 @@ files:
  - README.md
  - Rakefile
  - elasticsearch_mysql_importer.gemspec
- - example.rb
+ - example/example.rb
+ - example/example_table.sql
+ - example/requests.json
  - lib/elasticsearch_mysql_importer.rb
  - lib/elasticsearch_mysql_importer/configuration.rb
  - lib/elasticsearch_mysql_importer/importer.rb
data/example.rb DELETED
@@ -1,43 +0,0 @@
- # coding: utf-8
- require 'elasticsearch_mysql_importer'
-
- importer = ElasticsearchMysqlImporter::Importer.new
- importer.configure do |config|
- # required
- config.mysql_host = 'localhost'
- config.mysql_username = 'your_mysql_username'
- config.mysql_password = 'your_mysql_password'
- config.mysql_database = 'some_database'
-
- # optional, but it is required only generating nested documents
- config.prepared_query = '
- CREATE TEMPORARY TABLE tmp_member_skill
- SELECT
- members.id AS member_id,
- skills.name AS skill_name,
- skills.url AS skill_url
- FROM
- members
- LEFT JOIN member_skill_relation ON members.id = member_id
- LEFT JOIN skills ON skills.id = skill_id;
- '
- # required
- config.query = '
- SELECT
- members.id AS member_id,
- members.name AS member_name,
- "SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
- current_timestamp
- FROM
- members
- ;
- '
- # required for using unique index for elasticsearch
- config.primary_key = 'member_id'
-
- # required for outputs file path
- config.output_file = 'requests.json'
- end
-
- importer.write_file
- p importer.output_file