elasticsearch_mysql_importer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -5
- data/example/example.rb +62 -0
- data/example/example_table.sql +33 -0
- data/example/requests.json +6 -0
- data/lib/elasticsearch_mysql_importer/configuration.rb +3 -0
- data/lib/elasticsearch_mysql_importer/importer.rb +46 -21
- data/lib/elasticsearch_mysql_importer/version.rb +1 -1
- metadata +5 -3
- data/example.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0fc769852e9c3fe18f682d2ff00bf490355b6e7a
|
4
|
+
data.tar.gz: 53258865a42da4e001c437e819bacd9faecc16f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aaa6bcc65f4d711bc98afd9b7e58894fcde315a527d1ba9483900e798faa25c30a977f82ea784ac9766aac3463b1e48e40dc285e993cfac992d3a5090c49fc9b
|
7
|
+
data.tar.gz: cad0f17c9e96f98bf6d145e17361ace9b0e76f76a901407d24f1695e6bf20365fde65cc3b338347ea23d94d2eb12bea7a54a90cb80d00ea574fef049f75ae6c1
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# elasticsearch_mysql_importer
|
2
2
|
|
3
|
-
It is importing from mysql table with SQL to elasticsearch
|
3
|
+
It imports data from a MySQL table into Elasticsearch using an SQL query. It can also generate nested documents.
|
4
4
|
|
5
5
|
## Usage
|
6
6
|
|
@@ -13,17 +13,18 @@ It is importing from mysql table with SQL to elasticsearch not only that, it cou
|
|
13
13
|
$ vim example.rb
|
14
14
|
|
15
15
|
# Execute script, then it outputs result into ./requests.json
|
16
|
-
$ bundle exec ruby example.rb
|
16
|
+
$ bundle exec ruby example/example.rb
|
17
17
|
|
18
|
-
# Index document for elasticsearch
|
19
|
-
$ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
|
18
|
+
# Index document for elasticsearch if you didn't call 'write_elasticsearch' in example.rb
|
19
|
+
$ curl -s -XPOST localhost:9200/_bulk --data-binary @example/requests.json
|
20
20
|
|
21
21
|
## TODO
|
22
22
|
|
23
23
|
Pull requests are very welcome!!
|
24
24
|
|
25
|
+
* add test
|
25
26
|
* support thread
|
26
|
-
*
|
27
|
+
* support CLI command
|
27
28
|
|
28
29
|
## Contributing
|
29
30
|
|
data/example/example.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'elasticsearch_mysql_importer'
|
3
|
+
|
4
|
+
importer = ElasticsearchMysqlImporter::Importer.new
|
5
|
+
importer.configure do |config|
|
6
|
+
# required
|
7
|
+
config.mysql_host = 'localhost'
|
8
|
+
config.mysql_username = 'your_mysql_username'
|
9
|
+
config.mysql_password = 'your_mysql_password'
|
10
|
+
config.mysql_database = 'some_database'
|
11
|
+
|
12
|
+
# optional; required only when generating nested documents
|
13
|
+
config.prepared_query = '
|
14
|
+
CREATE TEMPORARY TABLE tmp_member_skill
|
15
|
+
SELECT
|
16
|
+
members.id AS member_id,
|
17
|
+
skills.name AS skill_name,
|
18
|
+
skills.url AS skill_url
|
19
|
+
FROM
|
20
|
+
members
|
21
|
+
LEFT JOIN member_skill_relation ON members.id = member_id
|
22
|
+
LEFT JOIN skills ON skills.id = skill_id;
|
23
|
+
'
|
24
|
+
# required for importing into elasticsearch
|
25
|
+
config.query = '
|
26
|
+
SELECT
|
27
|
+
members.id AS member_id,
|
28
|
+
members.name AS member_name,
|
29
|
+
"SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills
|
30
|
+
FROM
|
31
|
+
members
|
32
|
+
;
|
33
|
+
'
|
34
|
+
# required for using unique index into elasticsearch
|
35
|
+
config.primary_key = 'member_id'
|
36
|
+
|
37
|
+
# To post the index directly into elasticsearch,
|
38
|
+
# configure following two lines and call 'write_elasticsearch' method.
|
39
|
+
# config.elasticsearch_host = 'localhost' # default: localhost
|
40
|
+
# config.elasticsearch_port = 9200 # default: 9200
|
41
|
+
|
42
|
+
# required for specifying elasticsearch index and type
|
43
|
+
config.elasticsearch_index = 'importer_example'
|
44
|
+
config.elasticsearch_type = 'member_skill'
|
45
|
+
|
46
|
+
# required for writing output file path
|
47
|
+
config.output_file = 'example/requests.json'
|
48
|
+
end
|
49
|
+
|
50
|
+
if importer.write_file
|
51
|
+
puts "Finished to run importer.write_file."
|
52
|
+
puts "The output file is written at '#{importer.output_file}'"
|
53
|
+
puts "Let's try importing file with following curl command."
|
54
|
+
puts "e.g.) curl -s -XPOST localhost:9200/_bulk --data-binary @#{importer.output_file}\n\n"
|
55
|
+
end
|
56
|
+
|
57
|
+
#if importer.write_elasticsearch
|
58
|
+
# puts "Finished to run importer.write_elasticsearch."
|
59
|
+
# puts "Let's checking results of index with following curl command."
|
60
|
+
# puts "e.g.) curl localhost:9200/importer_example/_search?pretty=1"
|
61
|
+
#end
|
62
|
+
# To post the index directly into elasticsearch, uncomment the 'write_elasticsearch' block above.
|
data/example/example_table.sql
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
2
|
+
/*!40101 SET character_set_client = utf8 */;
|
3
|
+
CREATE TABLE `member_skill_relation` (
|
4
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
5
|
+
`member_id` int(11) DEFAULT NULL,
|
6
|
+
`skill_id` int(11) DEFAULT NULL,
|
7
|
+
PRIMARY KEY (`id`)
|
8
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
9
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
10
|
+
INSERT INTO `member_skill_relation` VALUES (1,1,1),(2,1,2),(3,2,3),(4,3,3),(5,3,4),(6,3,5);
|
11
|
+
|
12
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
13
|
+
/*!40101 SET character_set_client = utf8 */;
|
14
|
+
CREATE TABLE `members` (
|
15
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
16
|
+
`name` varchar(100) DEFAULT NULL,
|
17
|
+
`created_at` datetime DEFAULT NULL,
|
18
|
+
`updated_at` datetime DEFAULT NULL,
|
19
|
+
PRIMARY KEY (`id`)
|
20
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
21
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
22
|
+
INSERT INTO `members` VALUES (1,'User-A','2014-04-01 15:20:22','2014-04-01 15:20:22'),(2,'User-B','2014-04-01 15:21:30','2014-04-01 15:21:30'),(3,'User-C','2014-04-01 15:21:41','2014-04-01 15:21:41');
|
23
|
+
|
24
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
25
|
+
/*!40101 SET character_set_client = utf8 */;
|
26
|
+
CREATE TABLE `skills` (
|
27
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
28
|
+
`name` varchar(11) DEFAULT NULL,
|
29
|
+
`url` varchar(250) DEFAULT NULL,
|
30
|
+
PRIMARY KEY (`id`)
|
31
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
32
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
33
|
+
INSERT INTO `skills` VALUES (1,'PHP','http://php.net/'),(2,'Ruby','https://www.ruby-lang.org/'),(3,'Python','https://www.python.org/'),(4,'Java','https://www.java.com/'),(5,'Perl','http://www.perl.org/');
|
data/example/requests.json
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"1"}}
|
2
|
+
{"member_id":"1","member_name":"User-A","skills":[{"skill_name":"PHP","skill_url":"http://php.net/"},{"skill_name":"Ruby","skill_url":"https://www.ruby-lang.org/"}]}
|
3
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"2"}}
|
4
|
+
{"member_id":"2","member_name":"User-B","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"}]}
|
5
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"3"}}
|
6
|
+
{"member_id":"3","member_name":"User-C","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"},{"skill_name":"Java","skill_url":"https://www.java.com/"},{"skill_name":"Perl","skill_url":"http://www.perl.org/"}]}
|
data/lib/elasticsearch_mysql_importer/configuration.rb
CHANGED
@@ -2,6 +2,7 @@ module ElasticsearchMysqlImporter
|
|
2
2
|
class Configuration
|
3
3
|
attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
|
4
4
|
attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
|
5
|
+
attr_accessor :elasticsearch_host, :elasticsearch_port, :elasticsearch_index, :elasticsearch_type
|
5
6
|
|
6
7
|
def initialize
|
7
8
|
super
|
@@ -14,6 +15,8 @@ module ElasticsearchMysqlImporter
|
|
14
15
|
@mysql_encoding = 'utf8'
|
15
16
|
@mysql_options = { :cast => false, :cache_rows => true }
|
16
17
|
@primary_key = 'id'
|
18
|
+
@elasticsearch_host = 'localhost'
|
19
|
+
@elasticsearch_port = 9200
|
17
20
|
end
|
18
21
|
end
|
19
22
|
end
|
data/lib/elasticsearch_mysql_importer/importer.rb
CHANGED
@@ -22,10 +22,17 @@ module ElasticsearchMysqlImporter
|
|
22
22
|
create_import_file
|
23
23
|
end
|
24
24
|
|
25
|
+
def write_elasticsearch
|
26
|
+
call_elasticsearch_bulk_api
|
27
|
+
end
|
28
|
+
|
25
29
|
private
|
26
30
|
def validate_configuration
|
27
31
|
if @configuration.mysql_database.nil? or @configuration.query.nil?
|
28
|
-
raise "Missing Configuration: 'mysql_database'
|
32
|
+
raise "Missing Configuration: 'mysql_database' and 'query' are required."
|
33
|
+
end
|
34
|
+
if @configuration.elasticsearch_index.nil? or @configuration.elasticsearch_type.nil?
|
35
|
+
raise "Missing Configuration: 'elasticsearch_index' and 'elasticsearch_type' are required."
|
29
36
|
end
|
30
37
|
end
|
31
38
|
|
@@ -38,7 +45,7 @@ module ElasticsearchMysqlImporter
|
|
38
45
|
:username => @configuration.mysql_username,
|
39
46
|
:password => @configuration.mysql_password,
|
40
47
|
:database => @configuration.mysql_database,
|
41
|
-
:encoding =>
|
48
|
+
:encoding => @configuration.mysql_encoding,
|
42
49
|
:reconnect => true
|
43
50
|
})
|
44
51
|
else
|
@@ -48,7 +55,7 @@ module ElasticsearchMysqlImporter
|
|
48
55
|
:username => @configuration.mysql_username,
|
49
56
|
:password => @configuration.mysql_password,
|
50
57
|
:database => @configuration.mysql_database,
|
51
|
-
:encoding =>
|
58
|
+
:encoding => @configuration.mysql_encoding,
|
52
59
|
:reconnect => true
|
53
60
|
})
|
54
61
|
end
|
@@ -65,27 +72,45 @@ module ElasticsearchMysqlImporter
|
|
65
72
|
end
|
66
73
|
|
67
74
|
def create_import_file
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
row
|
74
|
-
|
75
|
-
row[
|
75
|
+
begin
|
76
|
+
file = get_file_io_object
|
77
|
+
db = connect_db
|
78
|
+
db.query(@configuration.prepared_query, @configuration.mysql_options)
|
79
|
+
db.query(@configuration.query, @configuration.mysql_options).each do |row|
|
80
|
+
row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
|
81
|
+
row[k] = [] unless row[k].is_a?(Array)
|
82
|
+
db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
|
83
|
+
row[k] << nest_row
|
84
|
+
end
|
76
85
|
end
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
86
|
+
header = {
|
87
|
+
"index" => {
|
88
|
+
"_index" => @configuration.elasticsearch_index,
|
89
|
+
"_type" => @configuration.elasticsearch_type,
|
90
|
+
"_id" => row[@configuration.primary_key]
|
91
|
+
}
|
83
92
|
}
|
84
|
-
|
85
|
-
|
86
|
-
|
93
|
+
file.puts(Yajl::Encoder.encode(header))
|
94
|
+
file.puts(Yajl::Encoder.encode(row))
|
95
|
+
end
|
96
|
+
file.seek 0
|
97
|
+
return file.path
|
98
|
+
rescue StandardError => e
|
99
|
+
puts "Failed to generate import file: #{e.message}"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def call_elasticsearch_bulk_api
|
104
|
+
begin
|
105
|
+
elasticsearch_bulk_uri = "http://#{@configuration.elasticsearch_host}:#{@configuration.elasticsearch_port}/_bulk"
|
106
|
+
uri = URI.parse(elasticsearch_bulk_uri)
|
107
|
+
data = File.open(@output_file, 'r').read
|
108
|
+
raise "Error: generated import file is empty." if data.empty?
|
109
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
110
|
+
response, body = http.post(uri.path, data, {'Content-type'=>'application/json'})
|
111
|
+
rescue Timeout::Error, StandardError => e
|
112
|
+
puts "Failed to call Bulk API: #{e.message}"
|
87
113
|
end
|
88
|
-
return file.path
|
89
114
|
end
|
90
115
|
end
|
91
116
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch_mysql_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kentaro Yoshida
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -79,7 +79,9 @@ files:
|
|
79
79
|
- README.md
|
80
80
|
- Rakefile
|
81
81
|
- elasticsearch_mysql_importer.gemspec
|
82
|
-
- example.rb
|
82
|
+
- example/example.rb
|
83
|
+
- example/example_table.sql
|
84
|
+
- example/requests.json
|
83
85
|
- lib/elasticsearch_mysql_importer.rb
|
84
86
|
- lib/elasticsearch_mysql_importer/configuration.rb
|
85
87
|
- lib/elasticsearch_mysql_importer/importer.rb
|
data/example.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require 'elasticsearch_mysql_importer'
|
3
|
-
|
4
|
-
importer = ElasticsearchMysqlImporter::Importer.new
|
5
|
-
importer.configure do |config|
|
6
|
-
# required
|
7
|
-
config.mysql_host = 'localhost'
|
8
|
-
config.mysql_username = 'your_mysql_username'
|
9
|
-
config.mysql_password = 'your_mysql_password'
|
10
|
-
config.mysql_database = 'some_database'
|
11
|
-
|
12
|
-
# optional, but it is required only generating nested documents
|
13
|
-
config.prepared_query = '
|
14
|
-
CREATE TEMPORARY TABLE tmp_member_skill
|
15
|
-
SELECT
|
16
|
-
members.id AS member_id,
|
17
|
-
skills.name AS skill_name,
|
18
|
-
skills.url AS skill_url
|
19
|
-
FROM
|
20
|
-
members
|
21
|
-
LEFT JOIN member_skill_relation ON members.id = member_id
|
22
|
-
LEFT JOIN skills ON skills.id = skill_id;
|
23
|
-
'
|
24
|
-
# required
|
25
|
-
config.query = '
|
26
|
-
SELECT
|
27
|
-
members.id AS member_id,
|
28
|
-
members.name AS member_name,
|
29
|
-
"SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
|
30
|
-
current_timestamp
|
31
|
-
FROM
|
32
|
-
members
|
33
|
-
;
|
34
|
-
'
|
35
|
-
# required for using unique index for elasticsearch
|
36
|
-
config.primary_key = 'member_id'
|
37
|
-
|
38
|
-
# required for outputs file path
|
39
|
-
config.output_file = 'requests.json'
|
40
|
-
end
|
41
|
-
|
42
|
-
importer.write_file
|
43
|
-
p importer.output_file
|