elasticsearch_mysql_importer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -5
- data/example/example.rb +62 -0
- data/example/example_table.sql +33 -0
- data/example/requests.json +6 -0
- data/lib/elasticsearch_mysql_importer/configuration.rb +3 -0
- data/lib/elasticsearch_mysql_importer/importer.rb +46 -21
- data/lib/elasticsearch_mysql_importer/version.rb +1 -1
- metadata +5 -3
- data/example.rb +0 -43
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0fc769852e9c3fe18f682d2ff00bf490355b6e7a
|
4
|
+
data.tar.gz: 53258865a42da4e001c437e819bacd9faecc16f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aaa6bcc65f4d711bc98afd9b7e58894fcde315a527d1ba9483900e798faa25c30a977f82ea784ac9766aac3463b1e48e40dc285e993cfac992d3a5090c49fc9b
|
7
|
+
data.tar.gz: cad0f17c9e96f98bf6d145e17361ace9b0e76f76a901407d24f1695e6bf20365fde65cc3b338347ea23d94d2eb12bea7a54a90cb80d00ea574fef049f75ae6c1
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# elasticsearch_mysql_importer
|
2
2
|
|
3
|
-
It is importing from mysql table with SQL to elasticsearch
|
3
|
+
It imports data from a MySQL table into Elasticsearch using SQL. It can also generate nested documents.
|
4
4
|
|
5
5
|
## Usage
|
6
6
|
|
@@ -13,17 +13,18 @@ It is importing from mysql table with SQL to elasticsearch not only that, it cou
|
|
13
13
|
$ vim example.rb
|
14
14
|
|
15
15
|
# Execute script, then it outputs result into ./requests.json
|
16
|
-
$ bundle exec ruby example.rb
|
16
|
+
$ bundle exec ruby example/example.rb
|
17
17
|
|
18
|
-
# Index document for elasticsearch
|
19
|
-
$ curl -s -XPOST localhost:9200/_bulk --data-binary @requests.json
|
18
|
+
# Index the documents into elasticsearch (only needed if you didn't call 'write_elasticsearch' in example.rb)
|
19
|
+
$ curl -s -XPOST localhost:9200/_bulk --data-binary @example/requests.json
|
20
20
|
|
21
21
|
## TODO
|
22
22
|
|
23
23
|
Pull requests are very welcome!!
|
24
24
|
|
25
|
+
* add test
|
25
26
|
* support thread
|
26
|
-
*
|
27
|
+
* support CLI command
|
27
28
|
|
28
29
|
## Contributing
|
29
30
|
|
data/example/example.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'elasticsearch_mysql_importer'
|
3
|
+
|
4
|
+
importer = ElasticsearchMysqlImporter::Importer.new
|
5
|
+
importer.configure do |config|
|
6
|
+
# required
|
7
|
+
config.mysql_host = 'localhost'
|
8
|
+
config.mysql_username = 'your_mysql_username'
|
9
|
+
config.mysql_password = 'your_mysql_password'
|
10
|
+
config.mysql_database = 'some_database'
|
11
|
+
|
12
|
+
# optional; required only when generating nested documents
|
13
|
+
config.prepared_query = '
|
14
|
+
CREATE TEMPORARY TABLE tmp_member_skill
|
15
|
+
SELECT
|
16
|
+
members.id AS member_id,
|
17
|
+
skills.name AS skill_name,
|
18
|
+
skills.url AS skill_url
|
19
|
+
FROM
|
20
|
+
members
|
21
|
+
LEFT JOIN member_skill_relation ON members.id = member_id
|
22
|
+
LEFT JOIN skills ON skills.id = skill_id;
|
23
|
+
'
|
24
|
+
# required for importing into elasticsearch
|
25
|
+
config.query = '
|
26
|
+
SELECT
|
27
|
+
members.id AS member_id,
|
28
|
+
members.name AS member_name,
|
29
|
+
"SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills
|
30
|
+
FROM
|
31
|
+
members
|
32
|
+
;
|
33
|
+
'
|
34
|
+
# required for giving each document a unique _id in elasticsearch
|
35
|
+
config.primary_key = 'member_id'
|
36
|
+
|
37
|
+
# To post the index directly into elasticsearch,
|
38
|
+
# configure the following two lines and call the 'write_elasticsearch' method.
|
39
|
+
# config.elasticsearch_host = 'localhost' # default: localhost
|
40
|
+
# config.elasticsearch_port = 9200 # default: 9200
|
41
|
+
|
42
|
+
# required for specifying elasticsearch index and type
|
43
|
+
config.elasticsearch_index = 'importer_example'
|
44
|
+
config.elasticsearch_type = 'member_skill'
|
45
|
+
|
46
|
+
# required: path of the output file to write
|
47
|
+
config.output_file = 'example/requests.json'
|
48
|
+
end
|
49
|
+
|
50
|
+
if importer.write_file
|
51
|
+
puts "Finished to run importer.write_file."
|
52
|
+
puts "The output file is written at '#{importer.output_file}'"
|
53
|
+
puts "Let's try importing file with following curl command."
|
54
|
+
puts "e.g.) curl -s -XPOST localhost:9200/_bulk --data-binary @#{importer.output_file}\n\n"
|
55
|
+
end
|
56
|
+
|
57
|
+
#if importer.write_elasticsearch
|
58
|
+
# puts "Finished to run importer.write_elasticsearch."
|
59
|
+
# puts "Let's checking results of index with following curl command."
|
60
|
+
# puts "e.g.) curl localhost:9200/importer_example/_search?pretty=1"
|
61
|
+
#end
|
62
|
+
# To post the index directly into elasticsearch, uncomment the block above.
|
@@ -0,0 +1,33 @@
|
|
1
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
2
|
+
/*!40101 SET character_set_client = utf8 */;
|
3
|
+
CREATE TABLE `member_skill_relation` (
|
4
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
5
|
+
`member_id` int(11) DEFAULT NULL,
|
6
|
+
`skill_id` int(11) DEFAULT NULL,
|
7
|
+
PRIMARY KEY (`id`)
|
8
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
9
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
10
|
+
INSERT INTO `member_skill_relation` VALUES (1,1,1),(2,1,2),(3,2,3),(4,3,3),(5,3,4),(6,3,5);
|
11
|
+
|
12
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
13
|
+
/*!40101 SET character_set_client = utf8 */;
|
14
|
+
CREATE TABLE `members` (
|
15
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
16
|
+
`name` varchar(100) DEFAULT NULL,
|
17
|
+
`created_at` datetime DEFAULT NULL,
|
18
|
+
`updated_at` datetime DEFAULT NULL,
|
19
|
+
PRIMARY KEY (`id`)
|
20
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
21
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
22
|
+
INSERT INTO `members` VALUES (1,'User-A','2014-04-01 15:20:22','2014-04-01 15:20:22'),(2,'User-B','2014-04-01 15:21:30','2014-04-01 15:21:30'),(3,'User-C','2014-04-01 15:21:41','2014-04-01 15:21:41');
|
23
|
+
|
24
|
+
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
25
|
+
/*!40101 SET character_set_client = utf8 */;
|
26
|
+
CREATE TABLE `skills` (
|
27
|
+
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
|
28
|
+
`name` varchar(11) DEFAULT NULL,
|
29
|
+
`url` varchar(250) DEFAULT NULL,
|
30
|
+
PRIMARY KEY (`id`)
|
31
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
32
|
+
/*!40101 SET character_set_client = @saved_cs_client */;
|
33
|
+
INSERT INTO `skills` VALUES (1,'PHP','http://php.net/'),(2,'Ruby','https://www.ruby-lang.org/'),(3,'Python','https://www.python.org/'),(4,'Java','https://www.java.com/'),(5,'Perl','http://www.perl.org/');
|
@@ -0,0 +1,6 @@
|
|
1
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"1"}}
|
2
|
+
{"member_id":"1","member_name":"User-A","skills":[{"skill_name":"PHP","skill_url":"http://php.net/"},{"skill_name":"Ruby","skill_url":"https://www.ruby-lang.org/"}]}
|
3
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"2"}}
|
4
|
+
{"member_id":"2","member_name":"User-B","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"}]}
|
5
|
+
{"index":{"_index":"importer_example","_type":"member_skill","_id":"3"}}
|
6
|
+
{"member_id":"3","member_name":"User-C","skills":[{"skill_name":"Python","skill_url":"https://www.python.org/"},{"skill_name":"Java","skill_url":"https://www.java.com/"},{"skill_name":"Perl","skill_url":"http://www.perl.org/"}]}
|
@@ -2,6 +2,7 @@ module ElasticsearchMysqlImporter
|
|
2
2
|
class Configuration
|
3
3
|
attr_accessor :mysql_host, :mysql_port, :mysql_socket, :mysql_username, :mysql_password, :mysql_encoding
|
4
4
|
attr_accessor :mysql_database, :mysql_options, :prepared_query, :query, :primary_key, :output_file
|
5
|
+
attr_accessor :elasticsearch_host, :elasticsearch_port, :elasticsearch_index, :elasticsearch_type
|
5
6
|
|
6
7
|
def initialize
|
7
8
|
super
|
@@ -14,6 +15,8 @@ module ElasticsearchMysqlImporter
|
|
14
15
|
@mysql_encoding = 'utf8'
|
15
16
|
@mysql_options = { :cast => false, :cache_rows => true }
|
16
17
|
@primary_key = 'id'
|
18
|
+
@elasticsearch_host = 'localhost'
|
19
|
+
@elasticsearch_port = 9200
|
17
20
|
end
|
18
21
|
end
|
19
22
|
end
|
@@ -22,10 +22,17 @@ module ElasticsearchMysqlImporter
|
|
22
22
|
create_import_file
|
23
23
|
end
|
24
24
|
|
25
|
+
def write_elasticsearch
|
26
|
+
call_elasticsearch_bulk_api
|
27
|
+
end
|
28
|
+
|
25
29
|
private
|
26
30
|
def validate_configuration
|
27
31
|
if @configuration.mysql_database.nil? or @configuration.query.nil?
|
28
|
-
raise "Missing Configuration: 'mysql_database'
|
32
|
+
raise "Missing Configuration: 'mysql_database' and 'query' are required."
|
33
|
+
end
|
34
|
+
if @configuration.elasticsearch_index.nil? or @configuration.elasticsearch_type.nil?
|
35
|
+
raise "Missing Configuration: 'elasticsearch_index' and 'elasticsearch_type' are required."
|
29
36
|
end
|
30
37
|
end
|
31
38
|
|
@@ -38,7 +45,7 @@ module ElasticsearchMysqlImporter
|
|
38
45
|
:username => @configuration.mysql_username,
|
39
46
|
:password => @configuration.mysql_password,
|
40
47
|
:database => @configuration.mysql_database,
|
41
|
-
:encoding =>
|
48
|
+
:encoding => @configuration.mysql_encoding,
|
42
49
|
:reconnect => true
|
43
50
|
})
|
44
51
|
else
|
@@ -48,7 +55,7 @@ module ElasticsearchMysqlImporter
|
|
48
55
|
:username => @configuration.mysql_username,
|
49
56
|
:password => @configuration.mysql_password,
|
50
57
|
:database => @configuration.mysql_database,
|
51
|
-
:encoding =>
|
58
|
+
:encoding => @configuration.mysql_encoding,
|
52
59
|
:reconnect => true
|
53
60
|
})
|
54
61
|
end
|
@@ -65,27 +72,45 @@ module ElasticsearchMysqlImporter
|
|
65
72
|
end
|
66
73
|
|
67
74
|
def create_import_file
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
row
|
74
|
-
|
75
|
-
row[
|
75
|
+
begin
|
76
|
+
file = get_file_io_object
|
77
|
+
db = connect_db
|
78
|
+
db.query(@configuration.prepared_query, @configuration.mysql_options)
|
79
|
+
db.query(@configuration.query, @configuration.mysql_options).each do |row|
|
80
|
+
row.select {|k, v| v.to_s.strip.match(/^SELECT/i) }.each do |k, v|
|
81
|
+
row[k] = [] unless row[k].is_a?(Array)
|
82
|
+
db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
|
83
|
+
row[k] << nest_row
|
84
|
+
end
|
76
85
|
end
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
86
|
+
header = {
|
87
|
+
"index" => {
|
88
|
+
"_index" => @configuration.elasticsearch_index,
|
89
|
+
"_type" => @configuration.elasticsearch_type,
|
90
|
+
"_id" => row[@configuration.primary_key]
|
91
|
+
}
|
83
92
|
}
|
84
|
-
|
85
|
-
|
86
|
-
|
93
|
+
file.puts(Yajl::Encoder.encode(header))
|
94
|
+
file.puts(Yajl::Encoder.encode(row))
|
95
|
+
end
|
96
|
+
file.seek 0
|
97
|
+
return file.path
|
98
|
+
rescue StandardError => e
|
99
|
+
puts "Failed to generate import file: #{e.message}"
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def call_elasticsearch_bulk_api
|
104
|
+
begin
|
105
|
+
elasticsearch_bulk_uri = "http://#{@configuration.elasticsearch_host}:#{@configuration.elasticsearch_port}/_bulk"
|
106
|
+
uri = URI.parse(elasticsearch_bulk_uri)
|
107
|
+
data = File.open(@output_file, 'r').read
|
108
|
+
raise "Error: generated import file is empty." if data.empty?
|
109
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
110
|
+
response, body = http.post(uri.path, data, {'Content-type'=>'application/json'})
|
111
|
+
rescue Timeout::Error, StandardError => e
|
112
|
+
puts "Failed to call Bulk API: #{e.message}"
|
87
113
|
end
|
88
|
-
return file.path
|
89
114
|
end
|
90
115
|
end
|
91
116
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch_mysql_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kentaro Yoshida
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -79,7 +79,9 @@ files:
|
|
79
79
|
- README.md
|
80
80
|
- Rakefile
|
81
81
|
- elasticsearch_mysql_importer.gemspec
|
82
|
-
- example.rb
|
82
|
+
- example/example.rb
|
83
|
+
- example/example_table.sql
|
84
|
+
- example/requests.json
|
83
85
|
- lib/elasticsearch_mysql_importer.rb
|
84
86
|
- lib/elasticsearch_mysql_importer/configuration.rb
|
85
87
|
- lib/elasticsearch_mysql_importer/importer.rb
|
data/example.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require 'elasticsearch_mysql_importer'
|
3
|
-
|
4
|
-
importer = ElasticsearchMysqlImporter::Importer.new
|
5
|
-
importer.configure do |config|
|
6
|
-
# required
|
7
|
-
config.mysql_host = 'localhost'
|
8
|
-
config.mysql_username = 'your_mysql_username'
|
9
|
-
config.mysql_password = 'your_mysql_password'
|
10
|
-
config.mysql_database = 'some_database'
|
11
|
-
|
12
|
-
# optional, but it is required only generating nested documents
|
13
|
-
config.prepared_query = '
|
14
|
-
CREATE TEMPORARY TABLE tmp_member_skill
|
15
|
-
SELECT
|
16
|
-
members.id AS member_id,
|
17
|
-
skills.name AS skill_name,
|
18
|
-
skills.url AS skill_url
|
19
|
-
FROM
|
20
|
-
members
|
21
|
-
LEFT JOIN member_skill_relation ON members.id = member_id
|
22
|
-
LEFT JOIN skills ON skills.id = skill_id;
|
23
|
-
'
|
24
|
-
# required
|
25
|
-
config.query = '
|
26
|
-
SELECT
|
27
|
-
members.id AS member_id,
|
28
|
-
members.name AS member_name,
|
29
|
-
"SELECT skill_name, skill_url FROM tmp_member_skill WHERE member_id = ${member_id}" AS skills,
|
30
|
-
current_timestamp
|
31
|
-
FROM
|
32
|
-
members
|
33
|
-
;
|
34
|
-
'
|
35
|
-
# required for using unique index for elasticsearch
|
36
|
-
config.primary_key = 'member_id'
|
37
|
-
|
38
|
-
# required for outputs file path
|
39
|
-
config.output_file = 'requests.json'
|
40
|
-
end
|
41
|
-
|
42
|
-
importer.write_file
|
43
|
-
p importer.output_file
|