embulk-input-elasticsearch 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -11
- data/embulk-input-elasticsearch.gemspec +2 -2
- data/lib/embulk/input/elasticsearch.rb +20 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
|
4
|
+
data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
|
7
|
+
data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
|
data/README.md
CHANGED
@@ -9,19 +9,19 @@
|
|
9
9
|
|
10
10
|
## Configuration
|
11
11
|
- **nodes**: nodes (array, required)
|
12
|
-
- **host**: host (string, required
|
13
|
-
- **port**: port (integer, required
|
14
|
-
- **queries**: query (array, required
|
15
|
-
- **index**: index (string, required
|
16
|
-
- **index_type**: index_type (string
|
17
|
-
- **request_timeout**: request_timeout (
|
12
|
+
- **host**: host (string, required)
|
13
|
+
- **port**: port (integer, required)
|
14
|
+
- **queries**: query (array, required)
|
15
|
+
- **index**: index (string, required)
|
16
|
+
- **index_type**: index_type (string)
|
17
|
+
- **request_timeout**: request_timeout (integer)
|
18
18
|
- **per_size**: per_size (integer, required, default: `1000`)
|
19
19
|
- **limit_size**: limit_size (integer, default: unlimit)
|
20
|
-
- **fields**: fields (
|
21
|
-
- **name**: name (string, required
|
22
|
-
- **type**: type (string, required
|
23
|
-
- **metadata**: metadata (boolean,
|
24
|
-
- **time_format**: time_format (string, required
|
20
|
+
- **fields**: fields (array, required)
|
21
|
+
- **name**: name (string, required)
|
22
|
+
- **type**: type (string, required)
|
23
|
+
- **metadata**: metadata (boolean, default: false)
|
24
|
+
- **time_format**: time_format (string, required)
|
25
25
|
|
26
26
|
## Example
|
27
27
|
|
data/embulk-input-elasticsearch.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-elasticsearch"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.2.0"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Elasticsearch input plugin for Embulk"
|
7
|
-
spec.description = "Loads records from Elasticsearch."
|
7
|
+
spec.description = "Loads records from Elasticsearch. parallel query support."
|
8
8
|
spec.email = ["toyama0919@gmail.com"]
|
9
9
|
spec.licenses = ["MIT"]
|
10
10
|
spec.homepage = "https://github.com/toyama0919/embulk-input-elasticsearch"
|
data/lib/embulk/input/elasticsearch.rb
CHANGED
@@ -22,13 +22,28 @@ module Embulk
|
|
22
22
|
"queries" => config.param("queries", :array),
|
23
23
|
"sort" => config.param("sort", :hash, default: nil)
|
24
24
|
}
|
25
|
+
# TODO: want max_threads
|
26
|
+
define_num_threads = config.param("num_threads", :integer, default: 1)
|
27
|
+
task['slice_queries'] = get_slice_from_num_threads(task['queries'], define_num_threads)
|
25
28
|
|
26
29
|
columns = []
|
27
30
|
task['fields'].each_with_index{ |field, i|
|
28
31
|
columns << Column.new(i, field['name'], field['type'].to_sym)
|
29
32
|
}
|
30
33
|
|
31
|
-
resume(task, columns,
|
34
|
+
resume(task, columns, task['slice_queries'].size, &control)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.get_slice_from_num_threads(array, define_num_threads)
|
38
|
+
num_threads = array.size < define_num_threads ? array.size : define_num_threads
|
39
|
+
per_queries = if (array.size % num_threads) == 0
|
40
|
+
(array.size / num_threads)
|
41
|
+
else
|
42
|
+
(array.size / num_threads) + 1
|
43
|
+
end
|
44
|
+
sliced = array.each_slice(per_queries).to_a
|
45
|
+
Embulk.logger.info("calculate num threads => #{sliced.size}")
|
46
|
+
return sliced
|
32
47
|
end
|
33
48
|
|
34
49
|
def self.resume(task, columns, count, &control)
|
@@ -57,10 +72,11 @@ module Embulk
|
|
57
72
|
end
|
58
73
|
|
59
74
|
def init
|
75
|
+
@queries = task['slice_queries'][@index]
|
76
|
+
Embulk.logger.info("this thread queries => #{@queries}")
|
60
77
|
@client = self.class.create_client(task)
|
61
|
-
@
|
78
|
+
@index_name = task['index']
|
62
79
|
@index_type = task['index_type']
|
63
|
-
@queries = task['queries']
|
64
80
|
@per_size = task['per_size']
|
65
81
|
@limit_size = task['limit_size']
|
66
82
|
@fields = task['fields']
|
@@ -148,7 +164,7 @@ module Embulk
|
|
148
164
|
body[:sort] = sorts
|
149
165
|
end
|
150
166
|
body[:query] = { query_string: { query: query } } unless query.nil?
|
151
|
-
search_option = { index: @
|
167
|
+
search_option = { index: @index_name, type: type, body: body }
|
152
168
|
search_option[:routing] = routing unless routing.nil?
|
153
169
|
search_option[:_source] = fields.select{ |field| !field['metadata'] }.map { |field| field['name'] }.join(',')
|
154
170
|
Embulk.logger.info(%Q{search_option => #{search_option}})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
description: Loads records from Elasticsearch.
|
97
|
+
description: Loads records from Elasticsearch. parallel query support.
|
98
98
|
email:
|
99
99
|
- toyama0919@gmail.com
|
100
100
|
executables: []
|