embulk-input-elasticsearch 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -11
- data/embulk-input-elasticsearch.gemspec +2 -2
- data/lib/embulk/input/elasticsearch.rb +20 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
|
4
|
+
data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
|
7
|
+
data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
|
data/README.md
CHANGED
@@ -9,19 +9,19 @@
|
|
9
9
|
|
10
10
|
## Configuration
|
11
11
|
- **nodes**: nodes (array, required)
|
12
|
-
- **host**: host (string, required
|
13
|
-
- **port**: port (integer, required
|
14
|
-
- **queries**: query (array, required
|
15
|
-
- **index**: index (string, required
|
16
|
-
- **index_type**: index_type (string
|
17
|
-
- **request_timeout**: request_timeout (
|
12
|
+
- **host**: host (string, required)
|
13
|
+
- **port**: port (integer, required)
|
14
|
+
- **queries**: query (array, required)
|
15
|
+
- **index**: index (string, required)
|
16
|
+
- **index_type**: index_type (string)
|
17
|
+
- **request_timeout**: request_timeout (integer)
|
18
18
|
- **per_size**: per_size (integer, required, default: `1000`)
|
19
19
|
- **limit_size**: limit_size (integer, default: unlimit)
|
20
|
-
- **fields**: fields (
|
21
|
-
- **name**: name (string, required
|
22
|
-
- **type**: type (string, required
|
23
|
-
- **metadata**: metadata (boolean,
|
24
|
-
- **time_format**: time_format (string, required
|
20
|
+
- **fields**: fields (array, required)
|
21
|
+
- **name**: name (string, required)
|
22
|
+
- **type**: type (string, required)
|
23
|
+
- **metadata**: metadata (boolean, default: false)
|
24
|
+
- **time_format**: time_format (string, required)
|
25
25
|
|
26
26
|
## Example
|
27
27
|
|
data/embulk-input-elasticsearch.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-elasticsearch"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.2.0"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Elasticsearch input plugin for Embulk"
|
7
|
-
spec.description = "Loads records from Elasticsearch."
|
7
|
+
spec.description = "Loads records from Elasticsearch. parallel query support."
|
8
8
|
spec.email = ["toyama0919@gmail.com"]
|
9
9
|
spec.licenses = ["MIT"]
|
10
10
|
spec.homepage = "https://github.com/toyama0919/embulk-input-elasticsearch"
|
data/lib/embulk/input/elasticsearch.rb
CHANGED
@@ -22,13 +22,28 @@ module Embulk
|
|
22
22
|
"queries" => config.param("queries", :array),
|
23
23
|
"sort" => config.param("sort", :hash, default: nil)
|
24
24
|
}
|
25
|
+
# TODO: want max_threads
|
26
|
+
define_num_threads = config.param("num_threads", :integer, default: 1)
|
27
|
+
task['slice_queries'] = get_slice_from_num_threads(task['queries'], define_num_threads)
|
25
28
|
|
26
29
|
columns = []
|
27
30
|
task['fields'].each_with_index{ |field, i|
|
28
31
|
columns << Column.new(i, field['name'], field['type'].to_sym)
|
29
32
|
}
|
30
33
|
|
31
|
-
resume(task, columns,
|
34
|
+
resume(task, columns, task['slice_queries'].size, &control)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.get_slice_from_num_threads(array, define_num_threads)
|
38
|
+
num_threads = array.size < define_num_threads ? array.size : define_num_threads
|
39
|
+
per_queries = if (array.size % num_threads) == 0
|
40
|
+
(array.size / num_threads)
|
41
|
+
else
|
42
|
+
(array.size / num_threads) + 1
|
43
|
+
end
|
44
|
+
sliced = array.each_slice(per_queries).to_a
|
45
|
+
Embulk.logger.info("calculate num threads => #{sliced.size}")
|
46
|
+
return sliced
|
32
47
|
end
|
33
48
|
|
34
49
|
def self.resume(task, columns, count, &control)
|
@@ -57,10 +72,11 @@ module Embulk
|
|
57
72
|
end
|
58
73
|
|
59
74
|
def init
|
75
|
+
@queries = task['slice_queries'][@index]
|
76
|
+
Embulk.logger.info("this thread queries => #{@queries}")
|
60
77
|
@client = self.class.create_client(task)
|
61
|
-
@
|
78
|
+
@index_name = task['index']
|
62
79
|
@index_type = task['index_type']
|
63
|
-
@queries = task['queries']
|
64
80
|
@per_size = task['per_size']
|
65
81
|
@limit_size = task['limit_size']
|
66
82
|
@fields = task['fields']
|
@@ -148,7 +164,7 @@ module Embulk
|
|
148
164
|
body[:sort] = sorts
|
149
165
|
end
|
150
166
|
body[:query] = { query_string: { query: query } } unless query.nil?
|
151
|
-
search_option = { index: @
|
167
|
+
search_option = { index: @index_name, type: type, body: body }
|
152
168
|
search_option[:routing] = routing unless routing.nil?
|
153
169
|
search_option[:_source] = fields.select{ |field| !field['metadata'] }.map { |field| field['name'] }.join(',')
|
154
170
|
Embulk.logger.info(%Q{search_option => #{search_option}})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
description: Loads records from Elasticsearch.
|
97
|
+
description: Loads records from Elasticsearch. parallel query support.
|
98
98
|
email:
|
99
99
|
- toyama0919@gmail.com
|
100
100
|
executables: []
|