embulk-input-elasticsearch 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9e3e0bcff8c7de3a07cab81b1689f6f2f6b5dfe5
4
- data.tar.gz: a2997eb7f98cdea800403211b75342a0ff360483
3
+ metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
4
+ data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
5
5
  SHA512:
6
- metadata.gz: bb618f0f0c4af1cdce787ce20cc38b009a4b48eba7e678f0f198c0e0195038242d8609ef42e1744077b0a51e99a8c3d62662c42e9482c4e33802d7b7ac8adeab
7
- data.tar.gz: 8b43fc2e6c9afa87bb0851e2a813c9a3633104df341db9b73c74b472e3c701ce1e04e5e287f11d94b7ea2b1e11a6a0b7e2ea9c25bc70978b627765bf87ba2abf
6
+ metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
7
+ data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
data/README.md CHANGED
@@ -9,19 +9,19 @@
9
9
 
10
10
  ## Configuration
11
11
  - **nodes**: nodes (array, required)
12
- - **host**: host (string, required, default: ``)
13
- - **port**: port (integer, required, default: ``)
14
- - **queries**: query (array, required, default: ``)
15
- - **index**: index (string, required, default: ``)
16
- - **index_type**: index_type (string, default: ``)
17
- - **request_timeout**: request_timeout (string, default: ``)
12
+ - **host**: host (string, required)
13
+ - **port**: port (integer, required)
14
+ - **queries**: query (array, required)
15
+ - **index**: index (string, required)
16
+ - **index_type**: index_type (string)
17
+ - **request_timeout**: request_timeout (integer)
18
18
  - **per_size**: per_size (integer, required, default: `1000`)
19
19
  - **limit_size**: limit_size (integer, default: unlimited)
20
- - **fields**: fields (string, required, default: ``)
21
- - **name**: name (string, required, default: ``)
22
- - **type**: type (string, required, default: ``)
23
- - **metadata**: metadata (boolean, required, default: ``)
24
- - **time_format**: time_format (string, required, default: ``)
20
+ - **fields**: fields (array, required)
21
+ - **name**: name (string, required)
22
+ - **type**: type (string, required)
23
+ - **metadata**: metadata (boolean, default: false)
24
+ - **time_format**: time_format (string, required)
25
25
 
26
26
  ## Example
27
27
 
@@ -1,10 +1,10 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-elasticsearch"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Elasticsearch input plugin for Embulk"
7
- spec.description = "Loads records from Elasticsearch."
7
+ spec.description = "Loads records from Elasticsearch. parallel query support."
8
8
  spec.email = ["toyama0919@gmail.com"]
9
9
  spec.licenses = ["MIT"]
10
10
  spec.homepage = "https://github.com/toyama0919/embulk-input-elasticsearch"
@@ -22,13 +22,28 @@ module Embulk
22
22
  "queries" => config.param("queries", :array),
23
23
  "sort" => config.param("sort", :hash, default: nil)
24
24
  }
25
+ # TODO: want max_threads
26
+ define_num_threads = config.param("num_threads", :integer, default: 1)
27
+ task['slice_queries'] = get_slice_from_num_threads(task['queries'], define_num_threads)
25
28
 
26
29
  columns = []
27
30
  task['fields'].each_with_index{ |field, i|
28
31
  columns << Column.new(i, field['name'], field['type'].to_sym)
29
32
  }
30
33
 
31
- resume(task, columns, 1, &control)
34
+ resume(task, columns, task['slice_queries'].size, &control)
35
+ end
36
+
37
# Splits the configured queries into groups, one group per Embulk task.
#
# The number of groups never exceeds the number of queries, and is
# clamped to at least one so that a zero or negative `num_threads`
# setting (or an empty query list) cannot cause a ZeroDivisionError or
# an invalid slice size.
#
# @param array [Array] the queries to distribute
# @param define_num_threads [Integer] the requested number of threads
# @return [Array<Array>] consecutive slices of `array`; empty when
#   `array` is empty. Because of rounding up, the number of slices may be
#   smaller than the requested thread count.
def self.get_slice_from_num_threads(array, define_num_threads)
  # Never more threads than queries, and never fewer than one.
  num_threads = [[array.size, define_num_threads].min, 1].max
  # Integer ceiling division: queries handled by each thread.
  per_queries = (array.size + num_threads - 1) / num_threads
  # each_slice rejects a size of 0, which is what an empty array yields.
  sliced = array.empty? ? [] : array.each_slice(per_queries).to_a
  Embulk.logger.info("calculate num threads => #{sliced.size}")
  sliced
end
33
48
 
34
49
  def self.resume(task, columns, count, &control)
@@ -57,10 +72,11 @@ module Embulk
57
72
  end
58
73
 
59
74
  def init
75
+ @queries = task['slice_queries'][@index]
76
+ Embulk.logger.info("this thread queries => #{@queries}")
60
77
  @client = self.class.create_client(task)
61
- @index = task['index']
78
+ @index_name = task['index']
62
79
  @index_type = task['index_type']
63
- @queries = task['queries']
64
80
  @per_size = task['per_size']
65
81
  @limit_size = task['limit_size']
66
82
  @fields = task['fields']
@@ -148,7 +164,7 @@ module Embulk
148
164
  body[:sort] = sorts
149
165
  end
150
166
  body[:query] = { query_string: { query: query } } unless query.nil?
151
- search_option = { index: @index, type: type, body: body }
167
+ search_option = { index: @index_name, type: type, body: body }
152
168
  search_option[:routing] = routing unless routing.nil?
153
169
  search_option[:_source] = fields.select{ |field| !field['metadata'] }.map { |field| field['name'] }.join(',')
154
170
  Embulk.logger.info(%Q{search_option => #{search_option}})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-13 00:00:00.000000000 Z
11
+ date: 2016-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description: Loads records from Elasticsearch.
97
+ description: Loads records from Elasticsearch. parallel query support.
98
98
  email:
99
99
  - toyama0919@gmail.com
100
100
  executables: []