embulk-input-elasticsearch 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9e3e0bcff8c7de3a07cab81b1689f6f2f6b5dfe5
4
- data.tar.gz: a2997eb7f98cdea800403211b75342a0ff360483
3
+ metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
4
+ data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
5
5
  SHA512:
6
- metadata.gz: bb618f0f0c4af1cdce787ce20cc38b009a4b48eba7e678f0f198c0e0195038242d8609ef42e1744077b0a51e99a8c3d62662c42e9482c4e33802d7b7ac8adeab
7
- data.tar.gz: 8b43fc2e6c9afa87bb0851e2a813c9a3633104df341db9b73c74b472e3c701ce1e04e5e287f11d94b7ea2b1e11a6a0b7e2ea9c25bc70978b627765bf87ba2abf
6
+ metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
7
+ data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
data/README.md CHANGED
@@ -9,19 +9,19 @@
9
9
 
10
10
  ## Configuration
11
11
  - **nodes**: nodes (array, required)
12
- - **host**: host (string, required, default: ``)
13
- - **port**: port (integer, required, default: ``)
14
- - **queries**: query (array, required, default: ``)
15
- - **index**: index (string, required, default: ``)
16
- - **index_type**: index_type (string, default: ``)
17
- - **request_timeout**: request_timeout (string, default: ``)
12
+ - **host**: host (string, required)
13
+ - **port**: port (integer, required)
14
+ - **queries**: query (array, required)
15
+ - **index**: index (string, required)
16
+ - **index_type**: index_type (string)
17
+ - **request_timeout**: request_timeout (integer)
18
18
  - **per_size**: per_size (integer, required, default: `1000`)
19
19
  - **limit_size**: limit_size (integer, default: unlimited)
20
- - **fields**: fields (string, required, default: ``)
21
- - **name**: name (string, required, default: ``)
22
- - **type**: type (string, required, default: ``)
23
- - **metadata**: metadata (boolean, required, default: ``)
24
- - **time_format**: time_format (string, required, default: ``)
20
+ - **fields**: fields (array, required)
21
+ - **name**: name (string, required)
22
+ - **type**: type (string, required)
23
+ - **metadata**: metadata (boolean, default: false)
24
+ - **time_format**: time_format (string, required)
25
25
 
26
26
  ## Example
27
27
 
@@ -1,10 +1,10 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-elasticsearch"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Elasticsearch input plugin for Embulk"
7
- spec.description = "Loads records from Elasticsearch."
7
+ spec.description = "Loads records from Elasticsearch. parallel query support."
8
8
  spec.email = ["toyama0919@gmail.com"]
9
9
  spec.licenses = ["MIT"]
10
10
  spec.homepage = "https://github.com/toyama0919/embulk-input-elasticsearch"
@@ -22,13 +22,28 @@ module Embulk
22
22
  "queries" => config.param("queries", :array),
23
23
  "sort" => config.param("sort", :hash, default: nil)
24
24
  }
25
+ # TODO: want max_threads
26
+ define_num_threads = config.param("num_threads", :integer, default: 1)
27
+ task['slice_queries'] = get_slice_from_num_threads(task['queries'], define_num_threads)
25
28
 
26
29
  columns = []
27
30
  task['fields'].each_with_index{ |field, i|
28
31
  columns << Column.new(i, field['name'], field['type'].to_sym)
29
32
  }
30
33
 
31
- resume(task, columns, 1, &control)
34
+ resume(task, columns, task['slice_queries'].size, &control)
35
+ end
36
+
37
+ def self.get_slice_from_num_threads(array, define_num_threads)
38
+ num_threads = array.size < define_num_threads ? array.size : define_num_threads
39
+ per_queries = if (array.size % num_threads) == 0
40
+ (array.size / num_threads)
41
+ else
42
+ (array.size / num_threads) + 1
43
+ end
44
+ sliced = array.each_slice(per_queries).to_a
45
+ Embulk.logger.info("calculate num threads => #{sliced.size}")
46
+ return sliced
32
47
  end
33
48
 
34
49
  def self.resume(task, columns, count, &control)
@@ -57,10 +72,11 @@ module Embulk
57
72
  end
58
73
 
59
74
  def init
75
+ @queries = task['slice_queries'][@index]
76
+ Embulk.logger.info("this thread queries => #{@queries}")
60
77
  @client = self.class.create_client(task)
61
- @index = task['index']
78
+ @index_name = task['index']
62
79
  @index_type = task['index_type']
63
- @queries = task['queries']
64
80
  @per_size = task['per_size']
65
81
  @limit_size = task['limit_size']
66
82
  @fields = task['fields']
@@ -148,7 +164,7 @@ module Embulk
148
164
  body[:sort] = sorts
149
165
  end
150
166
  body[:query] = { query_string: { query: query } } unless query.nil?
151
- search_option = { index: @index, type: type, body: body }
167
+ search_option = { index: @index_name, type: type, body: body }
152
168
  search_option[:routing] = routing unless routing.nil?
153
169
  search_option[:_source] = fields.select{ |field| !field['metadata'] }.map { |field| field['name'] }.join(',')
154
170
  Embulk.logger.info(%Q{search_option => #{search_option}})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-13 00:00:00.000000000 Z
11
+ date: 2016-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description: Loads records from Elasticsearch.
97
+ description: Loads records from Elasticsearch. parallel query support.
98
98
  email:
99
99
  - toyama0919@gmail.com
100
100
  executables: []