embulk-input-elasticsearch 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +3 -1
- data/Rakefile +8 -7
- data/embulk-input-elasticsearch.gemspec +1 -1
- data/lib/embulk/input/elasticsearch.rb +9 -1
- data/test/helper.rb +11 -0
- data/test/test_transaction.rb +78 -0
- metadata +7 -4
- data/test/embulk/input/test_elasticsearch.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24e8943311e14d4c3ba8fbbdb490bed67a2974ba
|
4
|
+
data.tar.gz: 4f795401dfb26d3e5b58e39d4921a99eb1530445
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b48169a99f7d055271aeecf213704bd1798d9b4f0bac14b28a56933c5ca3ce0bce48b849690abe8a133b7f2e06d4bd869c2cdb01351904fa9992bc7f429efa8
|
7
|
+
data.tar.gz: 1d2dafc4528c3d306f7bbbd08de973b3bdc2ec0ab4dc6ae0997aeb1adbe83e940d71e9becf0c9dcafeebf8169a76275fb9ce3bb4c0e3a6d9dd1a9d3ad4612a1d
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Elasticsearch input plugin for Embulk
|
1
|
+
# Elasticsearch input plugin for Embulk [Build Status](http://travis-ci.org/toyama0919/embulk-input-elasticsearch)
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
@@ -17,6 +17,7 @@
|
|
17
17
|
- **request_timeout**: request_timeout (integer)
|
18
18
|
- **per_size**: per_size (integer, required, default: `1000`)
|
19
19
|
- **limit_size**: limit_size (integer, default: unlimit)
|
20
|
+
- **num_threads**: number of threads for queries. (integer, default: 1)
|
20
21
|
- **fields**: fields (array, required)
|
21
22
|
- **name**: name (string, required)
|
22
23
|
- **type**: type (string, required)
|
@@ -38,6 +39,7 @@ in:
|
|
38
39
|
request_timeout: 60
|
39
40
|
per_size: 1000
|
40
41
|
limit_size: 200000
|
42
|
+
num_threads: 2
|
41
43
|
fields:
|
42
44
|
- { name: _id, type: string, metadata: true }
|
43
45
|
- { name: _type, type: string, metadata: true }
|
data/Rakefile
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
Test::Unit::AutoRunner.run(true, './')
|
4
|
+
desc 'Run test_unit based test'
|
5
|
+
Rake::TestTask.new(:test) do |t|
|
6
|
+
t.libs << "test"
|
7
|
+
t.test_files = Dir["test/**/test_*.rb"].sort
|
8
|
+
t.verbose = true
|
9
|
+
t.warning = false
|
10
10
|
end
|
11
|
+
task :default => :test
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-elasticsearch"
|
4
|
-
spec.version = "0.2.0"
|
4
|
+
spec.version = "0.2.1"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Elasticsearch input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Elasticsearch. parallel query support."
|
@@ -20,7 +20,8 @@ module Embulk
|
|
20
20
|
"limit_size" => config.param("limit_size", :integer, default: nil),
|
21
21
|
"fields" => config.param("fields", :array, default: nil),
|
22
22
|
"queries" => config.param("queries", :array),
|
23
|
-
"sort" => config.param("sort", :hash, default: nil)
|
23
|
+
"sort" => config.param("sort", :hash, default: nil),
|
24
|
+
"add_query_to_record" => config.param("add_query_to_record", :bool, default: false)
|
24
25
|
}
|
25
26
|
# TODO: want max_threads
|
26
27
|
define_num_threads = config.param("num_threads", :integer, default: 1)
|
@@ -30,6 +31,9 @@ module Embulk
|
|
30
31
|
task['fields'].each_with_index{ |field, i|
|
31
32
|
columns << Column.new(i, field['name'], field['type'].to_sym)
|
32
33
|
}
|
34
|
+
if task['add_query_to_record']
|
35
|
+
columns << Column.new(task['fields'].size, "query", :string)
|
36
|
+
end
|
33
37
|
|
34
38
|
resume(task, columns, task['slice_queries'].size, &control)
|
35
39
|
end
|
@@ -81,6 +85,7 @@ module Embulk
|
|
81
85
|
@limit_size = task['limit_size']
|
82
86
|
@fields = task['fields']
|
83
87
|
@sort = task['sort']
|
88
|
+
@add_query_to_record = task['add_query_to_record']
|
84
89
|
end
|
85
90
|
|
86
91
|
def run
|
@@ -96,6 +101,9 @@ module Embulk
|
|
96
101
|
|
97
102
|
results = get_sources(search(@index_type, query, size, now_results_size, @routing, @fields, @sort), @fields)
|
98
103
|
results.each do |record|
|
104
|
+
if @add_query_to_record
|
105
|
+
record << query
|
106
|
+
end
|
99
107
|
page_builder.add(record)
|
100
108
|
end
|
101
109
|
break if last_query?(next_results_size ,total_count)
|
data/test/helper.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# require 'embulk/java/bootstrap'
|
6
|
+
require 'embulk'
|
7
|
+
Embulk.setup
|
8
|
+
Embulk.logger = Embulk::Logger.new('/dev/null')
|
9
|
+
|
10
|
+
APP_ROOT = File.expand_path('../', __dir__)
|
11
|
+
TEST_ROOT = File.expand_path(File.dirname(__FILE__))
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require_relative './helper'
|
2
|
+
require 'embulk/input/elasticsearch'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Elasticsearch = Embulk::Input::Elasticsearch
|
6
|
+
|
7
|
+
module Embulk
|
8
|
+
class Input::Elasticsearch
|
9
|
+
class TestTransaction < Test::Unit::TestCase
|
10
|
+
def control
|
11
|
+
Proc.new {|task| task_reports = [] }
|
12
|
+
end
|
13
|
+
|
14
|
+
sub_test_case "get_slice_from_num_threads" do
|
15
|
+
def test_normal
|
16
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..10).to_a, 5)
|
17
|
+
assert_equal slice.size, 5
|
18
|
+
assert_equal slice.first.size, 2
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_normal_same
|
22
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 3)
|
23
|
+
assert_equal slice.size, 3
|
24
|
+
assert_equal slice.first.size, 1
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_num_threads_over_array_size
|
28
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 10)
|
29
|
+
assert_equal slice.size, 3
|
30
|
+
assert_equal slice.first.size, 1
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_rest
|
34
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..20).to_a, 8)
|
35
|
+
assert_equal slice.size, 7
|
36
|
+
assert_equal slice.first.size, 3
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
sub_test_case "transaction" do
|
41
|
+
def test_normal
|
42
|
+
yaml = YAML.load(%(
|
43
|
+
nodes:
|
44
|
+
- {host: localhost, port: 9200}
|
45
|
+
queries:
|
46
|
+
- 'title: 製函機'
|
47
|
+
index: crawl
|
48
|
+
index_type: m_corporation_page
|
49
|
+
request_timeout: 60
|
50
|
+
per_size: 1000
|
51
|
+
limit_size: 2000
|
52
|
+
num_threads: 20
|
53
|
+
fields:
|
54
|
+
- { name: title, type: string }
|
55
|
+
)
|
56
|
+
)
|
57
|
+
config = DataSource.new(yaml)
|
58
|
+
Elasticsearch.transaction(config, &control)
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_minimum
|
62
|
+
yaml = YAML.load(%(
|
63
|
+
nodes:
|
64
|
+
- {host: localhost, port: 9200}
|
65
|
+
queries:
|
66
|
+
- 'title: 製函機'
|
67
|
+
index: crawl
|
68
|
+
fields:
|
69
|
+
- { name: title, type: string }
|
70
|
+
)
|
71
|
+
)
|
72
|
+
config = DataSource.new(yaml)
|
73
|
+
Elasticsearch.transaction(config, &control)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -103,13 +103,15 @@ extra_rdoc_files: []
|
|
103
103
|
files:
|
104
104
|
- ".gitignore"
|
105
105
|
- ".ruby-version"
|
106
|
+
- ".travis.yml"
|
106
107
|
- Gemfile
|
107
108
|
- LICENSE.txt
|
108
109
|
- README.md
|
109
110
|
- Rakefile
|
110
111
|
- embulk-input-elasticsearch.gemspec
|
111
112
|
- lib/embulk/input/elasticsearch.rb
|
112
|
-
- test/embulk/input/test_elasticsearch.rb
|
113
|
+
- test/helper.rb
|
114
|
+
- test/test_transaction.rb
|
113
115
|
homepage: https://github.com/toyama0919/embulk-input-elasticsearch
|
114
116
|
licenses:
|
115
117
|
- MIT
|
@@ -135,4 +137,5 @@ signing_key:
|
|
135
137
|
specification_version: 4
|
136
138
|
summary: Elasticsearch input plugin for Embulk
|
137
139
|
test_files:
|
138
|
-
- test/embulk/input/test_elasticsearch.rb
|
140
|
+
- test/helper.rb
|
141
|
+
- test/test_transaction.rb
|