embulk-input-elasticsearch 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +3 -1
- data/Rakefile +8 -7
- data/embulk-input-elasticsearch.gemspec +1 -1
- data/lib/embulk/input/elasticsearch.rb +9 -1
- data/test/helper.rb +11 -0
- data/test/test_transaction.rb +78 -0
- metadata +7 -4
- data/test/embulk/input/test_elasticsearch.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24e8943311e14d4c3ba8fbbdb490bed67a2974ba
|
4
|
+
data.tar.gz: 4f795401dfb26d3e5b58e39d4921a99eb1530445
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b48169a99f7d055271aeecf213704bd1798d9b4f0bac14b28a56933c5ca3ce0bce48b849690abe8a133b7f2e06d4bd869c2cdb01351904fa9992bc7f429efa8
|
7
|
+
data.tar.gz: 1d2dafc4528c3d306f7bbbd08de973b3bdc2ec0ab4dc6ae0997aeb1adbe83e940d71e9becf0c9dcafeebf8169a76275fb9ce3bb4c0e3a6d9dd1a9d3ad4612a1d
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Elasticsearch input plugin for Embulk
|
1
|
+
# Elasticsearch input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-elasticsearch.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-elasticsearch)
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
@@ -17,6 +17,7 @@
|
|
17
17
|
- **request_timeout**: request_timeout (integer)
|
18
18
|
- **per_size**: per_size (integer, required, default: `1000`)
|
19
19
|
- **limit_size**: limit_size (integer, default: unlimited)
|
20
|
+
- **num_threads**: number of threads for queries. (integer, default: 1)
|
20
21
|
- **fields**: fields (array, required)
|
21
22
|
- **name**: name (string, required)
|
22
23
|
- **type**: type (string, required)
|
@@ -38,6 +39,7 @@ in:
|
|
38
39
|
request_timeout: 60
|
39
40
|
per_size: 1000
|
40
41
|
limit_size: 200000
|
42
|
+
num_threads: 2
|
41
43
|
fields:
|
42
44
|
- { name: _id, type: string, metadata: true }
|
43
45
|
- { name: _type, type: string, metadata: true }
|
data/Rakefile
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
Test::Unit::AutoRunner.run(true, './')
|
4
|
+
desc 'Run test_unit based test'
|
5
|
+
Rake::TestTask.new(:test) do |t|
|
6
|
+
t.libs << "test"
|
7
|
+
t.test_files = Dir["test/**/test_*.rb"].sort
|
8
|
+
t.verbose = true
|
9
|
+
t.warning = false
|
10
10
|
end
|
11
|
+
task :default => :test
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-elasticsearch"
|
4
|
-
spec.version = "0.2.0"
|
4
|
+
spec.version = "0.2.1"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Elasticsearch input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Elasticsearch. parallel query support."
|
@@ -20,7 +20,8 @@ module Embulk
|
|
20
20
|
"limit_size" => config.param("limit_size", :integer, default: nil),
|
21
21
|
"fields" => config.param("fields", :array, default: nil),
|
22
22
|
"queries" => config.param("queries", :array),
|
23
|
-
"sort" => config.param("sort", :hash, default: nil)
|
23
|
+
"sort" => config.param("sort", :hash, default: nil),
|
24
|
+
"add_query_to_record" => config.param("add_query_to_record", :bool, default: false)
|
24
25
|
}
|
25
26
|
# TODO: want max_threads
|
26
27
|
define_num_threads = config.param("num_threads", :integer, default: 1)
|
@@ -30,6 +31,9 @@ module Embulk
|
|
30
31
|
task['fields'].each_with_index{ |field, i|
|
31
32
|
columns << Column.new(i, field['name'], field['type'].to_sym)
|
32
33
|
}
|
34
|
+
if task['add_query_to_record']
|
35
|
+
columns << Column.new(task['fields'].size, "query", :string)
|
36
|
+
end
|
33
37
|
|
34
38
|
resume(task, columns, task['slice_queries'].size, &control)
|
35
39
|
end
|
@@ -81,6 +85,7 @@ module Embulk
|
|
81
85
|
@limit_size = task['limit_size']
|
82
86
|
@fields = task['fields']
|
83
87
|
@sort = task['sort']
|
88
|
+
@add_query_to_record = task['add_query_to_record']
|
84
89
|
end
|
85
90
|
|
86
91
|
def run
|
@@ -96,6 +101,9 @@ module Embulk
|
|
96
101
|
|
97
102
|
results = get_sources(search(@index_type, query, size, now_results_size, @routing, @fields, @sort), @fields)
|
98
103
|
results.each do |record|
|
104
|
+
if @add_query_to_record
|
105
|
+
record << query
|
106
|
+
end
|
99
107
|
page_builder.add(record)
|
100
108
|
end
|
101
109
|
break if last_query?(next_results_size ,total_count)
|
data/test/helper.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# require 'embulk/java/bootstrap'
|
6
|
+
require 'embulk'
|
7
|
+
Embulk.setup
|
8
|
+
Embulk.logger = Embulk::Logger.new('/dev/null')
|
9
|
+
|
10
|
+
APP_ROOT = File.expand_path('../', __dir__)
|
11
|
+
TEST_ROOT = File.expand_path(File.dirname(__FILE__))
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require_relative './helper'
|
2
|
+
require 'embulk/input/elasticsearch'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Elasticsearch = Embulk::Input::Elasticsearch
|
6
|
+
|
7
|
+
module Embulk
|
8
|
+
class Input::Elasticsearch
|
9
|
+
class TestTransaction < Test::Unit::TestCase
|
10
|
+
def control
|
11
|
+
Proc.new {|task| task_reports = [] }
|
12
|
+
end
|
13
|
+
|
14
|
+
sub_test_case "get_slice_from_num_threads" do
|
15
|
+
def test_normal
|
16
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..10).to_a, 5)
|
17
|
+
assert_equal slice.size, 5
|
18
|
+
assert_equal slice.first.size, 2
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_normal_same
|
22
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 3)
|
23
|
+
assert_equal slice.size, 3
|
24
|
+
assert_equal slice.first.size, 1
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_num_threads_over_array_size
|
28
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 10)
|
29
|
+
assert_equal slice.size, 3
|
30
|
+
assert_equal slice.first.size, 1
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_rest
|
34
|
+
slice = Elasticsearch.get_slice_from_num_threads((1..20).to_a, 8)
|
35
|
+
assert_equal slice.size, 7
|
36
|
+
assert_equal slice.first.size, 3
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
sub_test_case "transaction" do
|
41
|
+
def test_normal
|
42
|
+
yaml = YAML.load(%(
|
43
|
+
nodes:
|
44
|
+
- {host: localhost, port: 9200}
|
45
|
+
queries:
|
46
|
+
- 'title: 製函機'
|
47
|
+
index: crawl
|
48
|
+
index_type: m_corporation_page
|
49
|
+
request_timeout: 60
|
50
|
+
per_size: 1000
|
51
|
+
limit_size: 2000
|
52
|
+
num_threads: 20
|
53
|
+
fields:
|
54
|
+
- { name: title, type: string }
|
55
|
+
)
|
56
|
+
)
|
57
|
+
config = DataSource.new(yaml)
|
58
|
+
Elasticsearch.transaction(config, &control)
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_minimum
|
62
|
+
yaml = YAML.load(%(
|
63
|
+
nodes:
|
64
|
+
- {host: localhost, port: 9200}
|
65
|
+
queries:
|
66
|
+
- 'title: 製函機'
|
67
|
+
index: crawl
|
68
|
+
fields:
|
69
|
+
- { name: title, type: string }
|
70
|
+
)
|
71
|
+
)
|
72
|
+
config = DataSource.new(yaml)
|
73
|
+
Elasticsearch.transaction(config, &control)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -103,13 +103,15 @@ extra_rdoc_files: []
|
|
103
103
|
files:
|
104
104
|
- ".gitignore"
|
105
105
|
- ".ruby-version"
|
106
|
+
- ".travis.yml"
|
106
107
|
- Gemfile
|
107
108
|
- LICENSE.txt
|
108
109
|
- README.md
|
109
110
|
- Rakefile
|
110
111
|
- embulk-input-elasticsearch.gemspec
|
111
112
|
- lib/embulk/input/elasticsearch.rb
|
112
|
-
- test/embulk/input/test_elasticsearch.rb
|
113
|
+
- test/helper.rb
|
114
|
+
- test/test_transaction.rb
|
113
115
|
homepage: https://github.com/toyama0919/embulk-input-elasticsearch
|
114
116
|
licenses:
|
115
117
|
- MIT
|
@@ -135,4 +137,5 @@ signing_key:
|
|
135
137
|
specification_version: 4
|
136
138
|
summary: Elasticsearch input plugin for Embulk
|
137
139
|
test_files:
|
138
|
-
- test/embulk/input/test_elasticsearch.rb
|
140
|
+
- test/helper.rb
|
141
|
+
- test/test_transaction.rb
|