embulk-input-elasticsearch 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
4
- data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
3
+ metadata.gz: 24e8943311e14d4c3ba8fbbdb490bed67a2974ba
4
+ data.tar.gz: 4f795401dfb26d3e5b58e39d4921a99eb1530445
5
5
  SHA512:
6
- metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
7
- data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
6
+ metadata.gz: 0b48169a99f7d055271aeecf213704bd1798d9b4f0bac14b28a56933c5ca3ce0bce48b849690abe8a133b7f2e06d4bd869c2cdb01351904fa9992bc7f429efa8
7
+ data.tar.gz: 1d2dafc4528c3d306f7bbbd08de973b3bdc2ec0ab4dc6ae0997aeb1adbe83e940d71e9becf0c9dcafeebf8169a76275fb9ce3bb4c0e3a6d9dd1a9d3ad4612a1d
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ cache: bundler
3
+ rvm:
4
+ - jruby-9.0.5.0
5
+ - jruby-head
6
+ jdk:
7
+ - openjdk7
8
+ before_install:
9
+ - gem install bundler
10
+ matrix:
11
+ allow_failures:
12
+ - rvm: jruby-head
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Elasticsearch input plugin for Embulk
1
+ # Elasticsearch input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-elasticsearch.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-elasticsearch)
2
2
 
3
3
  ## Overview
4
4
 
@@ -17,6 +17,7 @@
17
17
  - **request_timeout**: request_timeout (integer)
18
18
  - **per_size**: per_size (integer, default: `1000`)
19
19
  - **limit_size**: limit_size (integer, default: unlimited)
20
+ - **num_threads**: number of threads used to run queries (integer, default: `1`)
20
21
  - **fields**: fields (array, required)
21
22
  - **name**: name (string, required)
22
23
  - **type**: type (string, required)
@@ -38,6 +39,7 @@ in:
38
39
  request_timeout: 60
39
40
  per_size: 1000
40
41
  limit_size: 200000
42
+ num_threads: 2
41
43
  fields:
42
44
  - { name: _id, type: string, metadata: true }
43
45
  - { name: _type, type: string, metadata: true }
data/Rakefile CHANGED
@@ -1,10 +1,11 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rake/testtask'
2
3
 
3
- task default: :build
4
-
5
- desc "Run tests"
6
- task :test do
7
- require "test-unit"
8
-
9
- Test::Unit::AutoRunner.run(true, './')
4
+ desc 'Run test_unit based test'
5
+ Rake::TestTask.new(:test) do |t|
6
+ t.libs << "test"
7
+ t.test_files = Dir["test/**/test_*.rb"].sort
8
+ t.verbose = true
9
+ t.warning = false
10
10
  end
11
+ task :default => :test
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-elasticsearch"
4
- spec.version = "0.2.0"
4
+ spec.version = "0.2.1"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Elasticsearch input plugin for Embulk"
7
7
  spec.description = "Loads records from Elasticsearch. parallel query support."
@@ -20,7 +20,8 @@ module Embulk
20
20
  "limit_size" => config.param("limit_size", :integer, default: nil),
21
21
  "fields" => config.param("fields", :array, default: nil),
22
22
  "queries" => config.param("queries", :array),
23
- "sort" => config.param("sort", :hash, default: nil)
23
+ "sort" => config.param("sort", :hash, default: nil),
24
+ "add_query_to_record" => config.param("add_query_to_record", :bool, default: false)
24
25
  }
25
26
  # TODO: want max_threads
26
27
  define_num_threads = config.param("num_threads", :integer, default: 1)
@@ -30,6 +31,9 @@ module Embulk
30
31
  task['fields'].each_with_index{ |field, i|
31
32
  columns << Column.new(i, field['name'], field['type'].to_sym)
32
33
  }
34
+ if task['add_query_to_record']
35
+ columns << Column.new(task['fields'].size, "query", :string)
36
+ end
33
37
 
34
38
  resume(task, columns, task['slice_queries'].size, &control)
35
39
  end
@@ -81,6 +85,7 @@ module Embulk
81
85
  @limit_size = task['limit_size']
82
86
  @fields = task['fields']
83
87
  @sort = task['sort']
88
+ @add_query_to_record = task['add_query_to_record']
84
89
  end
85
90
 
86
91
  def run
@@ -96,6 +101,9 @@ module Embulk
96
101
 
97
102
  results = get_sources(search(@index_type, query, size, now_results_size, @routing, @fields, @sort), @fields)
98
103
  results.each do |record|
104
+ if @add_query_to_record
105
+ record << query
106
+ end
99
107
  page_builder.add(record)
100
108
  end
101
109
  break if last_query?(next_results_size ,total_count)
data/test/helper.rb ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+
5
+ # require 'embulk/java/bootstrap'
6
+ require 'embulk'
7
+ Embulk.setup
8
+ Embulk.logger = Embulk::Logger.new('/dev/null')
9
+
10
+ APP_ROOT = File.expand_path('../', __dir__)
11
+ TEST_ROOT = File.expand_path(File.dirname(__FILE__))
@@ -0,0 +1,78 @@
1
+ require_relative './helper'
2
+ require 'embulk/input/elasticsearch'
3
+ require 'yaml'
4
+
5
+ Elasticsearch = Embulk::Input::Elasticsearch
6
+
7
+ module Embulk
8
+ class Input::Elasticsearch
9
+ class TestTransaction < Test::Unit::TestCase
10
+ def control
11
+ Proc.new {|task| task_reports = [] }
12
+ end
13
+
14
+ sub_test_case "get_slice_from_num_threads" do
15
+ def test_normal
16
+ slice = Elasticsearch.get_slice_from_num_threads((1..10).to_a, 5)
17
+ assert_equal slice.size, 5
18
+ assert_equal slice.first.size, 2
19
+ end
20
+
21
+ def test_normal_same
22
+ slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 3)
23
+ assert_equal slice.size, 3
24
+ assert_equal slice.first.size, 1
25
+ end
26
+
27
+ def test_num_threads_over_array_size
28
+ slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 10)
29
+ assert_equal slice.size, 3
30
+ assert_equal slice.first.size, 1
31
+ end
32
+
33
+ def test_rest
34
+ slice = Elasticsearch.get_slice_from_num_threads((1..20).to_a, 8)
35
+ assert_equal slice.size, 7
36
+ assert_equal slice.first.size, 3
37
+ end
38
+ end
39
+
40
+ sub_test_case "transaction" do
41
+ def test_normal
42
+ yaml = YAML.load(%(
43
+ nodes:
44
+ - {host: localhost, port: 9200}
45
+ queries:
46
+ - 'title: 製函機'
47
+ index: crawl
48
+ index_type: m_corporation_page
49
+ request_timeout: 60
50
+ per_size: 1000
51
+ limit_size: 2000
52
+ num_threads: 20
53
+ fields:
54
+ - { name: title, type: string }
55
+ )
56
+ )
57
+ config = DataSource.new(yaml)
58
+ Elasticsearch.transaction(config, &control)
59
+ end
60
+
61
+ def test_minimum
62
+ yaml = YAML.load(%(
63
+ nodes:
64
+ - {host: localhost, port: 9200}
65
+ queries:
66
+ - 'title: 製函機'
67
+ index: crawl
68
+ fields:
69
+ - { name: title, type: string }
70
+ )
71
+ )
72
+ config = DataSource.new(yaml)
73
+ Elasticsearch.transaction(config, &control)
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-15 00:00:00.000000000 Z
11
+ date: 2016-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -103,13 +103,15 @@ extra_rdoc_files: []
103
103
  files:
104
104
  - ".gitignore"
105
105
  - ".ruby-version"
106
+ - ".travis.yml"
106
107
  - Gemfile
107
108
  - LICENSE.txt
108
109
  - README.md
109
110
  - Rakefile
110
111
  - embulk-input-elasticsearch.gemspec
111
112
  - lib/embulk/input/elasticsearch.rb
112
- - test/embulk/input/test_elasticsearch.rb
113
+ - test/helper.rb
114
+ - test/test_transaction.rb
113
115
  homepage: https://github.com/toyama0919/embulk-input-elasticsearch
114
116
  licenses:
115
117
  - MIT
@@ -135,4 +137,5 @@ signing_key:
135
137
  specification_version: 4
136
138
  summary: Elasticsearch input plugin for Embulk
137
139
  test_files:
138
- - test/embulk/input/test_elasticsearch.rb
140
+ - test/helper.rb
141
+ - test/test_transaction.rb
@@ -1,11 +0,0 @@
1
- require "embulk/command/embulk_run"
2
- require "embulk"
3
- Embulk.setup
4
-
5
- require "embulk/input/elasticsearch"
6
- module Embulk
7
- module Input
8
- class ElasticsearchInputPluginTest < Test::Unit::TestCase
9
- end
10
- end
11
- end