embulk-input-elasticsearch 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5dbc0a3f7f57fd192f65ba884bb66c6f3cc64e9
4
- data.tar.gz: e2d7318385c5ef34c173a2a7e2a4f826ff526495
3
+ metadata.gz: 24e8943311e14d4c3ba8fbbdb490bed67a2974ba
4
+ data.tar.gz: 4f795401dfb26d3e5b58e39d4921a99eb1530445
5
5
  SHA512:
6
- metadata.gz: 4c0ab03a6b629767ed2eaea29c349ed5541187667b5017806de60ed518dd4d34c840efc70bdc4f6fb5c7f126850985dc6d9c24cd0d92af6a118e63fac2fd58ff
7
- data.tar.gz: 086e06172b34af6e8c88b7d8c02c03e8b2d2ee560ffab72cbe409b12fcf2aaad271029992d48d3443675b0239f5d4cd20670e5d624743b46f9bf3b8c7e54aae6
6
+ metadata.gz: 0b48169a99f7d055271aeecf213704bd1798d9b4f0bac14b28a56933c5ca3ce0bce48b849690abe8a133b7f2e06d4bd869c2cdb01351904fa9992bc7f429efa8
7
+ data.tar.gz: 1d2dafc4528c3d306f7bbbd08de973b3bdc2ec0ab4dc6ae0997aeb1adbe83e940d71e9becf0c9dcafeebf8169a76275fb9ce3bb4c0e3a6d9dd1a9d3ad4612a1d
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ cache: bundler
3
+ rvm:
4
+ - jruby-9.0.5.0
5
+ - jruby-head
6
+ jdk:
7
+ - openjdk7
8
+ before_install:
9
+ - gem install bundler
10
+ matrix:
11
+ allow_failures:
12
+ - rvm: jruby-head
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Elasticsearch input plugin for Embulk
1
+ # Elasticsearch input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-elasticsearch.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-elasticsearch)
2
2
 
3
3
  ## Overview
4
4
 
@@ -17,6 +17,7 @@
17
17
  - **request_timeout**: request_timeout (integer)
18
18
  - **per_size**: per_size (integer, required, default: `1000`)
19
19
  - **limit_size**: limit_size (integer, default: unlimit)
20
+ - **num_threads**: number of threads for queries. (integer, default: 1)
20
21
  - **fields**: fields (array, required)
21
22
  - **name**: name (string, required)
22
23
  - **type**: type (string, required)
@@ -38,6 +39,7 @@ in:
38
39
  request_timeout: 60
39
40
  per_size: 1000
40
41
  limit_size: 200000
42
+ num_threads: 2
41
43
  fields:
42
44
  - { name: _id, type: string, metadata: true }
43
45
  - { name: _type, type: string, metadata: true }
data/Rakefile CHANGED
@@ -1,10 +1,11 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rake/testtask'
2
3
 
3
- task default: :build
4
-
5
- desc "Run tests"
6
- task :test do
7
- require "test-unit"
8
-
9
- Test::Unit::AutoRunner.run(true, './')
4
+ desc 'Run test_unit based test'
5
+ Rake::TestTask.new(:test) do |t|
6
+ t.libs << "test"
7
+ t.test_files = Dir["test/**/test_*.rb"].sort
8
+ t.verbose = true
9
+ t.warning = false
10
10
  end
11
+ task :default => :test
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-elasticsearch"
4
- spec.version = "0.2.0"
4
+ spec.version = "0.2.1"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Elasticsearch input plugin for Embulk"
7
7
  spec.description = "Loads records from Elasticsearch. parallel query support."
@@ -20,7 +20,8 @@ module Embulk
20
20
  "limit_size" => config.param("limit_size", :integer, default: nil),
21
21
  "fields" => config.param("fields", :array, default: nil),
22
22
  "queries" => config.param("queries", :array),
23
- "sort" => config.param("sort", :hash, default: nil)
23
+ "sort" => config.param("sort", :hash, default: nil),
24
+ "add_query_to_record" => config.param("add_query_to_record", :bool, default: false)
24
25
  }
25
26
  # TODO: want max_threads
26
27
  define_num_threads = config.param("num_threads", :integer, default: 1)
@@ -30,6 +31,9 @@ module Embulk
30
31
  task['fields'].each_with_index{ |field, i|
31
32
  columns << Column.new(i, field['name'], field['type'].to_sym)
32
33
  }
34
+ if task['add_query_to_record']
35
+ columns << Column.new(task['fields'].size, "query", :string)
36
+ end
33
37
 
34
38
  resume(task, columns, task['slice_queries'].size, &control)
35
39
  end
@@ -81,6 +85,7 @@ module Embulk
81
85
  @limit_size = task['limit_size']
82
86
  @fields = task['fields']
83
87
  @sort = task['sort']
88
+ @add_query_to_record = task['add_query_to_record']
84
89
  end
85
90
 
86
91
  def run
@@ -96,6 +101,9 @@ module Embulk
96
101
 
97
102
  results = get_sources(search(@index_type, query, size, now_results_size, @routing, @fields, @sort), @fields)
98
103
  results.each do |record|
104
+ if @add_query_to_record
105
+ record << query
106
+ end
99
107
  page_builder.add(record)
100
108
  end
101
109
  break if last_query?(next_results_size ,total_count)
data/test/helper.rb ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+
5
+ # require 'embulk/java/bootstrap'
6
+ require 'embulk'
7
+ Embulk.setup
8
+ Embulk.logger = Embulk::Logger.new('/dev/null')
9
+
10
+ APP_ROOT = File.expand_path('../', __dir__)
11
+ TEST_ROOT = File.expand_path(File.dirname(__FILE__))
@@ -0,0 +1,78 @@
1
+ require_relative './helper'
2
+ require 'embulk/input/elasticsearch'
3
+ require 'yaml'
4
+
5
+ Elasticsearch = Embulk::Input::Elasticsearch
6
+
7
+ module Embulk
8
+ class Input::Elasticsearch
9
+ class TestTransaction < Test::Unit::TestCase
10
+ def control
11
+ Proc.new {|task| task_reports = [] }
12
+ end
13
+
14
+ sub_test_case "get_slice_from_num_threads" do
15
+ def test_normal
16
+ slice = Elasticsearch.get_slice_from_num_threads((1..10).to_a, 5)
17
+ assert_equal slice.size, 5
18
+ assert_equal slice.first.size, 2
19
+ end
20
+
21
+ def test_normal_same
22
+ slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 3)
23
+ assert_equal slice.size, 3
24
+ assert_equal slice.first.size, 1
25
+ end
26
+
27
+ def test_num_threads_over_array_size
28
+ slice = Elasticsearch.get_slice_from_num_threads((1..3).to_a, 10)
29
+ assert_equal slice.size, 3
30
+ assert_equal slice.first.size, 1
31
+ end
32
+
33
+ def test_rest
34
+ slice = Elasticsearch.get_slice_from_num_threads((1..20).to_a, 8)
35
+ assert_equal slice.size, 7
36
+ assert_equal slice.first.size, 3
37
+ end
38
+ end
39
+
40
+ sub_test_case "transaction" do
41
+ def test_normal
42
+ yaml = YAML.load(%(
43
+ nodes:
44
+ - {host: localhost, port: 9200}
45
+ queries:
46
+ - 'title: 製函機'
47
+ index: crawl
48
+ index_type: m_corporation_page
49
+ request_timeout: 60
50
+ per_size: 1000
51
+ limit_size: 2000
52
+ num_threads: 20
53
+ fields:
54
+ - { name: title, type: string }
55
+ )
56
+ )
57
+ config = DataSource.new(yaml)
58
+ Elasticsearch.transaction(config, &control)
59
+ end
60
+
61
+ def test_minimum
62
+ yaml = YAML.load(%(
63
+ nodes:
64
+ - {host: localhost, port: 9200}
65
+ queries:
66
+ - 'title: 製函機'
67
+ index: crawl
68
+ fields:
69
+ - { name: title, type: string }
70
+ )
71
+ )
72
+ config = DataSource.new(yaml)
73
+ Elasticsearch.transaction(config, &control)
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-15 00:00:00.000000000 Z
11
+ date: 2016-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -103,13 +103,15 @@ extra_rdoc_files: []
103
103
  files:
104
104
  - ".gitignore"
105
105
  - ".ruby-version"
106
+ - ".travis.yml"
106
107
  - Gemfile
107
108
  - LICENSE.txt
108
109
  - README.md
109
110
  - Rakefile
110
111
  - embulk-input-elasticsearch.gemspec
111
112
  - lib/embulk/input/elasticsearch.rb
112
- - test/embulk/input/test_elasticsearch.rb
113
+ - test/helper.rb
114
+ - test/test_transaction.rb
113
115
  homepage: https://github.com/toyama0919/embulk-input-elasticsearch
114
116
  licenses:
115
117
  - MIT
@@ -135,4 +137,5 @@ signing_key:
135
137
  specification_version: 4
136
138
  summary: Elasticsearch input plugin for Embulk
137
139
  test_files:
138
- - test/embulk/input/test_elasticsearch.rb
140
+ - test/helper.rb
141
+ - test/test_transaction.rb
@@ -1,11 +0,0 @@
1
- require "embulk/command/embulk_run"
2
- require "embulk"
3
- Embulk.setup
4
-
5
- require "embulk/input/elasticsearch"
6
- module Embulk
7
- module Input
8
- class ElasticsearchInputPluginTest < Test::Unit::TestCase
9
- end
10
- end
11
- end