embulk-input-elasticsearch-nosslverify 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cce837140ec443edf4892a250708a8c1c1961d08fc8fb61c4137ff06198c67fd
4
+ data.tar.gz: 23463d69c4349aa8bd528d035c05eec0a3643c0b5f6f271949c091166a981821
5
+ SHA512:
6
+ metadata.gz: 2bdd8090f15fb4fee727d041ab410173939080fe73f1d3af6bf3e4edf9aaf617b90677c246c8b9eab69c42c9c81f3ae69bfe3e3a7fc0a3218390e695b600fcf7
7
+ data.tar.gz: 2ced32de36e5830f1ec0468fccad5f60a507b2033a231b4a3bc331d071632da236b86b625d756ef37d8151af4e44afbb8539ae3c6e1ab55ff22af30ef9097f29
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /vendor/
6
+ /Gemfile.lock
7
+ *.jar
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
data/.travis.yml ADDED
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+ install: ./embulk.jar bundle install --path vendor/bundle
3
+ matrix:
4
+ include:
5
+ - env: EMBULK_VERSION=0.9.15
6
+ rvm: jruby-9.1.5.0 # bundled jruby version
7
+ jdk: openjdk8 # embulk 0.9.x uses jdk8
8
+ - env: EMBULK_VERSION=latest
9
+ rvm: jruby-9.1.5.0 # ?
10
+ jdk: openjdk8 # ?
11
+ allow_failures:
12
+ - env: EMBULK_VERSION=latest
13
+ before_install:
14
+ - curl -o embulk.jar --create-dirs -L "https://dl.embulk.org/embulk-${EMBULK_VERSION}.jar" # HTTPS: the jar is executed below
15
+ - chmod +x embulk.jar
16
+ - ./embulk.jar gem install bundler
17
+ script: ./embulk.jar bundle exec rake test
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,96 @@
1
+ # Elasticsearch input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-elasticsearch.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-elasticsearch) [![Gem Version](https://badge.fury.io/rb/embulk-input-elasticsearch.svg)](http://badge.fury.io/rb/embulk-input-elasticsearch)
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: input
6
+ * **Resume supported**: yes
7
+ * **Cleanup supported**: yes
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+ - **nodes**: nodes (array, required)
12
+ - **host**: host (string, required)
13
+ - **port**: port (integer, required)
14
+ - **queries**: array of Lucene query strings. (array, required)
15
+ - **index**: index (string, required)
16
+ - **index_type**: index_type (string)
17
+ - **request_timeout**: request timeout. (integer, default: `60`)
18
+ - **per_size**: page size of each search request. (integer, default: `1000`)
19
+ - **limit_size**: maximum number of records to read per query. (integer, default: unlimited)
20
+ - **num_threads**: number of threads for queries. (integer, default: 1)
21
+ - **retry_on_failure**: number of retries on failure. Set to 0 to retry forever. (integer, default: 5)
22
+ - **sort**: sort order. (hash, default: nil)
23
+ - **scroll**: how long the scroll search context is kept alive. (string, default: '1m')
24
+ - **fields**: fields (array, required)
25
+ - **name**: name (string, required)
26
+ - **type**: type (string, required)
27
+ - **metadata**: metadata (boolean, default: false)
28
+ - **time_format**: time_format (string)
29
+
30
+ ## Example
31
+
32
+ ```yaml
33
+ in:
34
+ type: elasticsearch
35
+ nodes:
36
+ - {host: localhost, port: 9200}
37
+ queries:
38
+ - 'page_type: HP'
39
+ - 'page_type: GP'
40
+ index: crawl
41
+ index_type: m_corporation_page
42
+ request_timeout: 60
43
+ per_size: 1000
44
+ limit_size: 200000
45
+ num_threads: 2
46
+ sort:
47
+ m_corporation_id: desc
48
+ employee_range: asc
49
+ fields:
50
+ - { name: _id, type: string, metadata: true }
51
+ - { name: _type, type: string, metadata: true }
52
+ - { name: _index, type: string, metadata: true }
53
+ - { name: _score, type: double, metadata: true }
54
+ - { name: page_type, type: string }
55
+ - { name: corp_name, type: string }
56
+ - { name: corp_key, type: string }
57
+ - { name: title, type: string }
58
+ - { name: body, type: string }
59
+ - { name: url, type: string }
60
+ - { name: employee_range, type: long }
61
+ - { name: m_corporation_id, type: long }
62
+ - { name: cg_lv1, type: json }
63
+ - { name: cg_lv2, type: json }
64
+ - { name: cg_lv3, type: json }
65
+ ```
66
+
67
+ ## Support Type
68
+ * string
69
+ * long
70
+ * double
71
+ * timestamp
72
+ * json
73
+ * boolean
74
+
75
+ ## test
76
+
77
+ ### setup
78
+
79
+ ```
80
+ curl -o embulk.jar --create-dirs -L "https://dl.embulk.org/embulk-latest.jar"
81
+ chmod +x embulk.jar
82
+ ./embulk.jar gem install bundler
83
+ ./embulk.jar bundle install --path vendor/bundle
84
+ ```
85
+
86
+ ### run test
87
+
88
+ ```
89
+ ./embulk.jar bundle exec rake test
90
+ ```
91
+
92
+ ## Build
93
+
94
+ ```
95
+ $ rake
96
+ ```
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
+ desc 'Run test_unit based test'
5
+ Rake::TestTask.new(:test) do |t|
6
+ t.libs << "test"
7
+ t.test_files = Dir["test/**/test_*.rb"].sort
8
+ t.verbose = true
9
+ t.warning = false
10
+ end
11
+ task :default => :test
@@ -0,0 +1,22 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-input-elasticsearch-nosslverify"
4
+ spec.version = "0.3.6"
5
+ spec.authors = ["toyama0919"]
6
+ spec.summary = "Elasticsearch input plugin for Embulk"
7
+ spec.description = "Loads records from Elasticsearch. parallel query support."
8
+ spec.email = ["toyama0919@gmail.com"]
9
+ spec.licenses = ["MIT"]
10
+ spec.homepage = "https://github.com/toyama0919/embulk-input-elasticsearch"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ spec.add_dependency 'elasticsearch'
17
+ spec.add_dependency 'excon'
18
+ spec.add_development_dependency 'embulk', ['>= 0.8.18']
19
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
20
+ spec.add_development_dependency 'rake', ['>= 10.0']
21
+ spec.add_development_dependency 'test-unit'
22
+ end
@@ -0,0 +1,75 @@
1
+ require_relative 'elasticsearch/connection'
2
+ require_relative 'elasticsearch/input_thread'
3
+ require_relative 'elasticsearch/converter'
4
+ require_relative 'elasticsearch/error'
5
+
6
+ module Embulk
7
+ module Input
8
+
9
+ class Elasticsearch < InputPlugin
10
+ Plugin.register_input("elasticsearch", self)
11
+ ADD_QUERY_TO_RECORD_KEY = 'query'
12
+
13
+ def self.transaction(config, &control)
14
+ task = {
15
+ "nodes" => config.param("nodes", :array),
16
+ "request_timeout" => config.param("request_timeout", :integer, default: 60),
17
+ "index" => config.param("index", :string),
18
+ "reload_connections" => config.param("reload_connections", :bool, default: true),
19
+ "reload_on_failure" => config.param("reload_on_failure", :bool, default: false),
20
+ "index_type" => config.param("index_type", :string, default: nil),
21
+ "retry_on_failure" => config.param("retry_on_failure", :integer, default: 5),
22
+ "per_size" => config.param("per_size", :integer, default: 1000),
23
+ "limit_size" => config.param("limit_size", :integer, default: nil),
24
+ "fields" => config.param("fields", :array, default: nil),
25
+ "queries" => config.param("queries", :array),
26
+ "sort" => config.param("sort", :hash, default: nil),
27
+ "add_query_to_record" => config.param("add_query_to_record", :bool, default: false),
28
+ "scroll" => config.param("scroll", :string, default: '1m')
29
+ }
30
+ # TODO: want max_threads
31
+ define_num_threads = config.param("num_threads", :integer, default: 1)
32
+ task['slice_queries'] = InputThread.get_slice_from_num_threads(task['queries'], define_num_threads)
33
+
34
+ columns = []
35
+ task['fields'].each_with_index{ |field, i|
36
+ columns << Column.new(i, field['name'], field['type'].to_sym)
37
+ }
38
+ if task['add_query_to_record']
39
+ columns << Column.new(task['fields'].size, ADD_QUERY_TO_RECORD_KEY, :string)
40
+ end
41
+
42
+ resume(task, columns, task['slice_queries'].size, &control)
43
+ end
44
+
45
+ def self.resume(task, columns, count, &control)
46
+ task_reports = yield(task, columns, count)
47
+
48
+ next_config_diff = {}
49
+ return next_config_diff
50
+ end
51
+
52
+ def init # Per-task setup: selects this task's slice of queries and creates the connection.
53
+ @queries = task['slice_queries'][@index] # @index is not assigned in this file; presumably the task index provided by InputPlugin (verify)
54
+ Embulk.logger.info("this thread queries => #{@queries}")
55
+ @add_query_to_record = task['add_query_to_record']
56
+ @connection = Connection.new(task)
57
+ end
58
+
59
+ def run
60
+ @queries.each do |query|
61
+ @connection.search_with_query(query) { |result|
62
+ if @add_query_to_record
63
+ result << query
64
+ end
65
+ page_builder.add(result)
66
+ }
67
+ end
68
+ page_builder.finish
69
+
70
+ task_report = {}
71
+ return task_report
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,101 @@
1
+ require 'excon'
2
+ require 'elasticsearch'
3
+
4
+ module Embulk
5
+ module Input
6
+ class Elasticsearch < InputPlugin
7
+ class Connection
8
+ def initialize(task) # Copies the search settings out of the task hash and builds the client.
9
+ @scroll = task['scroll']
10
+ @index = task['index']
11
+ @index_type = task['index_type']
12
+ @size = task['per_size'] # sent as the size of each search request
13
+ @fields = task['fields']
14
+ @sort = task['sort']
15
+ @limit_size = task['limit_size'] # nil means no limit (see search_with_query)
16
+ @retry_on_failure = task['retry_on_failure'] # also used by search_with_retry; 0 means retry forever
17
+ @client = create_client(
18
+ nodes: task['nodes'],
19
+ reload_connections: task['reload_connections'],
20
+ reload_on_failure: task['reload_on_failure'],
21
+ retry_on_failure: task['retry_on_failure'],
22
+ request_timeout: task['request_timeout']
23
+ )
24
+ end
25
+
26
+ def create_client(nodes: ,reload_connections: ,reload_on_failure: ,retry_on_failure: ,request_timeout:) # Returns an Elasticsearch::Client built on an explicit Faraday transport.
27
+ transport = ::Elasticsearch::Transport::Transport::HTTP::Faraday.new(
28
+ {
29
+ hosts: nodes.map{ |node| Hash[node.map{ |k, v| [k.to_sym, v] }] }, # symbolize the keys of every node hash
30
+ options: {
31
+ reload_connections: reload_connections,
32
+ reload_on_failure: reload_on_failure,
33
+ retry_on_failure: retry_on_failure,
34
+ transport_options: {
35
+ request: { timeout: request_timeout },
36
+ ssl: { verify: false } # SECURITY: TLS certificate verification is turned off for every node
37
+ }
38
+ }
39
+ }
40
+ )
41
+
42
+ ::Elasticsearch::Client.new transport: transport
43
+ end
44
+
45
+ def search_with_query(query) # Runs one scrolled search for query and yields every converted row to the given block.
46
+ search_option = get_search_option(query)
47
+ Embulk.logger.info("#{search_option}")
48
+ r = search_with_retry { @client.search(search_option) }
49
+ return if r.nil?
50
+ i = 0 # number of rows yielded so far for this query
51
+ Converter.get_sources(r, @fields).each do |result|
52
+ yield(result) if block_given?
53
+ return if @limit_size == (i += 1) # stop at limit_size rows; a nil limit_size never matches
54
+ end
55
+
56
+ while r = (search_with_retry { @client.scroll(scroll_id: r['_scroll_id'], scroll: @scroll) }) and (not r['hits']['hits'].empty?) do # fetch pages until one comes back empty
57
+ Converter.get_sources(r, @fields).each do |result|
58
+ yield(result) if block_given?
59
+ return if @limit_size == (i += 1)
60
+ end
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ def search_with_retry
67
+ retries = 0
68
+ begin
69
+ yield if block_given?
70
+ rescue => e
71
+ if (@retry_on_failure == 0 || retries < @retry_on_failure)
72
+ retries += 1
73
+ Embulk.logger.warn "Could not search to Elasticsearch, resetting connection and trying again. #{e.message}"
74
+ sleep 2**retries
75
+ retry
76
+ end
77
+ msg = "Could not search to Elasticsearch after #{retries} retries. #{e.message}"
78
+ raise Elasticsearch::ConnectionError.new e, msg
79
+ end
80
+ end
81
+
82
+ def get_search_option(query)
83
+ body = { }
84
+ body[:query] = { query_string: { query: query } } unless query.nil?
85
+ if @sort
86
+ sorts = []
87
+ @sort.each do |k, v|
88
+ sorts << { k => v }
89
+ end
90
+ body[:sort] = sorts
91
+ else
92
+ body[:sort] = ["_doc"]
93
+ end
94
+ search_option = { index: @index, type: @index_type, scroll: @scroll, body: body, size: @size }
95
+ search_option[:_source] = @fields.select{ |field| !field['metadata'] }.map { |field| field['name'] }.join(',')
96
+ search_option
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,49 @@
1
+ module Embulk
2
+ module Input
3
+ class Elasticsearch < InputPlugin
4
+ class Converter
5
+ def self.get_sources(results, fields)
6
+ hits = results['hits']['hits']
7
+ hits.map { |hit|
8
+ result = hit['_source']
9
+ fields.map { |field|
10
+ value = field['metadata'] ? hit[field['name']] : result[field['name']]
11
+ convert_value(value, field)
12
+ }
13
+ }
14
+ end
15
+
16
+ def self.convert_value(value, field)
17
+ return nil if value.nil?
18
+ case field["type"]
19
+ when "string"
20
+ value
21
+ when "long"
22
+ value.to_i
23
+ when "double"
24
+ value.to_f
25
+ when "boolean"
26
+ if value.is_a?(TrueClass) || value.is_a?(FalseClass)
27
+ value
28
+ else
29
+ downcased_val = value.downcase
30
+ case downcased_val
31
+ when 'true' then true
32
+ when 'false' then false
33
+ when '1' then true
34
+ when '0' then false
35
+ else nil
36
+ end
37
+ end
38
+ when "timestamp"
39
+ Time.parse(value)
40
+ when "json"
41
+ value
42
+ else
43
+ raise Elasticsearch::TypecastError.new "Unsupported type #{field['type']}"
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,39 @@
1
+ module Embulk
2
+ module Input
3
+
4
+ class Elasticsearch < InputPlugin
5
+
6
+ module Traceable
7
+ def initialize(e, more_msg = nil)
8
+ message = e.is_a?(String) ? '' : "(#{e.class}) "
9
+ message << "#{e}#{more_msg}\n"
10
+ message << "\tat #{e.backtrace.join("\n\tat ")}\n" if e.respond_to?(:backtrace)
11
+
12
+ while e.respond_to?(:cause) and e.cause
13
+ # Java Exception cannot follow the JRuby causes.
14
+ message << "Caused by (#{e.cause.class}) #{e.cause}\n"
15
+ message << "\tat #{e.cause.backtrace.join("\n\tat ")}\n" if e.cause.respond_to?(:backtrace)
16
+ e = e.cause
17
+ end
18
+
19
+ super(message)
20
+ end
21
+ end
22
+
23
+ class ConfigError < ::Embulk::ConfigError
24
+ include Traceable
25
+ end
26
+
27
+ class ConnectionError < ConfigError
28
+ end
29
+
30
+ class DataError < ::Embulk::DataError
31
+ include Traceable
32
+ end
33
+
34
+ class TypecastError < DataError
35
+ end
36
+
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,19 @@
1
+ module Embulk
2
+ module Input
3
+ class Elasticsearch < InputPlugin
4
+ class InputThread
5
+ def self.get_slice_from_num_threads(array, define_num_threads)
6
+ num_threads = array.size < define_num_threads ? array.size : define_num_threads
7
+ per_queries = if (array.size % num_threads) == 0
8
+ (array.size / num_threads)
9
+ else
10
+ (array.size / num_threads) + 1
11
+ end
12
+ sliced = array.each_slice(per_queries).to_a
13
+ Embulk.logger.info("calculate num threads => #{sliced.size}")
14
+ return sliced
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+
5
+ # require 'embulk/java/bootstrap'
6
+ require 'embulk'
7
+ begin
8
+ # Embulk ~> 0.8.x
9
+ Embulk.setup
10
+ rescue NotImplementedError
11
+ # Embulk ~> 0.9.x
12
+ require 'embulk/java/bootstrap'
13
+ end
14
+ Embulk.logger = Embulk::Logger.new('/dev/null')
15
+
16
+ APP_ROOT = File.expand_path('../', __dir__)
17
+ TEST_ROOT = File.expand_path(File.dirname(__FILE__))
18
+
19
+ require 'embulk/input/elasticsearch'
@@ -0,0 +1,51 @@
1
+ require_relative './helper'
2
+
3
+ Elasticsearch = Embulk::Input::Elasticsearch
4
+
5
+ module Embulk
6
+ class Input::Elasticsearch
7
+ class TestConverter < Test::Unit::TestCase
8
+
9
+ def startup
10
+ end
11
+
12
+ def shutdown
13
+ end
14
+
15
+ sub_test_case "get_sources" do
16
+ def test_normal
17
+ fields = [
18
+ {"name"=>"_id", "type"=>"string", "metadata"=>true},
19
+ {"name"=>"product_id", "type"=>"long"},
20
+ {"name"=>"title", "type"=>"string"}
21
+ ]
22
+
23
+ results = {
24
+ "_scroll_id"=>"cXVlcnlUaGVuRmV0Y2g7NTsxNzg3MjE6WlphQ3V0WDNRYmFRcS1QQ3dCb2s5UTsxNzg3MjI6WlphQ3V0WDNRYmFRcS1QQ3dCb2s5UTsxNzg3MjM6WlphQ3V0WDNRYmFRcS1QQ3dCb2s5UTsxNzg3MjU6WlphQ3V0WDNRYmFRcS1QQ3dCb2s5UTsxNzg3MjQ6WlphQ3V0WDNRYmFRcS1QQ3dCb2s5UTswOw==",
25
+ "took"=>41,
26
+ "timed_out"=>false,
27
+ "_shards"=>{"total"=>5, "successful"=>5, "failed"=>0},
28
+ "hits"=>{
29
+ "total"=>1,
30
+ "max_score"=>nil,
31
+ "hits"=>[
32
+ {
33
+ "_index"=>"test_index",
34
+ "_type"=>"test_type",
35
+ "_id"=>"AVTCxiCuNR-BVKOgUB7R",
36
+ "_score"=>nil,
37
+ "_source"=>{
38
+ "title"=>"dummy title",
39
+ "product_id"=>1
40
+ },
41
+ "sort"=>[12534]
42
+ }
43
+ ]
44
+ }
45
+ }
46
+ assert_equal Converter.get_sources(results, fields), [["AVTCxiCuNR-BVKOgUB7R", 1, "dummy title"]]
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,36 @@
1
+ require_relative './helper'
2
+
3
+ Elasticsearch = Embulk::Input::Elasticsearch
4
+
5
+ module Embulk
6
+ class Input::Elasticsearch
7
+ class TestTransaction < Test::Unit::TestCase
8
+ sub_test_case "get_slice_from_num_threads" do
9
+ def test_normal
10
+ slice = InputThread.get_slice_from_num_threads((1..10).to_a, 5)
11
+ assert_equal slice.size, 5
12
+ assert_equal slice.first.size, 2
13
+ end
14
+
15
+ def test_normal_same
16
+ slice = InputThread.get_slice_from_num_threads((1..3).to_a, 3)
17
+ assert_equal slice.size, 3
18
+ assert_equal slice.first.size, 1
19
+ end
20
+
21
+ def test_num_threads_over_array_size
22
+ slice = InputThread.get_slice_from_num_threads((1..3).to_a, 10)
23
+ assert_equal slice.size, 3
24
+ assert_equal slice.first.size, 1
25
+ end
26
+
27
+ def test_rest
28
+ slice = InputThread.get_slice_from_num_threads((1..20).to_a, 8)
29
+ assert_equal slice.size, 7
30
+ assert_equal slice.first.size, 3
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,50 @@
1
+ require_relative './helper'
2
+ require 'yaml'
3
+
4
+ Elasticsearch = Embulk::Input::Elasticsearch
5
+
6
+ module Embulk
7
+ class Input::Elasticsearch
8
+ class TestTransaction < Test::Unit::TestCase
9
+ def control
10
+ Proc.new {|task| task_reports = [] }
11
+ end
12
+ sub_test_case "transaction" do
13
+ def test_normal
14
+ yaml = YAML.load(%(
15
+ nodes:
16
+ - {host: localhost, port: 9200}
17
+ queries:
18
+ - 'title: 製函機'
19
+ index: crawl
20
+ index_type: m_corporation_page
21
+ request_timeout: 60
22
+ per_size: 1000
23
+ limit_size: 2000
24
+ num_threads: 20
25
+ fields:
26
+ - { name: title, type: string }
27
+ )
28
+ )
29
+ config = DataSource.new(yaml)
30
+ Elasticsearch.transaction(config, &control)
31
+ end
32
+
33
+ def test_minimum
34
+ yaml = YAML.load(%(
35
+ nodes:
36
+ - {host: localhost, port: 9200}
37
+ queries:
38
+ - 'title: 製函機'
39
+ index: crawl
40
+ fields:
41
+ - { name: title, type: string }
42
+ )
43
+ )
44
+ config = DataSource.new(yaml)
45
+ Elasticsearch.transaction(config, &control)
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-elasticsearch-nosslverify
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.6
5
+ platform: ruby
6
+ authors:
7
+ - toyama0919
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: elasticsearch
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: excon
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: embulk
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.8.18
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 0.8.18
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 1.10.6
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 1.10.6
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: test-unit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Loads records from Elasticsearch. parallel query support.
98
+ email:
99
+ - toyama0919@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".ruby-version"
106
+ - ".travis.yml"
107
+ - Gemfile
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - embulk-input-elasticsearch.gemspec
112
+ - lib/embulk/input/elasticsearch.rb
113
+ - lib/embulk/input/elasticsearch/connection.rb
114
+ - lib/embulk/input/elasticsearch/converter.rb
115
+ - lib/embulk/input/elasticsearch/error.rb
116
+ - lib/embulk/input/elasticsearch/input_thread.rb
117
+ - test/helper.rb
118
+ - test/test_converter.rb
119
+ - test/test_input_thread.rb
120
+ - test/test_transaction.rb
121
+ homepage: https://github.com/toyama0919/embulk-input-elasticsearch
122
+ licenses:
123
+ - MIT
124
+ metadata: {}
125
+ post_install_message:
126
+ rdoc_options: []
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubygems_version: 3.0.3
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: Elasticsearch input plugin for Embulk
144
+ test_files:
145
+ - test/helper.rb
146
+ - test/test_converter.rb
147
+ - test/test_input_thread.rb
148
+ - test/test_transaction.rb