metacrunch-elasticsearch 3.0.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +5 -9
- data/Rakefile +1 -1
- data/bin/console +10 -7
- data/lib/metacrunch/elasticsearch.rb +2 -11
- data/lib/metacrunch/elasticsearch/destination.rb +47 -0
- data/lib/metacrunch/elasticsearch/source.rb +56 -0
- data/lib/metacrunch/elasticsearch/version.rb +1 -1
- data/metacrunch-elasticsearch.gemspec +5 -6
- metadata +13 -23
- data/.travis.yml +0 -5
- data/Readme.md +0 -3
- data/bin/setup +0 -7
- data/lib/metacrunch/elasticsearch/client_factory.rb +0 -15
- data/lib/metacrunch/elasticsearch/index_creator.rb +0 -77
- data/lib/metacrunch/elasticsearch/indexer.rb +0 -90
- data/lib/metacrunch/elasticsearch/options_helpers.rb +0 -30
- data/lib/metacrunch/elasticsearch/reader.rb +0 -63
- data/lib/metacrunch/elasticsearch/searcher.rb +0 -56
- data/lib/metacrunch/elasticsearch/uri.rb +0 -31
- data/lib/metacrunch/elasticsearch/writer.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7fcdad44003a195fca6541d8c9156523dcde02ad
|
4
|
+
data.tar.gz: 964c93ec7c91b54ea76bf338c5997920546f3840
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29bdc05cebb697ee8fbf6a434d2ad3861cf205c0a35324a77f1c1ef145599c72032f4adf2e5cc6e2cb395ac5bc744493942f87ddeb6f0310b342ac8285c04074
|
7
|
+
data.tar.gz: 60afb62d71c16d17404b5fd2b1bf735bce2011398a9bc014f0cb2c0e398df13d668c1601b9caa3e6beda1ed3f0d946da4bd37c0a59548159410182e86d3faf12
|
data/Gemfile
CHANGED
@@ -3,19 +3,15 @@ source "https://rubygems.org"
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development do
|
6
|
-
gem "bundler",
|
7
|
-
gem "rake",
|
8
|
-
gem "rspec",
|
9
|
-
gem "simplecov", ">= 0.11.0"
|
6
|
+
gem "bundler", ">= 1.15"
|
7
|
+
gem "rake", ">= 12.1"
|
8
|
+
gem "rspec", ">= 3.5.0", "< 4.0.0"
|
10
9
|
|
11
10
|
if !ENV["CI"]
|
12
|
-
gem "
|
13
|
-
gem "pry-byebug", ">= 3.3.0", platform: :ruby
|
14
|
-
gem "pry-rescue", ">= 1.4.2", platform: :ruby
|
15
|
-
gem "pry-state", ">= 0.1.7", platform: :ruby
|
11
|
+
gem "pry-byebug", ">= 3.5.0"
|
16
12
|
end
|
17
13
|
end
|
18
14
|
|
19
15
|
group :test do
|
20
|
-
gem "
|
16
|
+
gem "simplecov", ">= 0.15.0"
|
21
17
|
end
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
2
|
require "bundler/setup"
|
4
|
-
require "
|
3
|
+
require "metacrunch/elasticsearch"
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
begin
|
6
|
+
require "pry"
|
7
|
+
rescue LoadError ; end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
if defined?(Pry)
|
10
|
+
Pry.start
|
11
|
+
else
|
12
|
+
require "irb"
|
13
|
+
IRB.start
|
14
|
+
end
|
@@ -2,16 +2,7 @@ require "elasticsearch"
|
|
2
2
|
|
3
3
|
module Metacrunch
|
4
4
|
module Elasticsearch
|
5
|
-
require_relative "
|
6
|
-
require_relative "
|
7
|
-
require_relative "./elasticsearch/reader"
|
8
|
-
require_relative "./elasticsearch/searcher"
|
9
|
-
require_relative "./elasticsearch/uri"
|
10
|
-
require_relative "./elasticsearch/writer"
|
11
|
-
|
12
|
-
#
|
13
|
-
# error class are inline to not clutter source files unnecessarily
|
14
|
-
#
|
15
|
-
class IndexAlreadyExistsError < StandardError; end
|
5
|
+
require_relative "elasticsearch/destination"
|
6
|
+
require_relative "elasticsearch/source"
|
16
7
|
end
|
17
8
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "metacrunch/elasticsearch"
|
2
|
+
|
3
|
+
module Metacrunch
|
4
|
+
class Elasticsearch::Destination
|
5
|
+
|
6
|
+
DEFAULT_OPTIONS = {
|
7
|
+
raise_on_result_errors: false,
|
8
|
+
result_callback: nil,
|
9
|
+
bulk_options: {}
|
10
|
+
}
|
11
|
+
|
12
|
+
def initialize(elasticsearch_client, options = {})
|
13
|
+
@client = elasticsearch_client
|
14
|
+
@options = DEFAULT_OPTIONS.deep_merge(options)
|
15
|
+
end
|
16
|
+
|
17
|
+
def write(data)
|
18
|
+
return if data.blank?
|
19
|
+
|
20
|
+
# Call elasticsearch bulk api
|
21
|
+
bulk_options = @options[:bulk_options]
|
22
|
+
bulk_options[:body] = data
|
23
|
+
result = @client.bulk(bulk_options)
|
24
|
+
|
25
|
+
# Raise an exception if one of the results produced an error and the user wants to know about it
|
26
|
+
raise DestinationError.new(errors: result["errors"]) if result["errors"] && @options[:raise_on_result_errors]
|
27
|
+
|
28
|
+
# if the user provided a callback proc, call it
|
29
|
+
@options[:result_callback].call(result) if @options[:result_callback]&.respond_to?(:call)
|
30
|
+
end
|
31
|
+
|
32
|
+
def close
|
33
|
+
# noop
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
class Elasticsearch::DestinationError < StandardError
|
39
|
+
|
40
|
+
attr_reader :errors
|
41
|
+
|
42
|
+
def initialize(msg = nil, errors:)
|
43
|
+
@errors = errors
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "metacrunch/elasticsearch"
|
2
|
+
|
3
|
+
module Metacrunch
|
4
|
+
class Elasticsearch::Source
|
5
|
+
|
6
|
+
DEFAULT_OPTIONS = {
|
7
|
+
total_hits_callback: nil,
|
8
|
+
search_options: {
|
9
|
+
size: 100,
|
10
|
+
scroll: "1m",
|
11
|
+
sort: ["_doc"]
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
def initialize(elasticsearch_client, options = {})
|
16
|
+
@client = elasticsearch_client
|
17
|
+
@options = DEFAULT_OPTIONS.deep_merge(options)
|
18
|
+
end
|
19
|
+
|
20
|
+
def each(&block)
|
21
|
+
return enum_for(__method__) unless block_given?
|
22
|
+
|
23
|
+
# Perform search request and yield the first results if any
|
24
|
+
search_options = @options[:search_options]
|
25
|
+
result = @client.search(search_options)
|
26
|
+
call_total_hits_callback(result)
|
27
|
+
yield_hits(result, &block)
|
28
|
+
|
29
|
+
# Scroll over the rest of result set and yield the results until the set is empty.
|
30
|
+
while (
|
31
|
+
# Note: semantic of 'and' is important here. Do not use '&&'.
|
32
|
+
result = @client.scroll(scroll_id: result["_scroll_id"], scroll: search_options[:scroll]) and result["hits"]["hits"].present?
|
33
|
+
) do
|
34
|
+
yield_hits(result, &block)
|
35
|
+
end
|
36
|
+
ensure
|
37
|
+
# Clear scroll to free up resources.
|
38
|
+
@client.clear_scroll(scroll_id: result["_scroll_id"]) if result
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def call_total_hits_callback(result)
|
44
|
+
if @options[:total_hits_callback]&.respond_to?(:call) && result["hits"]["total"]
|
45
|
+
@options[:total_hits_callback].call(result["hits"]["total"])
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def yield_hits(result, &block)
|
50
|
+
result["hits"]["hits"].each do |hit|
|
51
|
+
yield(hit)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
@@ -6,17 +6,16 @@ require "metacrunch/elasticsearch/version"
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "metacrunch-elasticsearch"
|
8
8
|
spec.version = Metacrunch::Elasticsearch::VERSION
|
9
|
-
spec.authors = ["René Sprotte"
|
10
|
-
spec.summary = %q{
|
9
|
+
spec.authors = ["René Sprotte"]
|
10
|
+
spec.summary = %q{Elasticsearch package for the metacrunch ETL toolkit.}
|
11
11
|
spec.homepage = "http://github.com/ubpb/metacrunch-elasticsearch"
|
12
|
-
spec.
|
12
|
+
spec.license = "MIT"
|
13
13
|
|
14
14
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
15
|
-
spec.bindir = "exe"
|
16
15
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
17
16
|
spec.require_paths = ["lib"]
|
18
17
|
|
19
|
-
spec.add_dependency "activesupport", ">=
|
20
|
-
spec.add_dependency "elasticsearch", "
|
18
|
+
spec.add_dependency "activesupport", ">= 5.1.0"
|
19
|
+
spec.add_dependency "elasticsearch", ">= 5.0.4"
|
21
20
|
end
|
22
21
|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
|
-
- Michael Sievers
|
9
8
|
autorequire:
|
10
|
-
bindir:
|
9
|
+
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-09-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: activesupport
|
@@ -17,28 +16,28 @@ dependencies:
|
|
17
16
|
requirements:
|
18
17
|
- - ">="
|
19
18
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
19
|
+
version: 5.1.0
|
21
20
|
type: :runtime
|
22
21
|
prerelease: false
|
23
22
|
version_requirements: !ruby/object:Gem::Requirement
|
24
23
|
requirements:
|
25
24
|
- - ">="
|
26
25
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
26
|
+
version: 5.1.0
|
28
27
|
- !ruby/object:Gem::Dependency
|
29
28
|
name: elasticsearch
|
30
29
|
requirement: !ruby/object:Gem::Requirement
|
31
30
|
requirements:
|
32
|
-
- - "
|
31
|
+
- - ">="
|
33
32
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
33
|
+
version: 5.0.4
|
35
34
|
type: :runtime
|
36
35
|
prerelease: false
|
37
36
|
version_requirements: !ruby/object:Gem::Requirement
|
38
37
|
requirements:
|
39
|
-
- - "
|
38
|
+
- - ">="
|
40
39
|
- !ruby/object:Gem::Version
|
41
|
-
version:
|
40
|
+
version: 5.0.4
|
42
41
|
description:
|
43
42
|
email:
|
44
43
|
executables: []
|
@@ -47,23 +46,14 @@ extra_rdoc_files: []
|
|
47
46
|
files:
|
48
47
|
- ".gitignore"
|
49
48
|
- ".rspec"
|
50
|
-
- ".travis.yml"
|
51
49
|
- Gemfile
|
52
50
|
- License.txt
|
53
51
|
- Rakefile
|
54
|
-
- Readme.md
|
55
52
|
- bin/console
|
56
|
-
- bin/setup
|
57
53
|
- lib/metacrunch/elasticsearch.rb
|
58
|
-
- lib/metacrunch/elasticsearch/
|
59
|
-
- lib/metacrunch/elasticsearch/
|
60
|
-
- lib/metacrunch/elasticsearch/indexer.rb
|
61
|
-
- lib/metacrunch/elasticsearch/options_helpers.rb
|
62
|
-
- lib/metacrunch/elasticsearch/reader.rb
|
63
|
-
- lib/metacrunch/elasticsearch/searcher.rb
|
64
|
-
- lib/metacrunch/elasticsearch/uri.rb
|
54
|
+
- lib/metacrunch/elasticsearch/destination.rb
|
55
|
+
- lib/metacrunch/elasticsearch/source.rb
|
65
56
|
- lib/metacrunch/elasticsearch/version.rb
|
66
|
-
- lib/metacrunch/elasticsearch/writer.rb
|
67
57
|
- metacrunch-elasticsearch.gemspec
|
68
58
|
homepage: http://github.com/ubpb/metacrunch-elasticsearch
|
69
59
|
licenses:
|
@@ -85,8 +75,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
75
|
version: '0'
|
86
76
|
requirements: []
|
87
77
|
rubyforge_project:
|
88
|
-
rubygems_version: 2.
|
78
|
+
rubygems_version: 2.6.11
|
89
79
|
signing_key:
|
90
80
|
specification_version: 4
|
91
|
-
summary:
|
81
|
+
summary: Elasticsearch package for the metacrunch ETL toolkit.
|
92
82
|
test_files: []
|
data/.travis.yml
DELETED
data/Readme.md
DELETED
data/bin/setup
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
|
4
|
-
module Metacrunch::Elasticsearch::ClientFactory
|
5
|
-
def client_factory
|
6
|
-
client_options = {
|
7
|
-
host: @host,
|
8
|
-
hosts: @hosts,
|
9
|
-
url: @url,
|
10
|
-
urls: @urls
|
11
|
-
}.compact
|
12
|
-
|
13
|
-
Elasticsearch::Client.new(client_options)
|
14
|
-
end
|
15
|
-
end
|
@@ -1,77 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
require_relative "./client_factory"
|
4
|
-
require_relative "./options_helpers"
|
5
|
-
|
6
|
-
class Metacrunch::Elasticsearch::IndexCreator
|
7
|
-
include Metacrunch::Elasticsearch::ClientFactory
|
8
|
-
include Metacrunch::Elasticsearch::OptionsHelpers
|
9
|
-
|
10
|
-
attr_accessor :default_mapping
|
11
|
-
attr_accessor :delete_existing_index
|
12
|
-
attr_accessor :logger
|
13
|
-
attr_accessor :settings
|
14
|
-
|
15
|
-
def initialize(options = {})
|
16
|
-
(@client_args = options).deep_symbolize_keys!
|
17
|
-
extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas, :settings)
|
18
|
-
raise ArgumentError.new("You have to supply an index name!") if @client_args[:index].blank?
|
19
|
-
end
|
20
|
-
|
21
|
-
def call(items = [])
|
22
|
-
client = client_factory
|
23
|
-
logger = @logger
|
24
|
-
|
25
|
-
if client.indices.exists?(@client_args)
|
26
|
-
if @delete_existing_index == true
|
27
|
-
client.indices.delete(@client_args)
|
28
|
-
log_index_deleted(logger, @client_args[:index], client) if logger
|
29
|
-
elsif @delete_existing_index == false
|
30
|
-
return
|
31
|
-
else
|
32
|
-
raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
client.indices.create(@client_args.merge(
|
37
|
-
{
|
38
|
-
body: {
|
39
|
-
number_of_shards: @number_of_shards,
|
40
|
-
number_of_replicas: @number_of_replicas,
|
41
|
-
settings: @settings
|
42
|
-
}.compact
|
43
|
-
}
|
44
|
-
))
|
45
|
-
|
46
|
-
log_index_created(logger, @client_args[:index], client) if logger
|
47
|
-
|
48
|
-
if @default_mapping
|
49
|
-
client.indices.put_mapping(
|
50
|
-
@client_args.merge(
|
51
|
-
type: "_default_",
|
52
|
-
body: {
|
53
|
-
_default_: @default_mapping
|
54
|
-
}
|
55
|
-
)
|
56
|
-
)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
private
|
61
|
-
|
62
|
-
def log_index_created(logger, index, client)
|
63
|
-
paths = client.transport.hosts.map do |_host|
|
64
|
-
"#{_host[:host]}:#{_host[:port]}"
|
65
|
-
end
|
66
|
-
|
67
|
-
logger.info("Index #{index} created at #{paths}")
|
68
|
-
end
|
69
|
-
|
70
|
-
def log_index_deleted(logger, index, client)
|
71
|
-
paths = client.transport.hosts.map do |_host|
|
72
|
-
"#{_host[:host]}:#{_host[:port]}"
|
73
|
-
end
|
74
|
-
|
75
|
-
logger.info("Index #{index} deleted at #{paths}")
|
76
|
-
end
|
77
|
-
end
|
@@ -1,90 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
require_relative "./client_factory"
|
4
|
-
require_relative "./options_helpers"
|
5
|
-
|
6
|
-
class Metacrunch::Elasticsearch::Indexer
|
7
|
-
include Metacrunch::Elasticsearch::ClientFactory
|
8
|
-
include Metacrunch::Elasticsearch::OptionsHelpers
|
9
|
-
|
10
|
-
attr_accessor :bulk_size
|
11
|
-
attr_accessor :callbacks
|
12
|
-
attr_accessor :id_accessor
|
13
|
-
attr_accessor :index
|
14
|
-
attr_accessor :logger
|
15
|
-
attr_accessor :type
|
16
|
-
|
17
|
-
def initialize(options = {})
|
18
|
-
(@client_args = options).deep_symbolize_keys!
|
19
|
-
extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
|
20
|
-
raise ArgumentError.new("You have to supply an index name!") if @index.blank?
|
21
|
-
end
|
22
|
-
|
23
|
-
def call(items = [])
|
24
|
-
logger = @logger
|
25
|
-
|
26
|
-
if (slice_size = @bulk_size || items.length) > 0
|
27
|
-
client = client_factory
|
28
|
-
|
29
|
-
items.each_slice(slice_size) do |_item_slice|
|
30
|
-
# bodies is an array to allow slicing in case of HTTP content length exceed
|
31
|
-
bodies = [_item_slice.inject([]) { |_memo, _item| _memo.concat bulk_item_factory(_item) }]
|
32
|
-
|
33
|
-
bulk_responses =
|
34
|
-
begin
|
35
|
-
bodies.map do |_body|
|
36
|
-
client.bulk body: _body
|
37
|
-
end
|
38
|
-
rescue
|
39
|
-
logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger
|
40
|
-
|
41
|
-
bodies = bodies.inject([]) do |_memo, _body|
|
42
|
-
# Since we have to work with the bulk request body instead if the original items
|
43
|
-
# the bodys length has to be a multiple of 2 in any case. .fdiv(2).fdiv(2).ceil * 2
|
44
|
-
# ensures this. Example 3698.fdiv(2).fdiv(2).fdiv(2).ceil * 2 == 1850
|
45
|
-
_memo.concat(_body.each_slice(_body.length.fdiv(2).fdiv(2).ceil * 2).to_a)
|
46
|
-
end
|
47
|
-
|
48
|
-
retry
|
49
|
-
end
|
50
|
-
|
51
|
-
bulk_responses.each do |_bulk_response|
|
52
|
-
log_items_indexed(logger, _bulk_response["items"].length, client) if logger
|
53
|
-
|
54
|
-
if after_indexed_callback = (@callbacks || {})[:after_indexed]
|
55
|
-
_item_slice.zip(_bulk_response["items"]).each do |_item, _item_response|
|
56
|
-
after_indexed_callback.call(_item, _item_response)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
|
66
|
-
def bulk_item_factory(item)
|
67
|
-
[
|
68
|
-
{ index: { _index: @index, _type: @type, _id: id(item) }.compact },
|
69
|
-
item.to_h
|
70
|
-
]
|
71
|
-
end
|
72
|
-
|
73
|
-
def id(item)
|
74
|
-
if @id_accessor
|
75
|
-
if @id_accessor.respond_to?(:call)
|
76
|
-
@id_accessor.call(item)
|
77
|
-
else
|
78
|
-
item[@id_accessor]
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def log_items_indexed(logger, amount, client)
|
84
|
-
paths = client.transport.hosts.map do |_host|
|
85
|
-
"#{_host[:host]}:#{_host[:port]}/#{@index}/#{@type}"
|
86
|
-
end
|
87
|
-
|
88
|
-
logger.info("Indexed #{amount} items to #{paths}")
|
89
|
-
end
|
90
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
require_relative "../elasticsearch"
|
2
|
-
|
3
|
-
module Metacrunch::Elasticsearch::OptionsHelpers
|
4
|
-
def extract_options!(options, *keys)
|
5
|
-
keys = keys
|
6
|
-
.map do |_key|
|
7
|
-
_key == :_client_options_ ? [:host, :hosts, :url, :urls] : _key
|
8
|
-
end
|
9
|
-
.flatten
|
10
|
-
|
11
|
-
options
|
12
|
-
.delete_if do |_key, _value|
|
13
|
-
if keys.include?(_key)
|
14
|
-
instance_variable_set("@#{_key}", _value)
|
15
|
-
true # else if _value is falsy, the key does not get deleted
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def normalize_options!(options)
|
21
|
-
{
|
22
|
-
index: options[:index],
|
23
|
-
body: options.select { |_key, _| _key != :index }
|
24
|
-
}
|
25
|
-
.tap(&:compact!)
|
26
|
-
.try do |_result|
|
27
|
-
options.clear.merge!(_result)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
@@ -1,63 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
|
4
|
-
module Metacrunch
|
5
|
-
module Elasticsearch
|
6
|
-
class Reader
|
7
|
-
|
8
|
-
DEFAULT_SCAN_SIZE = 250
|
9
|
-
DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
|
10
|
-
|
11
|
-
|
12
|
-
def initialize(uri, body, log: false)
|
13
|
-
unless uri.starts_with?("elasticsearch://")
|
14
|
-
raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
|
15
|
-
end
|
16
|
-
|
17
|
-
@uri = URI(uri)
|
18
|
-
@body = body
|
19
|
-
@log = log
|
20
|
-
end
|
21
|
-
|
22
|
-
def each(&block)
|
23
|
-
return enum_for(__method__) unless block_given?
|
24
|
-
|
25
|
-
search_result = client.search({
|
26
|
-
body: @body,
|
27
|
-
index: @uri.index,
|
28
|
-
type: @uri.type,
|
29
|
-
scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
|
30
|
-
search_type: "scan",
|
31
|
-
size: DEFAULT_SCAN_SIZE
|
32
|
-
})
|
33
|
-
|
34
|
-
while (
|
35
|
-
search_result = client.scroll(
|
36
|
-
scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
|
37
|
-
scroll_id: search_result["_scroll_id"]
|
38
|
-
) and # don't use &&, the semantic of 'and' is important here
|
39
|
-
search_result["hits"]["hits"].present?
|
40
|
-
) do
|
41
|
-
search_result["hits"]["hits"].each do |_hit|
|
42
|
-
yield(_hit)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def count
|
48
|
-
client.count({
|
49
|
-
body: { query: @body[:query] },
|
50
|
-
index: @uri.index,
|
51
|
-
type: @uri.type
|
52
|
-
})["count"]
|
53
|
-
end
|
54
|
-
|
55
|
-
private
|
56
|
-
|
57
|
-
def client
|
58
|
-
@client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
require_relative "./client_factory"
|
4
|
-
require_relative "./options_helpers"
|
5
|
-
|
6
|
-
class Metacrunch::Elasticsearch::Searcher
|
7
|
-
include Enumerable
|
8
|
-
include Metacrunch::Elasticsearch::ClientFactory
|
9
|
-
include Metacrunch::Elasticsearch::OptionsHelpers
|
10
|
-
|
11
|
-
DEFAULT_BODY = { query: { match_all: {} } }
|
12
|
-
DEFAULT_SCAN_SIZE = 200 # per shard
|
13
|
-
DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
|
14
|
-
|
15
|
-
attr_accessor :bulk_size
|
16
|
-
attr_accessor :index
|
17
|
-
attr_accessor :scan_size
|
18
|
-
attr_accessor :scroll_expiry_time
|
19
|
-
attr_accessor :type
|
20
|
-
|
21
|
-
def initialize(options = {})
|
22
|
-
options.deep_symbolize_keys!
|
23
|
-
extract_options!(options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
|
24
|
-
@body = options.presence || DEFAULT_BODY
|
25
|
-
end
|
26
|
-
|
27
|
-
def call(items = [])
|
28
|
-
@docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum
|
29
|
-
items.concat(@docs_enumerator.next)
|
30
|
-
end
|
31
|
-
|
32
|
-
def each
|
33
|
-
return enum_for(__method__) unless block_given?
|
34
|
-
client = client_factory
|
35
|
-
|
36
|
-
search_result = client.search({
|
37
|
-
body: @body,
|
38
|
-
index: @index,
|
39
|
-
scroll: "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s",
|
40
|
-
search_type: "scan",
|
41
|
-
size: @scan_size || DEFAULT_SCAN_SIZE
|
42
|
-
})
|
43
|
-
|
44
|
-
while (
|
45
|
-
search_result = client.scroll(
|
46
|
-
scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
|
47
|
-
scroll_id: search_result["_scroll_id"]
|
48
|
-
) and # don't use &&, the semantic of and is important here
|
49
|
-
search_result["hits"]["hits"].present?
|
50
|
-
) do
|
51
|
-
search_result["hits"]["hits"].each do |_hit|
|
52
|
-
yield _hit
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
require "uri"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
|
4
|
-
|
5
|
-
module Metacrunch
|
6
|
-
module Elasticsearch
|
7
|
-
class URI < URI::Generic
|
8
|
-
|
9
|
-
DEFAULT_PORT = 9200
|
10
|
-
|
11
|
-
def index
|
12
|
-
splitted_path[0]
|
13
|
-
end
|
14
|
-
|
15
|
-
def type
|
16
|
-
splitted_path[1]
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def splitted_path
|
22
|
-
path.split("/").map(&:presence).compact
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
module URI
|
30
|
-
@@schemes['ELASTICSEARCH'] = Metacrunch::Elasticsearch::URI
|
31
|
-
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require "elasticsearch"
|
2
|
-
require_relative "../elasticsearch"
|
3
|
-
|
4
|
-
module Metacrunch
|
5
|
-
module Elasticsearch
|
6
|
-
class Writer
|
7
|
-
|
8
|
-
def initialize(uri, log: false, bulk_size: 250, autoflush: true)
|
9
|
-
unless uri.starts_with?("elasticsearch://")
|
10
|
-
raise ArgumentError, "URI must be an elasticsearch URI (elasticsearch://...)"
|
11
|
-
end
|
12
|
-
|
13
|
-
@uri = URI(uri)
|
14
|
-
@log = log
|
15
|
-
@bulk_size = bulk_size
|
16
|
-
@buffer = []
|
17
|
-
@autoflush = autoflush
|
18
|
-
end
|
19
|
-
|
20
|
-
def write(data, options = {})
|
21
|
-
id = data.delete(:id) || data.delete(:_id)
|
22
|
-
raise ArgumentError, "Missing id. You must provide 'id' or '_id' as part of the data" unless id
|
23
|
-
|
24
|
-
@buffer << {
|
25
|
-
_index: @uri.index,
|
26
|
-
_type: @uri.type,
|
27
|
-
_id: id,
|
28
|
-
data: data
|
29
|
-
}
|
30
|
-
|
31
|
-
flush if @autoflush && @bulk_size > 0 && @buffer.length >= @bulk_size
|
32
|
-
|
33
|
-
true
|
34
|
-
end
|
35
|
-
|
36
|
-
def flush
|
37
|
-
if @buffer.length > 0
|
38
|
-
result = client.bulk(body: @buffer.inject([]){ |_body, _data| _body << { index: _data } })
|
39
|
-
raise RuntimeError if result["errors"]
|
40
|
-
end
|
41
|
-
|
42
|
-
true
|
43
|
-
ensure
|
44
|
-
@buffer = []
|
45
|
-
end
|
46
|
-
|
47
|
-
def close
|
48
|
-
flush
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def client
|
54
|
-
@client ||= ::Elasticsearch::Client.new(host: @uri.host, port: @uri.port, log: @log)
|
55
|
-
end
|
56
|
-
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|