metacrunch-elasticsearch 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +15 -7
- data/bin/console +11 -0
- data/bin/setup +7 -0
- data/lib/metacrunch/elasticsearch/client_factory.rb +15 -0
- data/lib/metacrunch/elasticsearch/index_creator.rb +76 -0
- data/lib/metacrunch/elasticsearch/indexer.rb +91 -0
- data/lib/metacrunch/elasticsearch/options_helpers.rb +30 -0
- data/lib/metacrunch/elasticsearch/searcher.rb +62 -0
- data/lib/metacrunch/elasticsearch/version.rb +1 -1
- data/lib/metacrunch/elasticsearch.rb +9 -1
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17ba1fec96c7af64b5cdf8283dfee2fe0b4eadf1
|
4
|
+
data.tar.gz: fb673a9a460c7d7f82c96dfedc6a48fd26fdc971
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 106cdda72d6b43bf2d52392b1f3d45e68bbd679427ec1ca582a265dc904d286415ce8e35a0464e867eca550661ef65232e983e72812c4bd815dbbb5c050dcf34
|
7
|
+
data.tar.gz: 7d3552381f7dbf8fab91d2d1e8a2ca8cc964304bfe2105baa760eeba71cbadd12748ffe6ef03b2a425f343fa37caf51dace45d2534eec8b783f4d30516ee658a
|
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
CHANGED
@@ -1,20 +1,28 @@
|
|
1
1
|
source "https://rubygems.org"
|
2
2
|
|
3
|
+
# Specify your gem's dependencies in your gemspec
|
3
4
|
gemspec
|
4
5
|
|
5
|
-
|
6
|
+
group :development do
|
7
|
+
gem "bundler", ">= 1.10"
|
8
|
+
gem "rake"
|
9
|
+
gem "rspec", ">= 3.0.0", "< 4.0.0"
|
10
|
+
gem "simplecov", ">= 0.8.0"
|
11
|
+
gem "vcr", ">= 2.9.0", "< 3.0.0"
|
12
|
+
gem "webmock", ">= 1.19.0", "< 2.0.0"
|
6
13
|
|
7
|
-
|
8
|
-
gem "rspec", "~> 3.2.0"
|
9
|
-
|
10
|
-
if !ENV["CI"]
|
11
|
-
group :development do
|
14
|
+
if !ENV["CI"]
|
12
15
|
gem "hashdiff"
|
13
16
|
gem "pry", "~> 0.9.12.6"
|
14
17
|
gem "pry-byebug", "<= 1.3.2"
|
15
|
-
gem "pry-rescue", "~> 1.4.
|
18
|
+
gem "pry-rescue", "~> 1.4.2"
|
16
19
|
gem "pry-stack_explorer", "~> 0.4.9.1"
|
17
20
|
gem "pry-syntax-hacks", "~> 0.0.6"
|
18
21
|
end
|
19
22
|
end
|
20
23
|
|
24
|
+
group :test do
|
25
|
+
gem "codeclimate-test-reporter", require: nil
|
26
|
+
end
|
27
|
+
|
28
|
+
gem "metacrunch", github: "ubpb/metacrunch", branch: :master
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "metascrunch/elasticsearch"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
require "pry"
|
11
|
+
Pry.start
|
data/bin/setup
ADDED
data/lib/metacrunch/elasticsearch/client_factory.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require_relative "../elasticsearch"
|
3
|
+
|
4
|
+
module Metacrunch::Elasticsearch::ClientFactory
|
5
|
+
def client_factory
|
6
|
+
client_options = {
|
7
|
+
host: @host,
|
8
|
+
hosts: @hosts,
|
9
|
+
url: @url,
|
10
|
+
urls: @urls
|
11
|
+
}.compact
|
12
|
+
|
13
|
+
Elasticsearch::Client.new(client_options)
|
14
|
+
end
|
15
|
+
end
|
data/lib/metacrunch/elasticsearch/index_creator.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require "metacrunch/processor"
|
3
|
+
require_relative "../elasticsearch"
|
4
|
+
require_relative "./client_factory"
|
5
|
+
require_relative "./options_helpers"
|
6
|
+
|
7
|
+
class Metacrunch::Elasticsearch::IndexCreator < Metacrunch::Processor
|
8
|
+
include Metacrunch::Elasticsearch::ClientFactory
|
9
|
+
include Metacrunch::Elasticsearch::OptionsHelpers
|
10
|
+
|
11
|
+
attr_accessor :default_mapping
|
12
|
+
attr_accessor :delete_existing_index
|
13
|
+
attr_accessor :logger
|
14
|
+
|
15
|
+
def initialize(options = {})
|
16
|
+
(@client_args = options).deep_symbolize_keys!
|
17
|
+
extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas)
|
18
|
+
raise ArgumentError.new("You have to supply an index name!") if @client_args[:index].blank?
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(items = [], pipeline = nil)
|
22
|
+
client = client_factory
|
23
|
+
logger = pipeline.try(:logger) || @logger
|
24
|
+
|
25
|
+
if client.indices.exists?(@client_args)
|
26
|
+
if @delete_existing_index == true
|
27
|
+
client.indices.delete(@client_args)
|
28
|
+
log_index_deleted(logger, @client_args[:index], client) if logger
|
29
|
+
elsif @delete_existing_index == false
|
30
|
+
return
|
31
|
+
else
|
32
|
+
raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
client.indices.create(@client_args.merge(
|
37
|
+
{
|
38
|
+
body: {
|
39
|
+
number_of_shards: @number_of_shards,
|
40
|
+
number_of_replicas: @number_of_replicas
|
41
|
+
}.compact
|
42
|
+
}
|
43
|
+
))
|
44
|
+
|
45
|
+
log_index_created(logger, @client_args[:index], client) if logger
|
46
|
+
|
47
|
+
if @default_mapping
|
48
|
+
client.indices.put_mapping(
|
49
|
+
@client_args.merge(
|
50
|
+
type: "_default_",
|
51
|
+
body: {
|
52
|
+
_default_: @default_mapping
|
53
|
+
}
|
54
|
+
)
|
55
|
+
)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def log_index_created(logger, index, client)
|
62
|
+
paths = client.transport.hosts.map do |_host|
|
63
|
+
"#{_host[:host]}:#{_host[:port]}"
|
64
|
+
end
|
65
|
+
|
66
|
+
logger.info("Index #{index} created at #{paths}")
|
67
|
+
end
|
68
|
+
|
69
|
+
def log_index_deleted(logger, index, client)
|
70
|
+
paths = client.transport.hosts.map do |_host|
|
71
|
+
"#{_host[:host]}:#{_host[:port]}"
|
72
|
+
end
|
73
|
+
|
74
|
+
logger.info("Index #{index} deleted at #{paths}")
|
75
|
+
end
|
76
|
+
end
|
data/lib/metacrunch/elasticsearch/indexer.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require "metacrunch/processor"
|
3
|
+
require_relative "../elasticsearch"
|
4
|
+
require_relative "./client_factory"
|
5
|
+
require_relative "./options_helpers"
|
6
|
+
|
7
|
+
class Metacrunch::Elasticsearch::Indexer < Metacrunch::Processor
|
8
|
+
include Metacrunch::Elasticsearch::ClientFactory
|
9
|
+
include Metacrunch::Elasticsearch::OptionsHelpers
|
10
|
+
|
11
|
+
attr_accessor :bulk_size
|
12
|
+
attr_accessor :callbacks
|
13
|
+
attr_accessor :id_accessor
|
14
|
+
attr_accessor :index
|
15
|
+
attr_accessor :logger
|
16
|
+
attr_accessor :type
|
17
|
+
|
18
|
+
def initialize(options = {})
|
19
|
+
(@client_args = options).deep_symbolize_keys!
|
20
|
+
extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
|
21
|
+
raise ArgumentError.new("You have to supply an index name!") if @index.blank?
|
22
|
+
end
|
23
|
+
|
24
|
+
def call(items = [], pipeline = nil)
|
25
|
+
logger = pipeline.try(:logger) || @logger
|
26
|
+
|
27
|
+
if (slice_size = @bulk_size || items.length) > 0
|
28
|
+
client = client_factory
|
29
|
+
|
30
|
+
items.each_slice(slice_size) do |_item_slice|
|
31
|
+
# bodies is an array so it can be sliced further if the HTTP content length is exceeded
|
32
|
+
bodies = [_item_slice.inject([]) { |_memo, _item| _memo.concat bulk_item_factory(_item) }]
|
33
|
+
|
34
|
+
bulk_responses =
|
35
|
+
begin
|
36
|
+
bodies.map do |_body|
|
37
|
+
client.bulk body: _body
|
38
|
+
end
|
39
|
+
rescue
|
40
|
+
logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger
|
41
|
+
|
42
|
+
bodies = bodies.inject([]) do |_memo, _body|
|
43
|
+
# Since we have to work with the bulk request body instead of the original items,
|
44
|
+
# the body's length has to be a multiple of 2 in any case. .fdiv(2).fdiv(2).ceil * 2
|
45
|
+
# ensures this. Example: 3698.fdiv(2).fdiv(2).ceil * 2 == 1850
|
46
|
+
_memo.concat(_body.each_slice(_body.length.fdiv(2).fdiv(2).ceil * 2).to_a)
|
47
|
+
end
|
48
|
+
|
49
|
+
retry
|
50
|
+
end
|
51
|
+
|
52
|
+
bulk_responses.each do |_bulk_response|
|
53
|
+
log_items_indexed(logger, _bulk_response["items"].length, client) if logger
|
54
|
+
|
55
|
+
if after_indexed_callback = (@callbacks || {})[:after_indexed]
|
56
|
+
_item_slice.zip(_bulk_response["items"]).each do |_item, _item_response|
|
57
|
+
after_indexed_callback.call(_item, _item_response)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def bulk_item_factory(item)
|
68
|
+
[
|
69
|
+
{ index: { _index: @index, _type: @type, _id: id(item) }.compact },
|
70
|
+
item.to_h
|
71
|
+
]
|
72
|
+
end
|
73
|
+
|
74
|
+
def id(item)
|
75
|
+
if @id_accessor
|
76
|
+
if @id_accessor.respond_to?(:call)
|
77
|
+
@id_accessor.call(item)
|
78
|
+
else
|
79
|
+
item[@id_accessor]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def log_items_indexed(logger, amount, client)
|
85
|
+
paths = client.transport.hosts.map do |_host|
|
86
|
+
"#{_host[:host]}:#{_host[:port]}/#{@index}/#{@type}"
|
87
|
+
end
|
88
|
+
|
89
|
+
logger.info("Indexed #{amount} items to #{paths}")
|
90
|
+
end
|
91
|
+
end
|
data/lib/metacrunch/elasticsearch/options_helpers.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative "../elasticsearch"
|
2
|
+
|
3
|
+
module Metacrunch::Elasticsearch::OptionsHelpers
|
4
|
+
def extract_options!(options, *keys)
|
5
|
+
keys = keys
|
6
|
+
.map do |_key|
|
7
|
+
_key == :_client_options_ ? [:host, :hosts, :url, :urls] : _key
|
8
|
+
end
|
9
|
+
.flatten
|
10
|
+
|
11
|
+
options
|
12
|
+
.delete_if do |_key, _value|
|
13
|
+
if keys.include?(_key)
|
14
|
+
instance_variable_set("@#{_key}", _value)
|
15
|
+
true # always return true; otherwise a falsy _value would keep the key from being deleted
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def normalize_options!(options)
|
21
|
+
{
|
22
|
+
index: options[:index],
|
23
|
+
body: options.select { |_key, _| _key != :index }
|
24
|
+
}
|
25
|
+
.tap(&:compact!)
|
26
|
+
.try do |_result|
|
27
|
+
options.clear.merge!(_result)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/metacrunch/elasticsearch/searcher.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "elasticsearch"
|
2
|
+
require "metacrunch/processor"
|
3
|
+
require_relative "../elasticsearch"
|
4
|
+
require_relative "./client_factory"
|
5
|
+
require_relative "./options_helpers"
|
6
|
+
|
7
|
+
class Metacrunch::Elasticsearch::Searcher < Metacrunch::Processor
|
8
|
+
include Enumerable
|
9
|
+
include Metacrunch::Elasticsearch::ClientFactory
|
10
|
+
include Metacrunch::Elasticsearch::OptionsHelpers
|
11
|
+
|
12
|
+
DEFAULT_BODY = { query: { match_all: {} } }
|
13
|
+
DEFAULT_SCAN_SIZE = 200 # per shard
|
14
|
+
DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
|
15
|
+
|
16
|
+
attr_accessor :bulk_size
|
17
|
+
attr_accessor :index
|
18
|
+
attr_accessor :scan_size
|
19
|
+
attr_accessor :scroll_expiry_time
|
20
|
+
attr_accessor :type
|
21
|
+
|
22
|
+
def initialize(options = {})
|
23
|
+
options.deep_symbolize_keys!
|
24
|
+
extract_options!(options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
|
25
|
+
@body = options.presence || DEFAULT_BODY
|
26
|
+
end
|
27
|
+
|
28
|
+
def call(items = [], pipeline = nil)
|
29
|
+
@docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum
|
30
|
+
|
31
|
+
begin
|
32
|
+
items.concat(@docs_enumerator.next)
|
33
|
+
rescue StopIteration
|
34
|
+
pipeline.terminate!
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def each
|
39
|
+
return enum_for(__method__) unless block_given?
|
40
|
+
client = client_factory
|
41
|
+
|
42
|
+
search_result = client.search({
|
43
|
+
body: @body,
|
44
|
+
index: @index,
|
45
|
+
scroll: "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s",
|
46
|
+
search_type: "scan",
|
47
|
+
size: @scan_size || DEFAULT_SCAN_SIZE
|
48
|
+
})
|
49
|
+
|
50
|
+
while (
|
51
|
+
search_result = client.scroll(
|
52
|
+
scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
|
53
|
+
scroll_id: search_result["_scroll_id"]
|
54
|
+
) and # don't use &&: `and` binds more loosely than `=`, so the assignment completes before the hits check
|
55
|
+
search_result["hits"]["hits"].present?
|
56
|
+
) do
|
57
|
+
search_result["hits"]["hits"].each do |_hit|
|
58
|
+
yield _hit
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
data/lib/metacrunch/elasticsearch.rb
CHANGED
@@ -3,8 +3,16 @@ require "elasticsearch"
|
|
3
3
|
|
4
4
|
module Metacrunch
|
5
5
|
module Elasticsearch
|
6
|
-
require_relative "./elasticsearch/
|
6
|
+
require_relative "./elasticsearch/index_creator"
|
7
|
+
require_relative "./elasticsearch/indexer"
|
7
8
|
require_relative "./elasticsearch/reader"
|
9
|
+
require_relative "./elasticsearch/searcher"
|
10
|
+
require_relative "./elasticsearch/uri"
|
8
11
|
require_relative "./elasticsearch/writer"
|
12
|
+
|
13
|
+
#
|
14
|
+
# error classes are defined inline to avoid cluttering the source tree unnecessarily
|
15
|
+
#
|
16
|
+
class IndexAlreadyExistsError < StandardError; end
|
9
17
|
end
|
10
18
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-10-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -60,12 +60,21 @@ extensions: []
|
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
62
|
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".travis.yml"
|
63
65
|
- Gemfile
|
64
66
|
- License.txt
|
65
67
|
- Rakefile
|
66
68
|
- Readme.md
|
69
|
+
- bin/console
|
70
|
+
- bin/setup
|
67
71
|
- lib/metacrunch/elasticsearch.rb
|
72
|
+
- lib/metacrunch/elasticsearch/client_factory.rb
|
73
|
+
- lib/metacrunch/elasticsearch/index_creator.rb
|
74
|
+
- lib/metacrunch/elasticsearch/indexer.rb
|
75
|
+
- lib/metacrunch/elasticsearch/options_helpers.rb
|
68
76
|
- lib/metacrunch/elasticsearch/reader.rb
|
77
|
+
- lib/metacrunch/elasticsearch/searcher.rb
|
69
78
|
- lib/metacrunch/elasticsearch/uri.rb
|
70
79
|
- lib/metacrunch/elasticsearch/version.rb
|
71
80
|
- lib/metacrunch/elasticsearch/writer.rb
|
@@ -96,4 +105,3 @@ signing_key:
|
|
96
105
|
specification_version: 4
|
97
106
|
summary: Metacrunch elasticsearch package
|
98
107
|
test_files: []
|
99
|
-
has_rdoc:
|