metacrunch-elasticsearch 2.0.1 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: a74d43ab5c9f961f3efd802650e78022fb51e1e8
- data.tar.gz: 047c7af302f42d06dc24afa823fa46d2fa67a30a
+ metadata.gz: 17ba1fec96c7af64b5cdf8283dfee2fe0b4eadf1
+ data.tar.gz: fb673a9a460c7d7f82c96dfedc6a48fd26fdc971
  SHA512:
- metadata.gz: ccbddc5b0846556dbbb7409e1b718e716316f56fd8b1dc46c788c10f11fe7406f46db2d1788360bb7bed5703d94885da15e7ad380da17961b2f508c37a34f84b
- data.tar.gz: cb7e01749ab51e45702bfced9967860376bc41ee35ef40bf78f692f8d5df57fb88eb16e954e31d6251650df9ca0a96e3fcd6b1822190e14bbbc2b34b6a7d3157
+ metadata.gz: 106cdda72d6b43bf2d52392b1f3d45e68bbd679427ec1ca582a265dc904d286415ce8e35a0464e867eca550661ef65232e983e72812c4bd815dbbb5c050dcf34
+ data.tar.gz: 7d3552381f7dbf8fab91d2d1e8a2ca8cc964304bfe2105baa760eeba71cbadd12748ffe6ef03b2a425f343fa37caf51dace45d2534eec8b783f4d30516ee658a
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --color
+ --format documentation
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
+ language: ruby
+ rvm:
+ - "2.0"
+ - "2.1"
+ - "2.2"
data/Gemfile CHANGED
@@ -1,20 +1,28 @@
  source "https://rubygems.org"

+ # Specify your gem's dependencies in your gemspec
  gemspec

- gem "metacrunch", ">= 2.1.0", github: "ubpb/metacrunch", branch: "master"
+ group :development do
+   gem "bundler", ">= 1.10"
+   gem "rake"
+   gem "rspec", ">= 3.0.0", "< 4.0.0"
+   gem "simplecov", ">= 0.8.0"
+   gem "vcr", ">= 2.9.0", "< 3.0.0"
+   gem "webmock", ">= 1.19.0", "< 2.0.0"

- gem "rake"
- gem "rspec", "~> 3.2.0"
-
- if !ENV["CI"]
-   group :development do
+   if !ENV["CI"]
      gem "hashdiff"
      gem "pry", "~> 0.9.12.6"
      gem "pry-byebug", "<= 1.3.2"
-     gem "pry-rescue", "~> 1.4.1", github: "ConradIrwin/pry-rescue", branch: :master
+     gem "pry-rescue", "~> 1.4.2"
      gem "pry-stack_explorer", "~> 0.4.9.1"
      gem "pry-syntax-hacks", "~> 0.0.6"
    end
  end

+ group :test do
+   gem "codeclimate-test-reporter", require: nil
+ end
+
+ gem "metacrunch", github: "ubpb/metacrunch", branch: :master
data/bin/console ADDED
@@ -0,0 +1,11 @@
+ #!/usr/bin/env ruby
+
+ require "bundler/setup"
+ require "metacrunch/elasticsearch"
+
+ # You can add fixtures and/or initialization code here to make experimenting
+ # with your gem easier. You can also use a different console, if you like.
+
+ # (If you use this, don't forget to add pry to your Gemfile!)
+ require "pry"
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
+ #!/bin/bash
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/lib/metacrunch/elasticsearch/client_factory.rb ADDED
@@ -0,0 +1,15 @@
+ require "elasticsearch"
+ require_relative "../elasticsearch"
+
+ module Metacrunch::Elasticsearch::ClientFactory
+   def client_factory
+     client_options = {
+       host: @host,
+       hosts: @hosts,
+       url: @url,
+       urls: @urls
+     }.compact
+
+     Elasticsearch::Client.new(client_options)
+   end
+ end
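A minimal usage sketch of the new ClientFactory module (the including class, the URL, and the direct `@url` assignment are placeholders for illustration; inside the gem the instance variables are populated by the options helpers further down):

    require "metacrunch/elasticsearch/client_factory"

    # Hypothetical including class: only @url is set here; the other
    # supported ivars (@host, @hosts, @urls) stay nil and are dropped by #compact.
    class MyProcessor
      include Metacrunch::Elasticsearch::ClientFactory

      def initialize(url)
        @url = url
      end
    end

    client = MyProcessor.new("http://localhost:9200").client_factory
    client.cluster.health # a regular Elasticsearch::Client from here on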
data/lib/metacrunch/elasticsearch/index_creator.rb ADDED
@@ -0,0 +1,76 @@
+ require "elasticsearch"
+ require "metacrunch/processor"
+ require_relative "../elasticsearch"
+ require_relative "./client_factory"
+ require_relative "./options_helpers"
+
+ class Metacrunch::Elasticsearch::IndexCreator < Metacrunch::Processor
+   include Metacrunch::Elasticsearch::ClientFactory
+   include Metacrunch::Elasticsearch::OptionsHelpers
+
+   attr_accessor :default_mapping
+   attr_accessor :delete_existing_index
+   attr_accessor :logger
+
+   def initialize(options = {})
+     (@client_args = options).deep_symbolize_keys!
+     extract_options!(@client_args, :_client_options_, :default_mapping, :delete_existing_index, :logger, :number_of_shards, :number_of_replicas)
+     raise ArgumentError.new("You have to supply an index name!") if @client_args[:index].blank?
+   end
+
+   def call(items = [], pipeline = nil)
+     client = client_factory
+     logger = pipeline.try(:logger) || @logger
+
+     if client.indices.exists?(@client_args)
+       if @delete_existing_index == true
+         client.indices.delete(@client_args)
+         log_index_deleted(logger, @client_args[:index], client) if logger
+       elsif @delete_existing_index == false
+         return
+       else
+         raise Metacrunch::Elasticsearch::IndexAlreadyExistsError
+       end
+     end
+
+     client.indices.create(@client_args.merge(
+       {
+         body: {
+           number_of_shards: @number_of_shards,
+           number_of_replicas: @number_of_replicas
+         }.compact
+       }
+     ))
+
+     log_index_created(logger, @client_args[:index], client) if logger
+
+     if @default_mapping
+       client.indices.put_mapping(
+         @client_args.merge(
+           type: "_default_",
+           body: {
+             _default_: @default_mapping
+           }
+         )
+       )
+     end
+   end
+
+   private
+
+   def log_index_created(logger, index, client)
+     paths = client.transport.hosts.map do |_host|
+       "#{_host[:host]}:#{_host[:port]}"
+     end
+
+     logger.info("Index #{index} created at #{paths}")
+   end
+
+   def log_index_deleted(logger, index, client)
+     paths = client.transport.hosts.map do |_host|
+       "#{_host[:host]}:#{_host[:port]}"
+     end
+
+     logger.info("Index #{index} deleted at #{paths}")
+   end
+ end
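A usage sketch for the new IndexCreator, run standalone rather than inside a pipeline; the connection URL, index name, settings and mapping below are placeholders:

    require "metacrunch/elasticsearch/index_creator"

    creator = Metacrunch::Elasticsearch::IndexCreator.new(
      url: "http://localhost:9200",      # placeholder connection option
      index: "my_index",                 # required, see the ArgumentError above
      delete_existing_index: true,       # drop and recreate the index if it exists
      number_of_shards: 1,
      number_of_replicas: 0,
      default_mapping: { properties: { title: { type: "string" } } }
    )

    creator.call # items and pipeline default to [] and nil here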
data/lib/metacrunch/elasticsearch/indexer.rb ADDED
@@ -0,0 +1,91 @@
+ require "elasticsearch"
+ require "metacrunch/processor"
+ require_relative "../elasticsearch"
+ require_relative "./client_factory"
+ require_relative "./options_helpers"
+
+ class Metacrunch::Elasticsearch::Indexer < Metacrunch::Processor
+   include Metacrunch::Elasticsearch::ClientFactory
+   include Metacrunch::Elasticsearch::OptionsHelpers
+
+   attr_accessor :bulk_size
+   attr_accessor :callbacks
+   attr_accessor :id_accessor
+   attr_accessor :index
+   attr_accessor :logger
+   attr_accessor :type
+
+   def initialize(options = {})
+     (@client_args = options).deep_symbolize_keys!
+     extract_options!(@client_args, :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type)
+     raise ArgumentError.new("You have to supply an index name!") if @index.blank?
+   end
+
+   def call(items = [], pipeline = nil)
+     logger = pipeline.try(:logger) || @logger
+
+     if (slice_size = @bulk_size || items.length) > 0
+       client = client_factory
+
+       items.each_slice(slice_size) do |_item_slice|
+         # bodies is an array so the request body can be split if the HTTP content length is exceeded
+         bodies = [_item_slice.inject([]) { |_memo, _item| _memo.concat bulk_item_factory(_item) }]
+
+         bulk_responses =
+           begin
+             bodies.map do |_body|
+               client.bulk body: _body
+             end
+           rescue
+             logger.info "Bulk index failed. Decreasing bulk size temporarily and trying again." if logger
+
+             bodies = bodies.inject([]) do |_memo, _body|
+               # Since we have to work with the bulk request body instead of the original items,
+               # each body's length has to stay a multiple of 2. .fdiv(2).fdiv(2).ceil * 2
+               # ensures this. Example: 3698.fdiv(2).fdiv(2).ceil * 2 == 1850
+               _memo.concat(_body.each_slice(_body.length.fdiv(2).fdiv(2).ceil * 2).to_a)
+             end
+
+             retry
+           end
+
+         bulk_responses.each do |_bulk_response|
+           log_items_indexed(logger, _bulk_response["items"].length, client) if logger
+
+           if after_indexed_callback = (@callbacks || {})[:after_indexed]
+             _item_slice.zip(_bulk_response["items"]).each do |_item, _item_response|
+               after_indexed_callback.call(_item, _item_response)
+             end
+           end
+         end
+       end
+     end
+   end
+
+   private
+
+   def bulk_item_factory(item)
+     [
+       { index: { _index: @index, _type: @type, _id: id(item) }.compact },
+       item.to_h
+     ]
+   end
+
+   def id(item)
+     if @id_accessor
+       if @id_accessor.respond_to?(:call)
+         @id_accessor.call(item)
+       else
+         item[@id_accessor]
+       end
+     end
+   end
+
+   def log_items_indexed(logger, amount, client)
+     paths = client.transport.hosts.map do |_host|
+       "#{_host[:host]}:#{_host[:port]}/#{@index}/#{@type}"
+     end
+
+     logger.info("Indexed #{amount} items to #{paths}")
+   end
+ end
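A usage sketch for the new Indexer; the connection URL, index/type names and sample documents are placeholders, and `id_accessor` may be a hash key or a callable as handled by `#id` above:

    require "metacrunch/elasticsearch/indexer"

    indexer = Metacrunch::Elasticsearch::Indexer.new(
      url: "http://localhost:9200",
      index: "my_index",
      type: "my_type",
      bulk_size: 500,                       # flush in bulk requests of 500 documents
      id_accessor: ->(doc) { doc["id"] },   # or simply id_accessor: "id"
      callbacks: {
        after_indexed: ->(doc, response) { puts response["index"]["status"] }
      }
    )

    indexer.call([
      { "id" => 1, "title" => "foo" },
      { "id" => 2, "title" => "bar" }
    ])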
data/lib/metacrunch/elasticsearch/options_helpers.rb ADDED
@@ -0,0 +1,30 @@
+ require_relative "../elasticsearch"
+
+ module Metacrunch::Elasticsearch::OptionsHelpers
+   def extract_options!(options, *keys)
+     keys = keys
+       .map do |_key|
+         _key == :_client_options_ ? [:host, :hosts, :url, :urls] : _key
+       end
+       .flatten
+
+     options
+       .delete_if do |_key, _value|
+         if keys.include?(_key)
+           instance_variable_set("@#{_key}", _value)
+           true # always delete the key; without this, a falsy _value would prevent deletion
+         end
+       end
+   end
+
+   def normalize_options!(options)
+     {
+       index: options[:index],
+       body: options.select { |_key, _| _key != :index }
+     }
+     .tap(&:compact!)
+     .try do |_result|
+       options.clear.merge!(_result)
+     end
+   end
+ end
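Roughly, `extract_options!` moves the listed keys out of the options hash into instance variables of the caller, with `:_client_options_` expanding to the four connection keys. A small illustration with a placeholder class and values:

    require "metacrunch/elasticsearch/options_helpers"

    class Demo
      include Metacrunch::Elasticsearch::OptionsHelpers
    end

    demo = Demo.new
    opts = { url: "http://localhost:9200", index: "my_index", query: { match_all: {} } }

    demo.extract_options!(opts, :_client_options_, :index)
    # opts is now { query: { match_all: {} } };
    # demo carries @url and @index as instance variables.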
data/lib/metacrunch/elasticsearch/searcher.rb ADDED
@@ -0,0 +1,62 @@
+ require "elasticsearch"
+ require "metacrunch/processor"
+ require_relative "../elasticsearch"
+ require_relative "./client_factory"
+ require_relative "./options_helpers"
+
+ class Metacrunch::Elasticsearch::Searcher < Metacrunch::Processor
+   include Enumerable
+   include Metacrunch::Elasticsearch::ClientFactory
+   include Metacrunch::Elasticsearch::OptionsHelpers
+
+   DEFAULT_BODY = { query: { match_all: {} } }
+   DEFAULT_SCAN_SIZE = 200 # per shard
+   DEFAULT_SCROLL_EXPIRY_TIME = 10.minutes
+
+   attr_accessor :bulk_size
+   attr_accessor :index
+   attr_accessor :scan_size
+   attr_accessor :scroll_expiry_time
+   attr_accessor :type
+
+   def initialize(options = {})
+     options.deep_symbolize_keys!
+     extract_options!(options, :_client_options_, :bulk_size, :index, :scan_size, :scroll_expiry_time, :type)
+     @body = options.presence || DEFAULT_BODY
+   end
+
+   def call(items = [], pipeline = nil)
+     @docs_enumerator ||= @bulk_size ? each_slice(@bulk_size) : [each.to_a].to_enum
+
+     begin
+       items.concat(@docs_enumerator.next)
+     rescue StopIteration
+       pipeline.terminate!
+     end
+   end
+
+   def each
+     return enum_for(__method__) unless block_given?
+     client = client_factory
+
+     search_result = client.search({
+       body: @body,
+       index: @index,
+       scroll: "#{@scroll_expiry_time || DEFAULT_SCROLL_EXPIRY_TIME}s",
+       search_type: "scan",
+       size: @scan_size || DEFAULT_SCAN_SIZE
+     })
+
+     while (
+       search_result = client.scroll(
+         scroll: "#{DEFAULT_SCROLL_EXPIRY_TIME}s",
+         scroll_id: search_result["_scroll_id"]
+       ) and # don't use &&, the lower precedence of `and` matters here
+       search_result["hits"]["hits"].present?
+     ) do
+       search_result["hits"]["hits"].each do |_hit|
+         yield _hit
+       end
+     end
+   end
+ end
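A usage sketch for the new Searcher via its Enumerable interface; the URL, index and query are placeholders, and whatever remains in the options after extraction becomes the search body:

    require "metacrunch/elasticsearch/searcher"

    searcher = Metacrunch::Elasticsearch::Searcher.new(
      url: "http://localhost:9200",
      index: "my_index",
      bulk_size: 100,                          # batch size per call(...) inside a pipeline
      query: { match: { title: "foo" } }       # becomes the search body; omit for match_all
    )

    searcher.each do |hit|
      puts hit["_id"]
    end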
data/lib/metacrunch/elasticsearch/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Metacrunch
    module Elasticsearch
-     VERSION = "2.0.1"
+     VERSION = "2.1.0"
    end
  end
data/lib/metacrunch/elasticsearch.rb CHANGED
@@ -3,8 +3,16 @@ require "elasticsearch"

  module Metacrunch
    module Elasticsearch
-     require_relative "./elasticsearch/uri"
+     require_relative "./elasticsearch/index_creator"
+     require_relative "./elasticsearch/indexer"
      require_relative "./elasticsearch/reader"
+     require_relative "./elasticsearch/searcher"
+     require_relative "./elasticsearch/uri"
      require_relative "./elasticsearch/writer"
+
+     #
+     # Error classes are defined inline to avoid cluttering the source tree with extra files.
+     #
+     class IndexAlreadyExistsError < StandardError; end
    end
  end
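With the inline error class a caller can handle an existing index explicitly instead of passing `delete_existing_index`; a sketch with placeholder options:

    begin
      Metacrunch::Elasticsearch::IndexCreator.new(
        url: "http://localhost:9200",
        index: "my_index"
      ).call
    rescue Metacrunch::Elasticsearch::IndexAlreadyExistsError
      # raised only when the index exists and no delete_existing_index option was given
    end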
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: metacrunch-elasticsearch
  version: !ruby/object:Gem::Version
- version: 2.0.1
+ version: 2.1.0
  platform: ruby
  authors:
  - René Sprotte
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2015-09-25 00:00:00.000000000 Z
+ date: 2015-10-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: activesupport
@@ -60,12 +60,21 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".gitignore"
+ - ".rspec"
+ - ".travis.yml"
  - Gemfile
  - License.txt
  - Rakefile
  - Readme.md
+ - bin/console
+ - bin/setup
  - lib/metacrunch/elasticsearch.rb
+ - lib/metacrunch/elasticsearch/client_factory.rb
+ - lib/metacrunch/elasticsearch/index_creator.rb
+ - lib/metacrunch/elasticsearch/indexer.rb
+ - lib/metacrunch/elasticsearch/options_helpers.rb
  - lib/metacrunch/elasticsearch/reader.rb
+ - lib/metacrunch/elasticsearch/searcher.rb
  - lib/metacrunch/elasticsearch/uri.rb
  - lib/metacrunch/elasticsearch/version.rb
  - lib/metacrunch/elasticsearch/writer.rb
@@ -96,4 +105,3 @@ signing_key:
  specification_version: 4
  summary: Metacrunch elasticsearch package
  test_files: []
- has_rdoc: